Commit 8ea9aba: fix bug
amazingDD committed Aug 8, 2022
1 parent 810acd3
Showing 17 changed files with 60 additions and 50 deletions.
2 changes: 1 addition & 1 deletion daisy/__init__.py
@@ -1 +1 @@
- __version__ = 'v2.1.4'
+ __version__ = 'v2.2.0'
7 changes: 4 additions & 3 deletions daisy/model/EASERecommender.py
@@ -43,13 +43,14 @@ def fit(self, train_set):
np.fill_diagonal(B, 0.)

self.item_similarity = B # item_num * item_num
+ self.item_similarity = np.array(self.item_similarity)
self.interaction_matrix = X # user_num * item_num

def predict(self, u, i):
self.interaction_matrix[u, :].multiply(self.item_similarity[:, i].T).sum(axis=1).getA1()

def rank(self, test_loader):
- rec_ids = np.array([])
+ rec_ids = None

for us, cands_ids in test_loader:
us = us.numpy()
@@ -59,9 +60,9 @@ def rank(self, test_loader):
sims = self.item_similarity[cands_ids, :].transpose(0, 2, 1) # batch * cand_num * item_num -> batch * item_num * cand_num
scores = np.einsum('BNi,BiM -> BNM', slims, sims).squeeze() # batch * 1 * cand_num -> batch * cand_num
rank_ids = np.argsort(-scores)[:, :self.topk]
- rank_list = cands_ids[:, rank_ids]
+ rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]

- rec_ids = np.vstack([rec_ids, rank_list])
+ rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])

return rec_ids

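Note: the rank() fix above corrects a NumPy indexing bug. `cands_ids[:, rank_ids]` applies every row of `rank_ids` to every row of `cands_ids`, yielding a batch x batch x topk cube instead of one ranked list per user; the new expression pairs each row of `rank_ids` with its own row of candidates. A minimal sketch of the difference on toy data (shapes are illustrative, not the library's real loaders):

```python
import numpy as np

cands_ids = np.array([[10, 11, 12, 13],   # candidate items for user A
                      [20, 21, 22, 23]])  # candidate items for user B
scores = np.array([[0.1, 0.9, 0.4, 0.3],
                   [0.8, 0.2, 0.7, 0.5]])
rank_ids = np.argsort(-scores)[:, :2]     # positions of the top-2 per row

# Old: cands_ids[:, rank_ids].shape == (2, 2, 2), mixing users' candidates.
# New: explicit row indices keep each user's ranks aligned with its own row.
rows = np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1)
rank_list = cands_ids[rows, rank_ids]     # [[11, 12], [20, 22]]

# np.take_along_axis expresses the same per-row gather more directly.
assert (rank_list == np.take_along_axis(cands_ids, rank_ids, axis=1)).all()
```

The `rec_ids = None` accumulator change is related: `np.vstack` onto the initial empty `np.array([])` fails once `rank_list` is 2-D, so the first batch now seeds the array instead. The same two patterns recur in the KNN, PureSVD, and SLiM rank() changes below.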
2 changes: 1 addition & 1 deletion daisy/model/FMRecommender.py
@@ -73,7 +73,7 @@ def calc_loss(self, batch):
pos_pred = self.forward(user, pos_item)

if self.loss_type.upper() in ['CL', 'SL']:
- label = batch[2].to(self.device)
+ label = batch[2].to(self.device).float()
loss = self.criterion(pos_pred, label)

loss += self.reg_1 * (self.embed_item(pos_item).norm(p=1))
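Note: the added `.float()` cast (repeated below in Item2Vec, MF, NFM, NeuMF, LightGCN, and NGCF) matters because PyTorch's pointwise losses require the target dtype to match the float predictions, while sampled 0/1 labels typically arrive as integer tensors. A minimal sketch, assuming the CL/SL criterion is a BCE-style loss:

```python
import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()
pos_pred = torch.randn(4)                  # float logits from the model
label = torch.tensor([1, 0, 1, 0])         # int64 labels from the sampler

# criterion(pos_pred, label)               # RuntimeError: Found dtype Long but expected Float
loss = criterion(pos_pred, label.float())  # cast resolves the dtype mismatch
```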
2 changes: 1 addition & 1 deletion daisy/model/Item2VecRecommender.py
@@ -65,7 +65,7 @@ def fit(self, train_loader):
def calc_loss(self, batch):
target_i = batch[0].to(self.device)
context_j = batch[1].to(self.device)
- label = batch[2].to(self.device)
+ label = batch[2].to(self.device).float()
prediction = self.forward(target_i, context_j)
loss = self.criterion(prediction, label)

28 changes: 16 additions & 12 deletions daisy/model/KNNCFRecommender.py
@@ -72,7 +72,7 @@ def check_matrix(X, format='csc', dtype=np.float32):
class Similarity:
def __init__(self, data_matrix, topK=100, shrink=0, normalize=True,
asymmetric_alpha=0.5, tversky_alpha=1.0, tversky_beta=1.0,
similarity="cosine", row_weights=None):
similarity="cosine", row_weights=None, logger=None):
'''
Computes the cosine similarity on the columns of data_matrix
If it is computed on URM=|users|x|items|, pass the URM as is.
@@ -106,7 +106,7 @@ def __init__(self, data_matrix, topK=100, shrink=0, normalize=True,
Multiply the values in each row by a specified value. Array, by default None
'''
super(Similarity, self).__init__()

+ self.logger = logger
self.shrink = shrink
self.normalize = normalize

@@ -399,6 +399,7 @@ def __init__(self, config):
by default "cosine"
normalize : bool, whether calculate similarity with normalized value
"""
+ super(ItemKNNCF, self).__init__(config)
self.user_num = config['user_num']
self.item_num = config['item_num']

@@ -422,7 +423,8 @@ def fit(self, train_set):
shrink=self.shrink,
topK=self.k,
normalize=self.normalize,
- similarity=self.similarity)
+ similarity=self.similarity,
+ logger=self.logger)

w_sparse = similarity.compute_similarity()
w_sparse = w_sparse.tocsc()
@@ -436,16 +438,16 @@ def predict(self, u, i):
return self.pred_mat[u, i]

def rank(self, test_loader):
- rec_ids = np.array([])
+ rec_ids = None

for us, cands_ids in test_loader:
us = us.numpy()
cands_ids = cands_ids.numpy()
- scores = self.pred_mat[us, cands_ids].A
+ scores = self.pred_mat[us[:, np.newaxis], cands_ids].A
rank_ids = np.argsort(-scores)[:, :self.topk]
- rank_list = cands_ids[:, rank_ids]
+ rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]

- rec_ids = np.vstack([rec_ids, rank_list])
+ rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])

return rec_ids

@@ -476,6 +478,7 @@ def __init__(self, config):
by default "cosine"
normalize : bool, whether calculate similarity with normalized value
"""
+ super(UserKNNCF, self).__init__(config)
self.user_num = config['user_num']
self.item_num = config['item_num']

@@ -498,7 +501,8 @@ def fit(self, train_set):
shrink=self.shrink,
topK=self.k,
normalize=self.normalize,
- similarity = self.similarity)
+ similarity = self.similarity,
+ logger=self.logger)

w_sparse = similarity.compute_similarity()
w_sparse = w_sparse.tocsc()
@@ -512,16 +516,16 @@ def predict(self, u, i):
return self.pred_mat[u, i]

def rank(self, test_loader):
- rec_ids = np.array([])
+ rec_ids = None

for us, cands_ids in test_loader:
us = us.numpy()
cands_ids = cands_ids.numpy()
- scores = self.pred_mat[us, cands_ids].A
+ scores = self.pred_mat[us[:, np.newaxis], cands_ids].A
rank_ids = np.argsort(-scores)[:, :self.topk]
- rank_list = cands_ids[:, rank_ids]
+ rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]

- rec_ids = np.vstack([rec_ids, rank_list])
+ rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])

return rec_ids

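Note: both KNN rank() fixes turn on how SciPy sparse matrices handle fancy indexing. `pred_mat[us, cands_ids]` tries to broadcast a (batch,) user array against a (batch, cand_num) candidate array, which fails unless the two sizes happen to coincide; `us[:, np.newaxis]` makes the user index (batch, 1), which broadcasts cleanly to one row of scores per user. A rough sketch with made-up shapes:

```python
import numpy as np
import scipy.sparse as sp

rng = np.random.default_rng(0)
pred_mat = sp.csc_matrix(rng.random((5, 6)))  # user x item prediction matrix

us = np.array([0, 2])                         # batch of 2 users
cands_ids = np.array([[1, 3, 4],
                      [0, 2, 5]])             # 3 candidate items per user

# pred_mat[us, cands_ids]                     # IndexError: (2,) vs (2, 3) do not broadcast
scores = pred_mat[us[:, np.newaxis], cands_ids].A  # (2, 1) broadcasts to (2, 3)
```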
14 changes: 7 additions & 7 deletions daisy/model/LightGCNRecommender.py
@@ -108,8 +108,8 @@ def get_norm_adj_mat(self):

def get_ego_embeddings(self):
''' Get the embedding of users and items and combine to an new embedding matrix '''
- user_embeddings = self.user_embedding.weight
- item_embeddings = self.item_embedding.weight
+ user_embeddings = self.embed_user.weight
+ item_embeddings = self.embed_item.weight
ego_embeddings = torch.cat([user_embeddings, item_embeddings], dim=0)

return ego_embeddings
@@ -133,8 +133,8 @@ def calc_loss(self, batch):
if self.restore_user_e is not None or self.restore_item_e is not None:
self.restore_user_e, self.restore_item_e = None, None

- user = batch[0].to(self.device)
- pos_item = batch[1].to(self.device)
+ user = batch[0].to(self.device).long()
+ pos_item = batch[1].to(self.device).long()

embed_user, embed_item = self.forward()

@@ -146,17 +146,17 @@ def calc_loss(self, batch):
pos_ego_embeddings = self.embed_item(pos_item)

if self.loss_type.upper() in ['CL', 'SL']:
- label = batch[2].to(self.device)
+ label = batch[2].to(self.device).float()
loss = self.criterion(pos_pred, label)
# add regularization term
loss += self.reg_1 * (u_ego_embeddings.norm(p=1) + pos_ego_embeddings.norm(p=1))
loss += self.reg_2 * (u_ego_embeddings.norm() + pos_ego_embeddings.norm())

elif self.loss_type.upper() in ['BPR', 'TL', 'HL']:
- neg_item = batch[2].to(self.device)
+ neg_item = batch[2].to(self.device).long()
neg_embeddings = embed_item[neg_item]
neg_pred = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
- neg_ego_embeddings = self.item_embedding(neg_item)
+ neg_ego_embeddings = self.embed_item(neg_item)

loss = self.criterion(pos_pred, neg_pred)

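Note: the `.long()` casts in LightGCN (and in NGCF below) protect the embedding lookups, since `nn.Embedding` only accepts integer index tensors and a batch that arrives as floats would crash the forward pass. The `user_embedding`/`item_embedding` renames likewise point the code at the attributes the module actually defines (`embed_user`/`embed_item`), fixing an AttributeError. A minimal illustration of the cast:

```python
import torch
import torch.nn as nn

embed_item = nn.Embedding(100, 8)
pos_item = torch.tensor([3.0, 7.0])     # item ids that arrived as float

# embed_item(pos_item)                  # RuntimeError: expected Long/Int indices
pos_emb = embed_item(pos_item.long())   # cast restores a valid lookup
```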
2 changes: 1 addition & 1 deletion daisy/model/MFRecommender.py
@@ -73,7 +73,7 @@ def calc_loss(self, batch):
pos_pred = self.forward(user, pos_item)

if self.loss_type.upper() in ['CL', 'SL']:
- label = batch[2].to(self.device)
+ label = batch[2].to(self.device).float()
loss = self.criterion(pos_pred, label)

# add regularization term
2 changes: 1 addition & 1 deletion daisy/model/NFMRecommender.py
@@ -128,7 +128,7 @@ def calc_loss(self, batch):
pos_pred = self.forward(user, pos_item)

if self.loss_type.upper() in ['CL', 'SL']:
- label = batch[2].to(self.device)
+ label = batch[2].to(self.device).float()
loss = self.criterion(pos_pred, label)

loss += self.reg_1 * (self.embed_item(pos_item).norm(p=1))
12 changes: 6 additions & 6 deletions daisy/model/NGCFRecommender.py
@@ -104,7 +104,7 @@ def __init__(self, config):
self.embed_item = nn.Embedding(self.item_num, self.embedding_size)
self.gnn_layers = torch.nn.ModuleList()
for _, (in_size, out_size) in enumerate(zip(self.hidden_size_list[:-1], self.hidden_size_list[1:])):
- self.GNNlayers.append(BiGNN(in_size, out_size))
+ self.gnn_layers.append(BiGNN(in_size, out_size))

# storage variables for evaluation acceleration
self.restore_user_e = None
@@ -175,8 +175,8 @@ def calc_loss(self, batch):
if self.restore_user_e is not None or self.restore_item_e is not None:
self.restore_user_e, self.restore_item_e = None, None

- user = batch[0].to(self.device)
- pos_item = batch[1].to(self.device)
+ user = batch[0].to(self.device).long()
+ pos_item = batch[1].to(self.device).long()

embed_user, embed_item = self.forward()

@@ -188,16 +188,16 @@ def calc_loss(self, batch):
pos_ego_embeddings = self.embed_item(pos_item)

if self.loss_type.upper() in ['CL', 'SL']:
- label = batch[2].to(self.device)
+ label = batch[2].to(self.device).float()
loss = self.criterion(pos_pred, label)
# add regularization term
loss += self.reg_1 * (u_ego_embeddings.norm(p=1) + pos_ego_embeddings.norm(p=1))
loss += self.reg_2 * (u_ego_embeddings.norm() + pos_ego_embeddings.norm())
elif self.loss_type.upper() in ['BPR', 'TL', 'HL']:
- neg_item = batch[2].to(self.device)
+ neg_item = batch[2].to(self.device).long()
neg_embeddings = embed_item[neg_item]
neg_pred = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
- neg_ego_embeddings = self.item_embedding(neg_item)
+ neg_ego_embeddings = self.embed_item(neg_item)

loss = self.criterion(pos_pred, neg_pred)

2 changes: 1 addition & 1 deletion daisy/model/NeuMFRecommender.py
@@ -142,7 +142,7 @@ def calc_loss(self, batch):
pos_pred = self.forward(user, pos_item)

if self.loss_type.upper() in ['CL', 'SL']:
- label = batch[2].to(self.device)
+ label = batch[2].to(self.device).float()
loss = self.criterion(pos_pred, label)

loss += self.reg_1 * (self.embed_item_GMF(pos_item).norm(p=1))
4 changes: 3 additions & 1 deletion daisy/model/PopRecommender.py
@@ -20,12 +20,14 @@ def __init__(self, config):
Parameters
----------
"""
+ super(MostPop, self).__init__(config)
self.item_num = config['item_num']
self.item_cnt_ref = np.zeros(self.item_num)
self.topk = config['topk']
+ self.cnt_col = config['IID_NAME']

def fit(self, train_set):
- item_cnt = train_set['item'].size()
+ item_cnt = train_set[self.cnt_col].value_counts()
idx, cnt = item_cnt.index, item_cnt.values
self.item_cnt_ref[idx] = cnt
self.item_score = self.item_cnt_ref / (1 + self.item_cnt_ref)
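Note: pandas `Series.size` is an attribute rather than a method, so the old `train_set['item'].size()` raised a TypeError; `value_counts()` returns per-item interaction counts keyed by item id, and the column name now comes from `config['IID_NAME']`. A small sketch of the corrected popularity logic:

```python
import numpy as np
import pandas as pd

train_set = pd.DataFrame({'item': [0, 1, 1, 3, 1, 0]})

item_cnt = train_set['item'].value_counts()      # counts indexed by item id
item_cnt_ref = np.zeros(4)                       # one slot per item id
item_cnt_ref[item_cnt.index] = item_cnt.values   # -> [2., 3., 0., 1.]
item_score = item_cnt_ref / (1 + item_cnt_ref)   # squash counts into [0, 1)
```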
9 changes: 5 additions & 4 deletions daisy/model/PureSVDRecommender.py
@@ -25,6 +25,7 @@ def __init__(self, config):
item_num : int, the number of items
factors : int, latent factor number
"""
+ super(PureSVD, self).__init__(config)
self.user_num = config['user_num']
self.item_num = config['item_num']
self.factors = config['factors']
@@ -35,7 +36,7 @@ def __init__(self, config):
self.topk = config['topk']

def fit(self, train_set):
self.logger.info(" Computing SVD decomposition...")
self.logger.info("Computing SVD decomposition...")
train_set = self._convert_df(self.user_num, self.item_num, train_set)
self.logger.info('Finish build train matrix for decomposition')
U, sigma, Vt = randomized_svd(train_set,
@@ -60,7 +61,7 @@ def predict(self, u, i):
return self.user_vec[u, :].dot(self.item_vec[i, :])

def rank(self, test_loader):
- rec_ids = np.array([])
+ rec_ids = None

for us, cands_ids in test_loader:
us = us.numpy()
@@ -70,9 +71,9 @@
items_emb = self.item_vec[cands_ids, :].transpose(0, 2, 1) # batch * cand_num * factor -> batch * factor * cand_num
scores = np.einsum('BNi,BiM -> BNM', user_emb, items_emb).squeeze() # batch * 1 * cand_num -> batch * cand_num
rank_ids = np.argsort(-scores)[:, :self.topk]
- rank_list = cands_ids[:, rank_ids]
+ rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]

- rec_ids = np.vstack([rec_ids, rank_list])
+ rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])

return rec_ids

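Note: for context on the einsum scoring above, PureSVD factors the interaction matrix with scikit-learn's `randomized_svd` and ranks candidates by inner products of the factors. A toy sketch of the idea (folding sigma into the user side is an assumption here, and all values are illustrative):

```python
import numpy as np
from sklearn.utils.extmath import randomized_svd

X = np.random.rand(6, 5)                     # toy user-item matrix
U, sigma, Vt = randomized_svd(X, n_components=3, random_state=2022)

user_vec = U * sigma                         # user factors scaled by singular values
item_vec = Vt.T                              # item factors
score = user_vec[0, :].dot(item_vec[2, :])   # mirrors predict(u=0, i=2)
```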
13 changes: 7 additions & 6 deletions daisy/model/SLiMRecommender.py
@@ -37,6 +37,7 @@ def __init__(self, config):
alpha : float, Constant that multiplies the penalty terms
positive_only : bool, When set to True, forces the coefficients to be positive
"""
+ super(SLiM, self).__init__(config)
self.md = ElasticNet(alpha=config['alpha'],
l1_ratio=config['elastic'],
positive=True,
@@ -85,9 +86,9 @@ def fit(self, train_set, verbose=True):
nonzero_model_coef_index = self.md.sparse_coef_.indices
nonzero_model_coef_value = self.md.sparse_coef_.data

- # local_topk = min(len(nonzero_model_coef_value) - 1, self.topk)
+ local_topk = min(len(nonzero_model_coef_value) - 1, self.topk)
# just keep all nonzero coef value for ranking, if you want improve speed, use code above
- local_topk = len(nonzero_model_coef_value) - 1
+ # local_topk = len(nonzero_model_coef_value) - 1

relevant_items_partition = (-nonzero_model_coef_value).argpartition(local_topk)[0:local_topk]
relevant_items_partition_sorting = np.argsort(-nonzero_model_coef_value[relevant_items_partition])
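Note: restoring the capped `local_topk` re-enables the speed trick the comment refers to: `argpartition` isolates the k largest coefficients in O(n) without a full sort, and only those k entries are then sorted. Roughly:

```python
import numpy as np

coef = np.array([0.2, 0.9, 0.1, 0.7, 0.5])
k = 3

partition = (-coef).argpartition(k)[0:k]           # the 3 largest, in no particular order
top_idx = partition[np.argsort(-coef[partition])]  # sort only those 3
# top_idx -> array([1, 3, 4]), i.e. values 0.9, 0.7, 0.5
```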
@@ -126,16 +127,16 @@ def predict(self, u, i):
return self.A_tilde[u, i]

def rank(self, test_loader):
- rec_ids = np.array([])
+ rec_ids = None

for us, cands_ids in test_loader:
us = us.numpy()
cands_ids = cands_ids.numpy()
- scores = self.A_tilde[us, cands_ids].A
+ scores = self.A_tilde[us[:, np.newaxis], cands_ids].A
rank_ids = np.argsort(-scores)[:, :self.topk]
- rank_list = cands_ids[:, rank_ids]
+ rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]

- rec_ids = np.vstack([rec_ids, rank_list])
+ rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])

return rec_ids

3 changes: 2 additions & 1 deletion daisy/utils/config.py
@@ -58,7 +58,8 @@ def init_config(param_dict=None):
model_init_file = os.path.join(current_path, f'../assets/{algo_name}.yaml')
model_conf = yaml.load(
open(model_init_file), Loader=yaml.loader.SafeLoader)
- config.update(model_conf)
+ if model_conf is not None:
+     config.update(model_conf)

args_conf = vars(args)

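Note: the guard in config.py covers model YAML files that exist but define no keys: `yaml.load` returns None for an empty document, and `dict.update(None)` raises a TypeError. A minimal demonstration:

```python
import yaml

model_conf = yaml.load('', Loader=yaml.loader.SafeLoader)
print(model_conf)                # None: an empty YAML document parses to None

config = {}
if model_conf is not None:
    config.update(model_conf)    # config.update(None) would raise TypeError
```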
2 changes: 1 addition & 1 deletion run_examples/test.py
@@ -72,7 +72,7 @@

''' build and train model '''
s_time = time.time()
- if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop']:
+ if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop', 'ease']:
model = model_config[config['algo_name']](config)
model.fit(train_set)

2 changes: 1 addition & 1 deletion run_examples/tune.py
@@ -163,7 +163,7 @@ def objective(trial):
config['train_ur'] = train_ur

''' build and train model '''
- if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop']:
+ if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop', 'ease']:
model = model_config[config['algo_name']](config)
model.fit(train)

4 changes: 2 additions & 2 deletions setup.py
@@ -44,14 +44,14 @@
# package_dir={"": "daisy"},
package_data={"": ["*.yaml"]},
# packages = find_packages(exclude=['tests*']),
- version='v2.1.4', # Ideally should be same as your GitHub release tag varsion
+ version='v2.2.0', # Ideally should be same as your GitHub release tag varsion
description=('An easy-to-use library for recommender systems.'),
long_description=long_description,
# long_description_content_type="text/markdown",
author='Yu Di',
author_email='[email protected]',
url='https://github.com/AmazingDD/daisyRec',
- download_url='https://github.com/AmazingDD/daisyRec/archive/refs/tags/v2.1.4.tar.gz',
+ download_url='https://github.com/AmazingDD/daisyRec/archive/refs/tags/v2.2.0.tar.gz',
keywords=['ranking', 'recommendation'],
# include_package_data=True,
install_requires=install_requires,
