diff --git a/daisy/__init__.py b/daisy/__init__.py
index 08848eb..0203e20 100644
--- a/daisy/__init__.py
+++ b/daisy/__init__.py
@@ -1 +1 @@
-__version__ = 'v2.1.4'
+__version__ = 'v2.2.0'
diff --git a/daisy/model/EASERecommender.py b/daisy/model/EASERecommender.py
index ed98d14..bc3c490 100644
--- a/daisy/model/EASERecommender.py
+++ b/daisy/model/EASERecommender.py
@@ -43,13 +43,14 @@ def fit(self, train_set):
         np.fill_diagonal(B, 0.)
 
         self.item_similarity = B # item_num * item_num
+        self.item_similarity = np.array(self.item_similarity)
         self.interaction_matrix = X # user_num * item_num
 
     def predict(self, u, i):
         self.interaction_matrix[u, :].multiply(self.item_similarity[:, i].T).sum(axis=1).getA1()
 
     def rank(self, test_loader):
-        rec_ids = np.array([])
+        rec_ids = None
 
         for us, cands_ids in test_loader:
             us = us.numpy()
@@ -59,9 +60,9 @@
             sims = self.item_similarity[cands_ids, :].transpose(0, 2, 1) # batch * cand_num * item_num -> batch * item_num * cand_num
             scores = np.einsum('BNi,BiM -> BNM', slims, sims).squeeze() # batch * 1 * cand_num -> batch * cand_num
             rank_ids = np.argsort(-scores)[:, :self.topk]
-            rank_list = cands_ids[:, rank_ids]
+            rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]
 
-            rec_ids = np.vstack([rec_ids, rank_list])
+            rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])
 
         return rec_ids
 
diff --git a/daisy/model/FMRecommender.py b/daisy/model/FMRecommender.py
index bfd4aaf..bb8aac7 100644
--- a/daisy/model/FMRecommender.py
+++ b/daisy/model/FMRecommender.py
@@ -73,7 +73,7 @@ def calc_loss(self, batch):
         pos_pred = self.forward(user, pos_item)
 
         if self.loss_type.upper() in ['CL', 'SL']:
-            label = batch[2].to(self.device)
+            label = batch[2].to(self.device).float()
             loss = self.criterion(pos_pred, label)
 
             loss += self.reg_1 * (self.embed_item(pos_item).norm(p=1))
diff --git a/daisy/model/Item2VecRecommender.py b/daisy/model/Item2VecRecommender.py
index b8bef25..bc7b2dc 100644
--- a/daisy/model/Item2VecRecommender.py
+++ b/daisy/model/Item2VecRecommender.py
@@ -65,7 +65,7 @@ def fit(self, train_loader):
     def calc_loss(self, batch):
         target_i = batch[0].to(self.device)
         context_j = batch[1].to(self.device)
-        label = batch[2].to(self.device)
+        label = batch[2].to(self.device).float()
 
         prediction = self.forward(target_i, context_j)
         loss = self.criterion(prediction, label)
diff --git a/daisy/model/KNNCFRecommender.py b/daisy/model/KNNCFRecommender.py
index 8a07b77..74ab51b 100644
--- a/daisy/model/KNNCFRecommender.py
+++ b/daisy/model/KNNCFRecommender.py
@@ -72,7 +72,7 @@ def check_matrix(X, format='csc', dtype=np.float32):
 class Similarity:
     def __init__(self, data_matrix, topK=100, shrink=0, normalize=True,
                  asymmetric_alpha=0.5, tversky_alpha=1.0, tversky_beta=1.0,
-                 similarity="cosine", row_weights=None):
+                 similarity="cosine", row_weights=None, logger=None):
         '''
         Computes the cosine similarity on the columns of data_matrix
         If it is computed on URM=|users|x|items|, pass the URM as is.
@@ -106,7 +106,7 @@ def __init__(self, data_matrix, topK=100, shrink=0, normalize=True,
             Multiply the values in each row by a specified value.
             Array, by default None
         '''
         super(Similarity, self).__init__()
-
+        self.logger = logger
         self.shrink = shrink
         self.normalize = normalize
@@ -399,6 +399,7 @@ def __init__(self, config):
             by default "cosine"
         normalize : bool, whether calculate similarity with normalized value
         """
+        super(ItemKNNCF, self).__init__(config)
         self.user_num = config['user_num']
         self.item_num = config['item_num']
 
@@ -422,7 +423,8 @@ def fit(self, train_set):
                                 shrink=self.shrink,
                                 topK=self.k,
                                 normalize=self.normalize,
-                                similarity=self.similarity)
+                                similarity=self.similarity,
+                                logger=self.logger)
 
         w_sparse = similarity.compute_similarity()
         w_sparse = w_sparse.tocsc()
@@ -436,16 +438,16 @@ def predict(self, u, i):
         return self.pred_mat[u, i]
 
     def rank(self, test_loader):
-        rec_ids = np.array([])
+        rec_ids = None
 
         for us, cands_ids in test_loader:
             us = us.numpy()
             cands_ids = cands_ids.numpy()
-            scores = self.pred_mat[us, cands_ids].A
+            scores = self.pred_mat[us[:, np.newaxis], cands_ids].A
             rank_ids = np.argsort(-scores)[:, :self.topk]
-            rank_list = cands_ids[:, rank_ids]
+            rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]
 
-            rec_ids = np.vstack([rec_ids, rank_list])
+            rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])
 
         return rec_ids
 
@@ -476,6 +478,7 @@ def __init__(self, config):
             by default "cosine"
         normalize : bool, whether calculate similarity with normalized value
         """
+        super(UserKNNCF, self).__init__(config)
         self.user_num = config['user_num']
         self.item_num = config['item_num']
 
@@ -498,7 +501,8 @@ def fit(self, train_set):
                                 shrink=self.shrink,
                                 topK=self.k,
                                 normalize=self.normalize,
-                                similarity = self.similarity)
+                                similarity = self.similarity,
+                                logger=self.logger)
 
         w_sparse = similarity.compute_similarity()
         w_sparse = w_sparse.tocsc()
@@ -512,16 +516,16 @@ def predict(self, u, i):
         return self.pred_mat[u, i]
 
     def rank(self, test_loader):
-        rec_ids = np.array([])
+        rec_ids = None
 
         for us, cands_ids in test_loader:
             us = us.numpy()
             cands_ids = cands_ids.numpy()
-            scores = self.pred_mat[us, cands_ids].A
+            scores = self.pred_mat[us[:, np.newaxis], cands_ids].A
             rank_ids = np.argsort(-scores)[:, :self.topk]
-            rank_list = cands_ids[:, rank_ids]
+            rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]
 
-            rec_ids = np.vstack([rec_ids, rank_list])
+            rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])
 
         return rec_ids
 
diff --git a/daisy/model/LightGCNRecommender.py b/daisy/model/LightGCNRecommender.py
index 6ae3009..66f9249 100644
--- a/daisy/model/LightGCNRecommender.py
+++ b/daisy/model/LightGCNRecommender.py
@@ -108,8 +108,8 @@ def get_norm_adj_mat(self):
 
     def get_ego_embeddings(self):
         ''' Get the embedding of users and items and combine to an new embedding matrix '''
-        user_embeddings = self.user_embedding.weight
-        item_embeddings = self.item_embedding.weight
+        user_embeddings = self.embed_user.weight
+        item_embeddings = self.embed_item.weight
         ego_embeddings = torch.cat([user_embeddings, item_embeddings], dim=0)
         return ego_embeddings
 
@@ -133,8 +133,8 @@ def calc_loss(self, batch):
         if self.restore_user_e is not None or self.restore_item_e is not None:
             self.restore_user_e, self.restore_item_e = None, None
 
-        user = batch[0].to(self.device)
-        pos_item = batch[1].to(self.device)
+        user = batch[0].to(self.device).long()
+        pos_item = batch[1].to(self.device).long()
 
         embed_user, embed_item = self.forward()
 
@@ -146,17 +146,17 @@
         pos_ego_embeddings = self.embed_item(pos_item)
 
         if self.loss_type.upper() in ['CL', 'SL']:
-            label = batch[2].to(self.device)
+            label = batch[2].to(self.device).float()
             loss = self.criterion(pos_pred, label)
 
             # add regularization term
             loss += self.reg_1 * (u_ego_embeddings.norm(p=1) + pos_ego_embeddings.norm(p=1))
             loss += self.reg_2 * (u_ego_embeddings.norm() + pos_ego_embeddings.norm())
         elif self.loss_type.upper() in ['BPR', 'TL', 'HL']:
-            neg_item = batch[2].to(self.device)
+            neg_item = batch[2].to(self.device).long()
             neg_embeddings = embed_item[neg_item]
             neg_pred = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
-            neg_ego_embeddings = self.item_embedding(neg_item)
+            neg_ego_embeddings = self.embed_item(neg_item)
 
             loss = self.criterion(pos_pred, neg_pred)
 
diff --git a/daisy/model/MFRecommender.py b/daisy/model/MFRecommender.py
index 8181350..648ea52 100644
--- a/daisy/model/MFRecommender.py
+++ b/daisy/model/MFRecommender.py
@@ -73,7 +73,7 @@ def calc_loss(self, batch):
         pos_pred = self.forward(user, pos_item)
 
         if self.loss_type.upper() in ['CL', 'SL']:
-            label = batch[2].to(self.device)
+            label = batch[2].to(self.device).float()
             loss = self.criterion(pos_pred, label)
 
             # add regularization term
diff --git a/daisy/model/NFMRecommender.py b/daisy/model/NFMRecommender.py
index de54d65..da1c0ce 100644
--- a/daisy/model/NFMRecommender.py
+++ b/daisy/model/NFMRecommender.py
@@ -128,7 +128,7 @@ def calc_loss(self, batch):
         pos_pred = self.forward(user, pos_item)
 
         if self.loss_type.upper() in ['CL', 'SL']:
-            label = batch[2].to(self.device)
+            label = batch[2].to(self.device).float()
             loss = self.criterion(pos_pred, label)
 
             loss += self.reg_1 * (self.embed_item(pos_item).norm(p=1))
diff --git a/daisy/model/NGCFRecommender.py b/daisy/model/NGCFRecommender.py
index 7761bb0..14acbb9 100644
--- a/daisy/model/NGCFRecommender.py
+++ b/daisy/model/NGCFRecommender.py
@@ -104,7 +104,7 @@ def __init__(self, config):
         self.embed_item = nn.Embedding(self.item_num, self.embedding_size)
         self.gnn_layers = torch.nn.ModuleList()
         for _, (in_size, out_size) in enumerate(zip(self.hidden_size_list[:-1], self.hidden_size_list[1:])):
-            self.GNNlayers.append(BiGNN(in_size, out_size))
+            self.gnn_layers.append(BiGNN(in_size, out_size))
 
         # storage variables for evaluation acceleration
         self.restore_user_e = None
@@ -175,8 +175,8 @@ def calc_loss(self, batch):
         if self.restore_user_e is not None or self.restore_item_e is not None:
             self.restore_user_e, self.restore_item_e = None, None
 
-        user = batch[0].to(self.device)
-        pos_item = batch[1].to(self.device)
+        user = batch[0].to(self.device).long()
+        pos_item = batch[1].to(self.device).long()
 
         embed_user, embed_item = self.forward()
 
@@ -188,16 +188,16 @@
         pos_ego_embeddings = self.embed_item(pos_item)
 
         if self.loss_type.upper() in ['CL', 'SL']:
-            label = batch[2].to(self.device)
+            label = batch[2].to(self.device).float()
             loss = self.criterion(pos_pred, label)
 
             # add regularization term
             loss += self.reg_1 * (u_ego_embeddings.norm(p=1) + pos_ego_embeddings.norm(p=1))
             loss += self.reg_2 * (u_ego_embeddings.norm() + pos_ego_embeddings.norm())
         elif self.loss_type.upper() in ['BPR', 'TL', 'HL']:
-            neg_item = batch[2].to(self.device)
+            neg_item = batch[2].to(self.device).long()
             neg_embeddings = embed_item[neg_item]
             neg_pred = torch.mul(u_embeddings, neg_embeddings).sum(dim=1)
-            neg_ego_embeddings = self.item_embedding(neg_item)
+            neg_ego_embeddings = self.embed_item(neg_item)
 
             loss = self.criterion(pos_pred, neg_pred)
diff --git a/daisy/model/NeuMFRecommender.py b/daisy/model/NeuMFRecommender.py
index 579c38f..0dbcf54 100644
--- a/daisy/model/NeuMFRecommender.py
+++ b/daisy/model/NeuMFRecommender.py
@@ -142,7 +142,7 @@ def calc_loss(self, batch):
         pos_pred = self.forward(user, pos_item)
 
         if self.loss_type.upper() in ['CL', 'SL']:
-            label = batch[2].to(self.device)
+            label = batch[2].to(self.device).float()
             loss = self.criterion(pos_pred, label)
 
             loss += self.reg_1 * (self.embed_item_GMF(pos_item).norm(p=1))
diff --git a/daisy/model/PopRecommender.py b/daisy/model/PopRecommender.py
index 97bed25..d765f44 100644
--- a/daisy/model/PopRecommender.py
+++ b/daisy/model/PopRecommender.py
@@ -20,12 +20,14 @@ def __init__(self, config):
         Parameters
         ----------
         """
+        super(MostPop, self).__init__(config)
         self.item_num = config['item_num']
         self.item_cnt_ref = np.zeros(self.item_num)
         self.topk = config['topk']
+        self.cnt_col = config['IID_NAME']
 
     def fit(self, train_set):
-        item_cnt = train_set['item'].size()
+        item_cnt = train_set[self.cnt_col].value_counts()
         idx, cnt = item_cnt.index, item_cnt.values
         self.item_cnt_ref[idx] = cnt
         self.item_score = self.item_cnt_ref / (1 + self.item_cnt_ref)
diff --git a/daisy/model/PureSVDRecommender.py b/daisy/model/PureSVDRecommender.py
index e73c51f..b5d105e 100644
--- a/daisy/model/PureSVDRecommender.py
+++ b/daisy/model/PureSVDRecommender.py
@@ -25,6 +25,7 @@ def __init__(self, config):
         item_num : int, the number of items
         factors : int, latent factor number
         """
+        super(PureSVD, self).__init__(config)
         self.user_num = config['user_num']
         self.item_num = config['item_num']
         self.factors = config['factors']
@@ -35,7 +36,7 @@
         self.topk = config['topk']
 
     def fit(self, train_set):
-        self.logger.info(" Computing SVD decomposition...")
+        self.logger.info("Computing SVD decomposition...")
         train_set = self._convert_df(self.user_num, self.item_num, train_set)
         self.logger.info('Finish build train matrix for decomposition')
         U, sigma, Vt = randomized_svd(train_set,
@@ -60,7 +61,7 @@ def predict(self, u, i):
         return self.user_vec[u, :].dot(self.item_vec[i, :])
 
     def rank(self, test_loader):
-        rec_ids = np.array([])
+        rec_ids = None
 
         for us, cands_ids in test_loader:
             us = us.numpy()
@@ -70,9 +71,9 @@
             items_emb = self.item_vec[cands_ids, :].transpose(0, 2, 1) # batch * cand_num * factor -> batch * factor * cand_num
             scores = np.einsum('BNi,BiM -> BNM', user_emb, items_emb).squeeze() # batch * 1 * cand_num -> batch * cand_num
             rank_ids = np.argsort(-scores)[:, :self.topk]
-            rank_list = cands_ids[:, rank_ids]
+            rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]
 
-            rec_ids = np.vstack([rec_ids, rank_list])
+            rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])
 
         return rec_ids
 
diff --git a/daisy/model/SLiMRecommender.py b/daisy/model/SLiMRecommender.py
index 61472a6..645336a 100644
--- a/daisy/model/SLiMRecommender.py
+++ b/daisy/model/SLiMRecommender.py
@@ -37,6 +37,7 @@ def __init__(self, config):
         alpha : float, Constant that multiplies the penalty terms
         positive_only : bool, When set to True, forces the coefficients to be positive
         """
+        super(SLiM, self).__init__(config)
         self.md = ElasticNet(alpha=config['alpha'],
                              l1_ratio=config['elastic'],
                              positive=True,
@@ -85,9 +86,9 @@
            nonzero_model_coef_index = self.md.sparse_coef_.indices
            nonzero_model_coef_value = self.md.sparse_coef_.data
 
-           # local_topk = min(len(nonzero_model_coef_value) - 1, self.topk)
+           local_topk = min(len(nonzero_model_coef_value) - 1, self.topk)
            # just keep all nonzero coef value for ranking, if you want improve speed, use code above
-           local_topk = len(nonzero_model_coef_value) - 1
+           # local_topk = len(nonzero_model_coef_value) - 1
 
            relevant_items_partition = (-nonzero_model_coef_value).argpartition(local_topk)[0:local_topk]
            relevant_items_partition_sorting = np.argsort(-nonzero_model_coef_value[relevant_items_partition])
@@ -126,16 +127,16 @@ def predict(self, u, i):
         return self.A_tilde[u, i]
 
     def rank(self, test_loader):
-        rec_ids = np.array([])
+        rec_ids = None
 
         for us, cands_ids in test_loader:
             us = us.numpy()
             cands_ids = cands_ids.numpy()
-            scores = self.A_tilde[us, cands_ids].A
+            scores = self.A_tilde[us[:, np.newaxis], cands_ids].A
             rank_ids = np.argsort(-scores)[:, :self.topk]
-            rank_list = cands_ids[:, rank_ids]
+            rank_list = cands_ids[np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1), rank_ids]
 
-            rec_ids = np.vstack([rec_ids, rank_list])
+            rec_ids = rank_list if rec_ids is None else np.vstack([rec_ids, rank_list])
 
         return rec_ids
 
diff --git a/daisy/utils/config.py b/daisy/utils/config.py
index 952ffb8..9bf98d1 100644
--- a/daisy/utils/config.py
+++ b/daisy/utils/config.py
@@ -58,7 +58,8 @@ def init_config(param_dict=None):
     model_init_file = os.path.join(current_path, f'../assets/{algo_name}.yaml')
     model_conf = yaml.load(
         open(model_init_file), Loader=yaml.loader.SafeLoader)
-    config.update(model_conf)
+    if model_conf is not None:
+        config.update(model_conf)
 
     args_conf = vars(args)
 
diff --git a/run_examples/test.py b/run_examples/test.py
index c6dbb4e..f94fc6a 100644
--- a/run_examples/test.py
+++ b/run_examples/test.py
@@ -72,7 +72,7 @@
 
     ''' build and train model '''
     s_time = time.time()
-    if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop']:
+    if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop', 'ease']:
         model = model_config[config['algo_name']](config)
         model.fit(train_set)
 
diff --git a/run_examples/tune.py b/run_examples/tune.py
index 2b45746..b4ece9b 100644
--- a/run_examples/tune.py
+++ b/run_examples/tune.py
@@ -163,7 +163,7 @@ def objective(trial):
         config['train_ur'] = train_ur
 
         ''' build and train model '''
-        if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop']:
+        if config['algo_name'].lower() in ['itemknn', 'puresvd', 'slim', 'mostpop', 'ease']:
             model = model_config[config['algo_name']](config)
             model.fit(train)
 
diff --git a/setup.py b/setup.py
index 6c306c9..276e265 100644
--- a/setup.py
+++ b/setup.py
@@ -44,14 +44,14 @@
     # package_dir={"": "daisy"},
     package_data={"": ["*.yaml"]},
    # packages = find_packages(exclude=['tests*']),
-    version='v2.1.4', # Ideally should be same as your GitHub release tag varsion
+    version='v2.2.0', # Ideally should be same as your GitHub release tag varsion
     description=('An easy-to-use library for recommender systems.'),
     long_description=long_description,
     # long_description_content_type="text/markdown",
     author='Yu Di',
     author_email='di.yu.2021@mitb.smu.edu.sg',
     url='https://github.com/AmazingDD/daisyRec',
-    download_url='https://github.com/AmazingDD/daisyRec/archive/refs/tags/v2.1.4.tar.gz',
+    download_url='https://github.com/AmazingDD/daisyRec/archive/refs/tags/v2.2.0.tar.gz',
     keywords=['ranking', 'recommendation'],
     # include_package_data=True,
     install_requires=install_requires,
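
Note on the recurring rank() fix (illustration only, not part of the patch): the old expression cands_ids[:, rank_ids] indexes a single axis with a 2-D array, so every row of rank_ids is applied to every row of cands_ids, producing a batch x batch x topk result with wrongly paired rows. The patched expression builds an explicit row-index grid so that row r of cands_ids is gathered with the positions in row r of rank_ids. A minimal standalone sketch with made-up shapes and values (2 users, 4 candidates each, top-3):

    import numpy as np

    cands_ids = np.array([[10, 11, 12, 13],
                          [20, 21, 22, 23]])    # batch * cand_num
    scores = np.array([[0.1, 0.9, 0.5, 0.2],
                       [0.8, 0.3, 0.7, 0.4]])   # batch * cand_num
    rank_ids = np.argsort(-scores)[:, :3]       # per-row positions of the top-3 scores

    # Old pattern: broadcasts rank_ids over all rows -> shape (2, 2, 3), rows mispaired
    assert cands_ids[:, rank_ids].shape == (2, 2, 3)

    # Patched pattern: row-aligned gather -> shape (2, 3)
    rows = np.repeat(np.arange(len(rank_ids)).reshape(-1, 1), rank_ids.shape[1], axis=1)
    rank_list = cands_ids[rows, rank_ids]
    assert (rank_list == [[11, 12, 13], [20, 22, 23]]).all()

    # Equivalent built-in for the same row-wise gather
    assert (np.take_along_axis(cands_ids, rank_ids, axis=1) == rank_list).all()

The same broadcasting idea underlies the pred_mat[us[:, np.newaxis], cands_ids] and A_tilde[us[:, np.newaxis], cands_ids] changes: a column of user indices paired element-wise with a per-user row of candidate item indices, instead of mismatched 1-D/2-D index arrays. Relatedly, the rec_ids = None sentinel replaces np.array([]) because np.vstack cannot stack an empty 1-D array with the first 2-D batch, and the .float()/.long() casts align batch dtypes with what the PyTorch loss criteria and nn.Embedding lookups expect.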