# Source code for recbole.model.general_recommender.asymknn

import numpy as np
import scipy.sparse as sp
import torch
from recbole.model.abstract_recommender import GeneralRecommender
from recbole.utils import InputType, ModelType
from scipy.sparse import csr_matrix


class ComputeSimilarity:
    r"""Asymmetric cosine similarity between users or items of a sparse matrix.

    For a pair of nodes ``(x, y)`` with co-occurrence count ``|x ∩ y|``, the
    similarity is ``|x ∩ y| / (|x|^alpha * |y|^(1 - alpha))``, where ``|x|``
    is the node's interaction count (row/column sum).
    """

    def __init__(self, dataMatrix, topk=100, alpha=0.5, method="item"):
        r"""Computes the asymmetric cosine similarity of dataMatrix with alpha parameter.

        Args:
            dataMatrix (scipy.sparse.csr_matrix): The sparse data matrix.
            topk (int) : The k value in KNN.
            alpha (float): Asymmetry control parameter in cosine similarity calculation.
            method (str) : Caculate the similarity of users if method is 'user',
                otherwise, calculate the similarity of items.

        Raises:
            NotImplementedError: If ``method`` is neither 'user' nor 'item'.
        """
        # Fail fast on a bad method: previously an invalid value silently took
        # the 'item' branch for TopK and only raised inside
        # compute_asym_similarity().
        if method not in ("user", "item"):
            raise NotImplementedError("Make sure 'method' is in ['user', 'item']!")

        self.method = method
        self.alpha = alpha

        self.n_rows, self.n_columns = dataMatrix.shape

        # The neighborhood can never be larger than the candidate set.
        if self.method == "user":
            self.TopK = min(topk, self.n_rows)
        else:
            self.TopK = min(topk, self.n_columns)

        # Copy so later in-place dtype conversion cannot mutate the caller's matrix.
        self.dataMatrix = dataMatrix.copy()

    def compute_asym_similarity(self, block_size=100):
        r"""Compute the asymmetric cosine similarity for the given dataset.

        Args:
            block_size (int): Divide matrix into blocks for efficient calculation.

        Returns:
            list: The similar nodes, if method is 'user', the shape is
                [number of users, neigh_num], else, the shape is
                [number of items, neigh_num].
            scipy.sparse.csr_matrix: sparse matrix W, if method is 'user', the
                shape is [self.n_rows, self.n_rows], else, the shape is
                [self.n_columns, self.n_columns].
        """
        values = []
        rows = []
        cols = []
        neigh = []

        self.dataMatrix = self.dataMatrix.astype(np.float32)

        # Marginal interaction counts used by the asymmetric normalization.
        if self.method == "user":
            sumOfMatrix = np.array(self.dataMatrix.sum(axis=1)).ravel()
            end_local = self.n_rows
        elif self.method == "item":
            sumOfMatrix = np.array(self.dataMatrix.sum(axis=0)).ravel()
            end_local = self.n_columns
        else:
            # Kept for compatibility; __init__ already validated `method`.
            raise NotImplementedError("Make sure 'method' is in ['user', 'item']!")

        start_block = 0

        # Compute all similarities block-by-block: only `block_size` rows or
        # columns are densified at a time to bound peak memory.
        while start_block < end_local:
            end_block = min(start_block + block_size, end_local)
            this_block_size = end_block - start_block

            # All data points for a given user or item
            if self.method == "user":
                data = self.dataMatrix[start_block:end_block, :]
            else:
                data = self.dataMatrix[:, start_block:end_block]
            data = data.toarray()

            # Raw co-occurrence counts of every node against the block.
            if self.method == "user":
                this_block_weights = self.dataMatrix.dot(data.T)
            else:
                this_block_weights = self.dataMatrix.T.dot(data)

            for index_in_block in range(this_block_size):
                this_line_weights = this_block_weights[:, index_in_block]
                Index = index_in_block + start_block
                # Exclude self-similarity.
                this_line_weights[Index] = 0.0

                # Apply asymmetric cosine normalization; 1e-6 guards against
                # division by zero for nodes with no interactions.
                denominator = (sumOfMatrix[Index] ** self.alpha) * (
                    sumOfMatrix ** (1 - self.alpha)
                ) + 1e-6
                this_line_weights = np.multiply(this_line_weights, 1 / denominator)

                # argpartition selects the TopK candidates in O(n); the final
                # argsort orders only those TopK entries by descending weight.
                relevant_partition = (-this_line_weights).argpartition(self.TopK - 1)[
                    0 : self.TopK
                ]
                relevant_partition_sorting = np.argsort(
                    -this_line_weights[relevant_partition]
                )
                top_k_idx = relevant_partition[relevant_partition_sorting]
                neigh.append(top_k_idx)

                # Incrementally build sparse matrix, do not add zeros.
                top_k_weights = this_line_weights[top_k_idx]
                notZerosMask = top_k_weights != 0.0
                numNotZeros = np.sum(notZerosMask)

                values.extend(top_k_weights[notZerosMask])
                if self.method == "user":
                    # Row `Index` holds the similarities of user `Index`.
                    rows.extend(np.full(numNotZeros, Index, dtype=np.int64))
                    cols.extend(top_k_idx[notZerosMask])
                else:
                    # Column `Index` holds the similarities of item `Index`.
                    rows.extend(top_k_idx[notZerosMask])
                    cols.extend(np.full(numNotZeros, Index, dtype=np.int64))

            start_block += block_size
        # End while

        dim = self.n_rows if self.method == "user" else self.n_columns
        W_sparse = sp.csr_matrix(
            (values, (rows, cols)),
            shape=(dim, dim),
            dtype=np.float32,
        )
        return neigh, W_sparse.tocsc()
class AsymKNN(GeneralRecommender):
    r"""AsymKNN: A traditional recommender model based on asymmetric cosine similarity and score prediction.

    AsymKNN computes user-item recommendations by leveraging asymmetric cosine similarity
    over the interaction matrix. This model allows for flexible adjustment of similarity
    calculations and scoring normalization via several tunable parameters.

    Config:
        k (int): Number of neighbors to consider in the similarity calculation.
        method (str): Specifies whether to calculate similarities based on users or items.
            Valid options are 'user' or 'item'.
        alpha (float): Weight parameter for asymmetric cosine similarity, controlling
            the importance of the interaction matrix in the similarity computation.
            Must be in the range [0, 1].
        q (int): Exponent for adjusting the 'locality of scoring function' after
            similarity computation.
        beta (float): Parameter for controlling the balance between factors in the
            final score normalization. Must be in the range [0, 1].

    Reference:
        Aiolli,F et al. Efficient top-n recommendation for very large scale binary rated datasets.
        In Proceedings of the 7th ACM conference on Recommender systems (pp. 273-280). ACM.
    """

    input_type = InputType.POINTWISE
    type = ModelType.TRADITIONAL

    def __init__(self, config, dataset):
        super(AsymKNN, self).__init__(config, dataset)

        # load parameters info
        self.k = config["k"]  # Size of neighborhood for cosine
        # Calculate the similarity of users if method is 'user',
        # otherwise, calculate the similarity of items.
        self.method = config["knn_method"]
        self.alpha = (
            config["alpha"] if "alpha" in config else 0.5
        )  # Asymmetric cosine parameter
        # BUGFIX: the default must be the int 1, not the float 1.0 — the
        # isinstance(self.q, int) assertion below rejects floats, so a float
        # default always crashed when 'q' was absent from the config.
        self.q = config["q"] if "q" in config else 1  # Weight adjustment exponent
        self.beta = (
            config["beta"] if "beta" in config else 0.5
        )  # Beta for final score normalization

        assert (
            0 <= self.alpha <= 1
        ), f"The asymmetric parameter 'alpha' must be value between in [0,1], but got {self.alpha}"
        assert (
            0 <= self.beta <= 1
        ), f"The asymmetric parameter 'beta' must be value between [0,1], but got {self.beta}"
        assert isinstance(
            self.k, int
        ), f"The neighborhood parameter 'k' must be an integer, but got {self.k}"
        assert isinstance(
            self.q, int
        ), f"The exponent parameter 'q' must be an integer, but got {self.q}"

        self.interaction_matrix = dataset.inter_matrix(form="csr").astype(np.float32)
        shape = self.interaction_matrix.shape
        assert self.n_users == shape[0] and self.n_items == shape[1]

        _, self.w = ComputeSimilarity(
            self.interaction_matrix, topk=self.k, alpha=self.alpha, method=self.method
        ).compute_asym_similarity()

        if self.method == "user":
            # score(u, i) = sum_v w[u, v] * R[v, i], normalized by
            # ||w_u||^(2*beta) * ||R_:,i||^(2*(1-beta)).
            nominator = self.w.dot(self.interaction_matrix)
            factor1 = np.power(np.sqrt(self.w.power(2).sum(axis=1)), 2 * self.beta)
            factor2 = np.power(
                np.sqrt(self.interaction_matrix.power(2).sum(axis=0)),
                2 * (1 - self.beta),
            )
            denominator = factor1.dot(factor2) + 1e-6
        else:
            # score(u, i) = sum_j R[u, j] * w[j, i], normalized by
            # ||R_u,:||^(2*beta) * ||w_i||^(2*(1-beta)).
            nominator = self.interaction_matrix.dot(self.w)
            factor1 = np.power(
                np.sqrt(self.interaction_matrix.power(2).sum(axis=1)), 2 * self.beta
            )
            factor2 = np.power(
                np.sqrt(self.w.power(2).sum(axis=1)), 2 * (1 - self.beta)
            )
            denominator = factor1.dot(factor2.T) + 1e-6

        # Dense prediction matrix stored in LIL form for fast element access.
        self.pred_mat = csr_matrix(nominator / denominator).tolil()

        # Apply 'locality of scoring function' via q: f(w) = w^q
        self.pred_mat = self.pred_mat.power(self.q)

        # Dummy parameter: traditional models learn nothing, but the trainer
        # expects at least one torch parameter to exist.
        self.fake_loss = torch.nn.Parameter(torch.zeros(1))
        self.other_parameter_name = ["w", "pred_mat"]

    def forward(self, user, item):
        # No neural forward pass: all scores are precomputed in __init__.
        pass

    def calculate_loss(self, interaction):
        # No optimization is performed; return a constant zero "loss".
        return torch.nn.Parameter(torch.zeros(1))

    def predict(self, interaction):
        r"""Score the given (user, item) pairs from the precomputed matrix.

        Args:
            interaction: Batch holding USER_ID and ITEM_ID tensors.

        Returns:
            torch.Tensor: One score per (user, item) pair, on self.device.
        """
        user = interaction[self.USER_ID].cpu().numpy().astype(int)
        item = interaction[self.ITEM_ID].cpu().numpy().astype(int)

        result = [self.pred_mat[uid, iid] for uid, iid in zip(user, item)]

        return torch.from_numpy(np.array(result)).to(self.device)

    def full_sort_predict(self, interaction):
        r"""Score every item for the given users.

        Args:
            interaction: Batch holding a USER_ID tensor.

        Returns:
            torch.Tensor: Flattened scores over all items for each user.
        """
        user = interaction[self.USER_ID].cpu().numpy()

        score = self.pred_mat[user, :].toarray().flatten()

        return torch.from_numpy(score).to(self.device)