Source code for recbole.model.general_recommender.dgcf

# -*- coding: utf-8 -*-
# @Time   : 2020/8/26
# @Author : Gaole He
# @Email  : hegaole@ruc.edu.cn

# UPDATE:
# @Time   : 2020/9/16
# @Author : Shanlei Mu
# @Email  : slmu@ruc.edu.cn

r"""
DGCF
################################################
Reference:
    Wang Xiang et al. "Disentangled Graph Collaborative Filtering." in SIGIR 2020.

Reference code:
    https://github.com/xiangwang1223/disentangled_graph_collaborative_filtering
"""

import random as rd

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from recbole.model.abstract_recommender import GeneralRecommender
from recbole.model.init import xavier_normal_initialization
from recbole.model.loss import BPRLoss, EmbLoss
from recbole.utils import InputType


def sample_cor_samples(n_users, n_items, cor_batch_size):
    r"""Sample random user ids and item ids for the correlation loss.

    Args:
        n_users (int): number of users in total
        n_items (int): number of items in total
        cor_batch_size (int): number of ids to sample from each side

    Returns:
        list: cor_users, cor_items. The result sampled ids with both as
        cor_batch_size long, sampled without replacement.

    Raises:
        ValueError: if ``cor_batch_size`` exceeds ``n_users`` or ``n_items``.

    Note:
        We have to sample some embedded representations out of all nodes.
        Because we have no way to store cor-distance for each pair.
    """
    # ``range`` is already a sequence accepted by random.sample;
    # no need to materialize an intermediate list.
    cor_users = rd.sample(range(n_users), cor_batch_size)
    cor_items = rd.sample(range(n_items), cor_batch_size)
    return cor_users, cor_items
class DGCF(GeneralRecommender):
    r"""DGCF is a disentangled representation enhanced matrix factorization model.

    The interaction matrix of :math:`n_{users} \times n_{items}` is decomposed to
    :math:`n_{factors}` intent graph, we carefully design the data interface and
    use sparse tensor to train and test efficiently. We implement the model
    following the original author with a pairwise training mode.
    """

    input_type = InputType.PAIRWISE

    def __init__(self, config, dataset):
        super(DGCF, self).__init__(config, dataset)

        # load dataset info: the user-item interactions as a scipy COO matrix
        self.interaction_matrix = dataset.inter_matrix(form="coo").astype(np.float32)

        # load parameters info
        self.embedding_size = config["embedding_size"]
        self.n_factors = config["n_factors"]  # number of disentangled intent factors
        self.n_iterations = config["n_iterations"]  # routing iterations per layer
        self.n_layers = config["n_layers"]  # number of propagation layers
        self.reg_weight = config["reg_weight"]  # weight of the embedding L2 penalty
        self.cor_weight = config["cor_weight"]  # weight of the distance-correlation loss
        n_batch = dataset.inter_num // config["train_batch_size"] + 1
        # number of user/item ids sampled per batch for the correlation loss
        self.cor_batch_size = int(max(self.n_users / n_batch, self.n_items / n_batch))
        # ensure embedding can be divided into <n_factors> intent
        assert self.embedding_size % self.n_factors == 0

        # generate intermediate data: a bidirectional edge list over the joint
        # user+item node id space (item ids are offset behind the user ids)
        row = self.interaction_matrix.row.tolist()
        col = self.interaction_matrix.col.tolist()
        col = [item_index + self.n_users for item_index in col]
        all_h_list = row + col  # row.extend(col)
        all_t_list = col + row  # col.extend(row)
        num_edge = len(all_h_list)
        edge_ids = range(num_edge)
        self.all_h_list = torch.LongTensor(all_h_list).to(self.device)
        self.all_t_list = torch.LongTensor(all_t_list).to(self.device)
        # index pairs for the sparse scatter/gather matrices built below
        self.edge2head = torch.LongTensor([all_h_list, edge_ids]).to(self.device)
        self.head2edge = torch.LongTensor([edge_ids, all_h_list]).to(self.device)
        self.tail2edge = torch.LongTensor([edge_ids, all_t_list]).to(self.device)
        val_one = torch.ones_like(self.all_h_list).float().to(self.device)
        num_node = self.n_users + self.n_items
        # (num_node, num_edge): multiplying by it sums edge values onto head nodes
        self.edge2head_mat = self._build_sparse_tensor(
            self.edge2head, val_one, (num_node, num_edge)
        )
        # (num_edge, num_node): multiplying by it gathers head-node values onto edges
        self.head2edge_mat = self._build_sparse_tensor(
            self.head2edge, val_one, (num_edge, num_node)
        )
        # (num_edge, num_node): multiplying by it gathers tail-node values onto edges
        self.tail2edge_mat = self._build_sparse_tensor(
            self.tail2edge, val_one, (num_edge, num_node)
        )
        self.num_edge = num_edge
        self.num_node = num_node

        # define layers and loss
        self.user_embedding = nn.Embedding(self.n_users, self.embedding_size)
        self.item_embedding = nn.Embedding(self.n_items, self.embedding_size)
        self.softmax = torch.nn.Softmax(dim=1)
        self.mf_loss = BPRLoss()
        self.reg_loss = EmbLoss()
        # caches of the full propagated embeddings for full-sort evaluation;
        # reset to None whenever training updates the parameters
        self.restore_user_e = None
        self.restore_item_e = None
        self.other_parameter_name = ["restore_user_e", "restore_item_e"]

        # parameters initialization
        self.apply(xavier_normal_initialization)

    def _build_sparse_tensor(self, indices, values, size):
        # Construct the sparse matrix with indices, values and size.
        return torch.sparse.FloatTensor(indices, values, size).to(self.device)

    def _get_ego_embeddings(self):
        # concat of user embeddings and item embeddings
        user_emb = self.user_embedding.weight
        item_emb = self.item_embedding.weight
        ego_embeddings = torch.cat([user_emb, item_emb], dim=0)
        return ego_embeddings
[docs] def build_matrix(self, A_values): r"""Get the normalized interaction matrix of users and items according to A_values. Construct the square matrix from the training data and normalize it using the laplace matrix. Args: A_values (torch.cuda.FloatTensor): (num_edge, n_factors) .. math:: A_{hat} = D^{-0.5} \times A \times D^{-0.5} Returns: torch.cuda.FloatTensor: Sparse tensor of the normalized interaction matrix. shape: (num_edge, n_factors) """ norm_A_values = self.softmax(A_values) factor_edge_weight = [] for i in range(self.n_factors): tp_values = norm_A_values[:, i].unsqueeze(1) # (num_edge, 1) d_values = torch.sparse.mm(self.edge2head_mat, tp_values) # (num_node, num_edge) (num_edge, 1) -> (num_node, 1) d_values = torch.clamp(d_values, min=1e-8) try: assert not torch.isnan(d_values).any() except AssertionError: self.logger.info("d_values", torch.min(d_values), torch.max(d_values)) d_values = 1.0 / torch.sqrt(d_values) head_term = torch.sparse.mm(self.head2edge_mat, d_values) # (num_edge, num_node) (num_node, 1) -> (num_edge, 1) tail_term = torch.sparse.mm(self.tail2edge_mat, d_values) edge_weight = tp_values * head_term * tail_term factor_edge_weight.append(edge_weight) return factor_edge_weight
    def forward(self):
        r"""Propagate embeddings over the disentangled intent graphs.

        Returns:
            tuple: ``(u_g_embeddings, i_g_embeddings)`` — the propagated user
            embeddings of shape (n_users, embedding_size) and item embeddings
            of shape (n_items, embedding_size), each averaged over all layers.
        """
        ego_embeddings = self._get_ego_embeddings()
        all_embeddings = [ego_embeddings.unsqueeze(1)]
        # initialize with every factor value as 1
        A_values = torch.ones((self.num_edge, self.n_factors)).to(self.device)
        A_values = Variable(A_values, requires_grad=True)
        for k in range(self.n_layers):
            layer_embeddings = []
            # split the input embedding table
            # .... ego_layer_embeddings is a (n_factors)-length list of embeddings
            # [n_users+n_items, embed_size/n_factors]
            ego_layer_embeddings = torch.chunk(ego_embeddings, self.n_factors, 1)
            # neighbor-routing: iteratively refine the attentive edge weights
            for t in range(0, self.n_iterations):
                iter_embeddings = []
                A_iter_values = []
                factor_edge_weight = self.build_matrix(A_values=A_values)
                for i in range(0, self.n_factors):
                    # update the embeddings via simplified graph convolution layer
                    edge_weight = factor_edge_weight[i]
                    # (num_edge, 1)
                    edge_val = torch.sparse.mm(
                        self.tail2edge_mat, ego_layer_embeddings[i]
                    )
                    # (num_edge, dim / n_factors)
                    edge_val = edge_val * edge_weight
                    # (num_edge, dim / n_factors)
                    factor_embeddings = torch.sparse.mm(self.edge2head_mat, edge_val)
                    # (num_node, num_edge) (num_edge, dim) -> (num_node, dim)
                    iter_embeddings.append(factor_embeddings)
                    # only the last routing iteration's result feeds the next layer
                    if t == self.n_iterations - 1:
                        layer_embeddings = iter_embeddings
                    # get the factor-wise embeddings
                    # .... head_factor_embeddings is a dense tensor with the size
                    # of [all_h_list, embed_size/n_factors]
                    # .... analogous to tail_factor_embeddings
                    head_factor_embeddings = torch.index_select(
                        factor_embeddings, dim=0, index=self.all_h_list
                    )
                    tail_factor_embeddings = torch.index_select(
                        ego_layer_embeddings[i], dim=0, index=self.all_t_list
                    )
                    # .... constrain the vector length
                    # .... make the following attentive weights within the range of (0,1)
                    # to adapt to torch version
                    head_factor_embeddings = F.normalize(
                        head_factor_embeddings, p=2, dim=1
                    )
                    tail_factor_embeddings = F.normalize(
                        tail_factor_embeddings, p=2, dim=1
                    )
                    # get the attentive weights
                    # .... A_factor_values is a dense tensor with the size of [num_edge, 1]
                    A_factor_values = torch.sum(
                        head_factor_embeddings * torch.tanh(tail_factor_embeddings),
                        dim=1,
                        keepdim=True,
                    )
                    # update the attentive weights
                    A_iter_values.append(A_factor_values)
                A_iter_values = torch.cat(A_iter_values, dim=1)
                # (num_edge, n_factors)
                # add all layer-wise attentive weights up.
                A_values = A_values + A_iter_values
            # sum messages of neighbors, [n_users+n_items, embed_size]
            side_embeddings = torch.cat(layer_embeddings, dim=1)
            ego_embeddings = side_embeddings
            # concatenate outputs of all layers
            all_embeddings += [ego_embeddings.unsqueeze(1)]
        all_embeddings = torch.cat(all_embeddings, dim=1)
        # (num_node, n_layer + 1, embedding_size)
        all_embeddings = torch.mean(all_embeddings, dim=1, keepdim=False)
        # (num_node, embedding_size)
        u_g_embeddings = all_embeddings[: self.n_users, :]
        i_g_embeddings = all_embeddings[self.n_users :, :]
        return u_g_embeddings, i_g_embeddings
[docs] def calculate_loss(self, interaction): # clear the storage variable when training if self.restore_user_e is not None or self.restore_item_e is not None: self.restore_user_e, self.restore_item_e = None, None user = interaction[self.USER_ID] pos_item = interaction[self.ITEM_ID] neg_item = interaction[self.NEG_ITEM_ID] user_all_embeddings, item_all_embeddings = self.forward() u_embeddings = user_all_embeddings[user] pos_embeddings = item_all_embeddings[pos_item] neg_embeddings = item_all_embeddings[neg_item] pos_scores = torch.mul(u_embeddings, pos_embeddings).sum(dim=1) neg_scores = torch.mul(u_embeddings, neg_embeddings).sum(dim=1) mf_loss = self.mf_loss(pos_scores, neg_scores) # cul regularized u_ego_embeddings = self.user_embedding(user) pos_ego_embeddings = self.item_embedding(pos_item) neg_ego_embeddings = self.item_embedding(neg_item) reg_loss = self.reg_loss( u_ego_embeddings, pos_ego_embeddings, neg_ego_embeddings ) if self.n_factors > 1 and self.cor_weight > 1e-9: cor_users, cor_items = sample_cor_samples( self.n_users, self.n_items, self.cor_batch_size ) cor_users = torch.LongTensor(cor_users).to(self.device) cor_items = torch.LongTensor(cor_items).to(self.device) cor_u_embeddings = user_all_embeddings[cor_users] cor_i_embeddings = item_all_embeddings[cor_items] cor_loss = self.create_cor_loss(cor_u_embeddings, cor_i_embeddings) loss = mf_loss + self.reg_weight * reg_loss + self.cor_weight * cor_loss else: loss = mf_loss + self.reg_weight * reg_loss return loss
    def create_cor_loss(self, cor_u_embeddings, cor_i_embeddings):
        r"""Calculate the correlation loss for a sampled users and items.

        Args:
            cor_u_embeddings (torch.cuda.FloatTensor): (cor_batch_size, embedding_size)
            cor_i_embeddings (torch.cuda.FloatTensor): (cor_batch_size, embedding_size)

        Returns:
            torch.Tensor : correlation loss.
        """
        cor_loss = None

        ui_embeddings = torch.cat((cor_u_embeddings, cor_i_embeddings), dim=0)
        # one chunk per intent factor, each (M + N, emb_size / n_factors)
        ui_factor_embeddings = torch.chunk(ui_embeddings, self.n_factors, 1)

        # only adjacent factor pairs are compared: n_factors - 1 terms in total
        for i in range(0, self.n_factors - 1):
            x = ui_factor_embeddings[i]
            # (M + N, emb_size / n_factor)
            y = ui_factor_embeddings[i + 1]
            # (M + N, emb_size / n_factor)
            if i == 0:
                cor_loss = self._create_distance_correlation(x, y)
            else:
                cor_loss += self._create_distance_correlation(x, y)

        # NOTE(review): the divisor is n_factors * (n_factors + 1) / 2 although
        # only n_factors - 1 pair terms are summed above; this mirrors the
        # reference implementation, so it is deliberately left unchanged.
        cor_loss /= (self.n_factors + 1.0) * self.n_factors / 2

        return cor_loss
    def _create_distance_correlation(self, X1, X2):
        # Compute the (empirical) distance correlation between two embedding
        # batches X1, X2 of shape (batch_size, dim); returns a scalar tensor.

        def _create_centered_distance(X):
            """
            X: (batch_size, dim)
            return: X - E(X)
            """
            # calculate the pairwise distance of X
            # .... A with the size of [batch_size, embed_size/n_factors]
            # .... D with the size of [batch_size, batch_size]
            r = torch.sum(X * X, dim=1, keepdim=True)
            # (N, 1)
            # (x^2 - 2xy + y^2) -> l2 distance between all vectors
            value = r - 2 * torch.mm(X, X.T) + r.T
            # clamp tiny negatives caused by floating-point error before sqrt
            zero_value = torch.zeros_like(value)
            value = torch.where(value > 0.0, value, zero_value)
            D = torch.sqrt(value + 1e-8)

            # # calculate the centered distance of X
            # # .... D with the size of [batch_size, batch_size]
            # matrix - average over row - average over col + average over matrix
            D = (
                D
                - torch.mean(D, dim=0, keepdim=True)
                - torch.mean(D, dim=1, keepdim=True)
                + torch.mean(D)
            )
            return D

        def _create_distance_covariance(D1, D2):
            # calculate distance covariance between D1 and D2
            n_samples = float(D1.size(0))
            value = torch.sum(D1 * D2) / (n_samples * n_samples)
            # clamp to zero before sqrt for numerical safety
            zero_value = torch.zeros_like(value)
            value = torch.where(value > 0.0, value, zero_value)
            dcov = torch.sqrt(value + 1e-8)
            return dcov

        D1 = _create_centered_distance(X1)
        D2 = _create_centered_distance(X2)

        dcov_12 = _create_distance_covariance(D1, D2)
        dcov_11 = _create_distance_covariance(D1, D1)
        dcov_22 = _create_distance_covariance(D2, D2)

        # calculate the distance correlation: dCov(X1,X2) / sqrt(dVar(X1)*dVar(X2))
        value = dcov_11 * dcov_22
        zero_value = torch.zeros_like(value)
        value = torch.where(value > 0.0, value, zero_value)
        dcor = dcov_12 / (torch.sqrt(value) + 1e-10)
        return dcor
[docs] def predict(self, interaction): user = interaction[self.USER_ID] item = interaction[self.ITEM_ID] u_embedding, i_embedding = self.forward() u_embeddings = u_embedding[user] i_embeddings = i_embedding[item] scores = torch.mul(u_embeddings, i_embeddings).sum(dim=1) return scores
[docs] def full_sort_predict(self, interaction): user = interaction[self.USER_ID] if self.restore_user_e is None or self.restore_item_e is None: self.restore_user_e, self.restore_item_e = self.forward() u_embeddings = self.restore_user_e[user] scores = torch.matmul(u_embeddings, self.restore_item_e.transpose(0, 1)) return scores.view(-1)