
# -*- coding: utf-8 -*-
# @Time   : 2020/9/1 14:00
# @Author : Changxin Tian
# @Email  : cx.tian@outlook.com

# UPDATE
# @Time   : 2020/10/1
# @Author : Changxin Tian
# @Email  : cx.tian@outlook.com

r"""
GCMC
################################################

Reference:
    van den Berg et al. "Graph Convolutional Matrix Completion." in SIGKDD 2018.

Reference code:
    https://github.com/riannevdberg/gc-mc
"""


import math
import torch
import torch.nn as nn
import scipy.sparse as sp
import numpy as np

from recbole.utils import InputType
from recbole.model.abstract_recommender import GeneralRecommender


class GCMC(GeneralRecommender):
    r"""GCMC is a model that incorporates graph autoencoders for recommendation.

    Graph autoencoders are comprised of:

    1) a graph encoder model :math:`Z = f(X; A)`, which takes as input an :math:`N \times D` feature
       matrix :math:`X` and a graph adjacency matrix :math:`A`, and produces an :math:`N \times E`
       node embedding matrix :math:`Z = [z_1^T, ..., z_N^T]^T`;

    2) a pairwise decoder model :math:`\hat A = g(Z)`, which takes pairs of node embeddings
       :math:`(z_i, z_j)` and predicts respective entries :math:`\hat A_{ij}` in the adjacency matrix.

    Note that :math:`N` denotes the number of nodes, :math:`D` the number of input features,
    and :math:`E` the embedding size.

    We implement the model following the original author with a pairwise training mode.
    """
    input_type = InputType.PAIRWISE

    def __init__(self, config, dataset):
        super(GCMC, self).__init__(config, dataset)

        # load dataset info
        self.num_all = self.n_users + self.n_items
        self.interaction_matrix = dataset.inter_matrix(form='coo').astype(np.float32)  # coo matrix

        # load parameters info
        self.dropout_prob = config['dropout_prob']
        self.sparse_feature = config['sparse_feature']
        self.gcn_output_dim = config['gcn_output_dim']
        self.dense_output_dim = config['embedding_size']
        self.n_class = config['class_num']
        self.num_basis_functions = config['num_basis_functions']

        # generate node feature
        if self.sparse_feature:
            features = self.get_sparse_eye_mat(self.num_all)
            i = features._indices()
            v = features._values()
            self.user_features = torch.sparse.FloatTensor(
                i[:, :self.n_users], v[:self.n_users],
                torch.Size([self.n_users, self.num_all])).to(self.device)
            item_i = i[:, self.n_users:]
            item_i[0, :] = item_i[0, :] - self.n_users
            self.item_features = torch.sparse.FloatTensor(
                item_i, v[self.n_users:],
                torch.Size([self.n_items, self.num_all])).to(self.device)
        else:
            features = torch.eye(self.num_all).to(self.device)
            self.user_features, self.item_features = torch.split(features, [self.n_users, self.n_items])
        self.input_dim = self.user_features.shape[1]

        # adjacency matrices for each relation are stored in self.support
        self.Graph = self.get_norm_adj_mat().to(self.device)
        self.support = [self.Graph]

        # accumulation operation
        self.accum = config['accum']
        if self.accum == 'stack':
            div = self.gcn_output_dim // len(self.support)
            if self.gcn_output_dim % len(self.support) != 0:
                print("\nWARNING: HIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits).\n"
                      % (self.gcn_output_dim, len(self.support) * div, len(self.support)))
            self.gcn_output_dim = len(self.support) * div

        # define layers and loss
        self.GcEncoder = GcEncoder(accum=self.accum,
                                   num_user=self.n_users,
                                   num_item=self.n_items,
                                   support=self.support,
                                   input_dim=self.input_dim,
                                   gcn_output_dim=self.gcn_output_dim,
                                   dense_output_dim=self.dense_output_dim,
                                   drop_prob=self.dropout_prob,
                                   device=self.device,
                                   sparse_feature=self.sparse_feature).to(self.device)
        self.BiDecoder = BiDecoder(input_dim=self.dense_output_dim,
                                   output_dim=self.n_class,
                                   drop_prob=0.,
                                   device=self.device,
                                   num_weights=self.num_basis_functions).to(self.device)
        self.loss_function = nn.CrossEntropyLoss()
    def get_sparse_eye_mat(self, num):
        r"""Get the sparse identity matrix used as the node feature.

        Args:
            num: the number of rows.

        Returns:
            Sparse identity tensor of shape ``num`` x ``num``.
        """
        i = torch.LongTensor([range(0, num), range(0, num)])
        val = torch.FloatTensor([1] * num)
        return torch.sparse.FloatTensor(i, val)
    def get_norm_adj_mat(self):
        r"""Get the normalized interaction matrix of users and items.

        Construct the square matrix from the training data and normalize it
        using the Laplacian matrix.

        .. math::
            \hat{A} = D^{-0.5} \times A \times D^{-0.5}

        A toy sketch of this normalization appears after this class.

        Returns:
            Sparse tensor of the normalized interaction matrix.
        """
        # build adjacency matrix
        A = sp.dok_matrix((self.n_users + self.n_items, self.n_users + self.n_items), dtype=np.float32)
        inter_M = self.interaction_matrix
        inter_M_t = self.interaction_matrix.transpose()
        data_dict = dict(zip(zip(inter_M.row, inter_M.col + self.n_users), [1] * inter_M.nnz))
        data_dict.update(dict(zip(zip(inter_M_t.row + self.n_users, inter_M_t.col), [1] * inter_M_t.nnz)))
        A._update(data_dict)
        # normalize adjacency matrix
        sumArr = (A > 0).sum(axis=1)
        # add epsilon to avoid a divide-by-zero warning
        diag = np.array(sumArr.flatten())[0] + 1e-7
        diag = np.power(diag, -0.5)
        D = sp.diags(diag)
        L = D * A * D
        # convert the normalized adjacency matrix to a sparse tensor
        L = sp.coo_matrix(L)
        row = L.row
        col = L.col
        i = torch.LongTensor([row, col])
        data = torch.FloatTensor(L.data)
        SparseL = torch.sparse.FloatTensor(i, data, torch.Size(L.shape))
        return SparseL
    def forward(self, user_X, item_X, user, item):
        # Graph autoencoders are comprised of a graph encoder model and a pairwise decoder model.
        user_embedding, item_embedding = self.GcEncoder(user_X, item_X)
        predict_score = self.BiDecoder(user_embedding, item_embedding, user, item)
        return predict_score
    def calculate_loss(self, interaction):
        user = interaction[self.USER_ID]
        pos_item = interaction[self.ITEM_ID]
        neg_item = interaction[self.NEG_ITEM_ID]

        users = torch.cat((user, user))
        items = torch.cat((pos_item, neg_item))

        user_X, item_X = self.user_features, self.item_features
        predict = self.forward(user_X, item_X, users, items)
        # two-class link prediction: the first half of the batch are positive
        # pairs (label 1), the second half negative pairs (label 0); a toy
        # sketch of this target construction appears after this class
        target = torch.zeros(len(user) * 2, dtype=torch.long).to(self.device)
        target[:len(user)] = 1

        loss = self.loss_function(predict, target)
        return loss
    def predict(self, interaction):
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]

        user_X, item_X = self.user_features, self.item_features
        predict = self.forward(user_X, item_X, user, item)

        score = predict[:, 1]
        return score
    def full_sort_predict(self, interaction):
        user = interaction[self.USER_ID]

        user_X, item_X = self.user_features, self.item_features
        predict = self.forward(user_X, item_X, user, None)

        score = predict[:, 1]
        return score
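
# Illustrative sketch (not part of the original module): a toy check of the
# symmetric normalization performed by ``get_norm_adj_mat`` and of the
# two-class pairwise targets built in ``calculate_loss``. Every name prefixed
# ``_sketch_`` is a hypothetical addition; values below are made up.
def _sketch_norm_and_targets():
    # toy bipartite graph: 2 users, 3 items, 3 observed interactions
    n_users, n_items = 2, 3
    inter = sp.coo_matrix(
        (np.ones(3, dtype=np.float32), ([0, 0, 1], [0, 2, 1])),
        shape=(n_users, n_items))

    # assemble the (n_users + n_items)-square adjacency matrix
    A = np.zeros((n_users + n_items, n_users + n_items), dtype=np.float32)
    A[:n_users, n_users:] = inter.toarray()
    A[n_users:, :n_users] = inter.toarray().T

    # D^{-0.5} * A * D^{-0.5}, with an epsilon guarding zero-degree rows
    deg = A.sum(axis=1) + 1e-7
    d_inv_sqrt = np.power(deg, -0.5)
    L = d_inv_sqrt[:, None] * A * d_inv_sqrt[None, :]
    print(L)  # every nonzero entry equals 1 / sqrt(d_i * d_j)

    # pairwise targets as built in calculate_loss: positives first, then negatives
    batch_size = 4
    target = torch.zeros(batch_size * 2, dtype=torch.long)
    target[:batch_size] = 1
    print(target)  # tensor([1, 1, 1, 1, 0, 0, 0, 0])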
class GcEncoder(nn.Module):
    r"""Graph Convolutional Encoder

    GcEncoder takes as input an :math:`N \times D` feature matrix :math:`X` and a graph adjacency
    matrix :math:`A`, and produces an :math:`N \times E` node embedding matrix.
    Note that :math:`N` denotes the number of nodes, :math:`D` the number of input features,
    and :math:`E` the embedding size.
    """

    def __init__(self, accum, num_user, num_item, support,
                 input_dim, gcn_output_dim, dense_output_dim,
                 drop_prob, device,
                 sparse_feature=True, act_dense=lambda x: x,
                 share_user_item_weights=True, bias=False):
        super(GcEncoder, self).__init__()
        self.num_users = num_user
        self.num_items = num_item
        self.input_dim = input_dim
        self.gcn_output_dim = gcn_output_dim
        self.dense_output_dim = dense_output_dim
        self.accum = accum
        self.sparse_feature = sparse_feature

        self.device = device
        self.dropout_prob = drop_prob
        self.dropout = nn.Dropout(p=self.dropout_prob)
        if self.sparse_feature:
            self.sparse_dropout = SparseDropout(p=self.dropout_prob)
        else:
            self.sparse_dropout = nn.Dropout(p=self.dropout_prob)

        self.dense_activate = act_dense
        self.activate = nn.ReLU()
        self.share_weights = share_user_item_weights
        self.bias = bias

        self.support = support
        self.num_support = len(support)

        # gcn layer
        if self.accum == 'sum':
            self.weights_u = nn.ParameterList([
                nn.Parameter(torch.FloatTensor(self.input_dim, self.gcn_output_dim).to(self.device),
                             requires_grad=True)
                for _ in range(self.num_support)])
            if share_user_item_weights:
                self.weights_v = self.weights_u
            else:
                self.weights_v = nn.ParameterList([
                    nn.Parameter(torch.FloatTensor(self.input_dim, self.gcn_output_dim).to(self.device),
                                 requires_grad=True)
                    for _ in range(self.num_support)])
        else:
            assert self.gcn_output_dim % self.num_support == 0, \
                'output_dim must be multiple of num_support for stackGC'
            self.sub_hidden_dim = self.gcn_output_dim // self.num_support

            self.weights_u = nn.ParameterList([
                nn.Parameter(torch.FloatTensor(self.input_dim, self.sub_hidden_dim).to(self.device),
                             requires_grad=True)
                for _ in range(self.num_support)])
            if share_user_item_weights:
                self.weights_v = self.weights_u
            else:
                self.weights_v = nn.ParameterList([
                    nn.Parameter(torch.FloatTensor(self.input_dim, self.sub_hidden_dim).to(self.device),
                                 requires_grad=True)
                    for _ in range(self.num_support)])

        # dense layer
        self.dense_layer_u = nn.Linear(self.gcn_output_dim, self.dense_output_dim, bias=self.bias)
        if share_user_item_weights:
            self.dense_layer_v = self.dense_layer_u
        else:
            self.dense_layer_v = nn.Linear(self.gcn_output_dim, self.dense_output_dim, bias=self.bias)

        self._init_weights()

    def _init_weights(self):
        init_range = math.sqrt((self.num_support + 1) / (self.input_dim + self.gcn_output_dim))
        for w in range(self.num_support):
            self.weights_u[w].data.uniform_(-init_range, init_range)
        if not self.share_weights:
            for w in range(self.num_support):
                self.weights_v[w].data.uniform_(-init_range, init_range)

        dense_init_range = math.sqrt((self.num_support + 1) / (self.dense_output_dim + self.gcn_output_dim))
        self.dense_layer_u.weight.data.uniform_(-dense_init_range, dense_init_range)
        if not self.share_weights:
            self.dense_layer_v.weight.data.uniform_(-dense_init_range, dense_init_range)

        if self.bias:
            self.dense_layer_u.bias.data.fill_(0)
            if not self.share_weights:
                self.dense_layer_v.bias.data.fill_(0)
    def forward(self, user_X, item_X):
        # ----------------------------------------GCN layer----------------------------------------
        user_X = self.sparse_dropout(user_X)
        item_X = self.sparse_dropout(item_X)

        embeddings = []
        if self.accum == 'sum':
            wu = 0.
            wv = 0.
            for i in range(self.num_support):
                # weight sharing
                wu = self.weights_u[i] + wu
                wv = self.weights_v[i] + wv

                # multiply feature matrices with weights
                if self.sparse_feature:
                    temp_u = torch.sparse.mm(user_X, wu)
                    temp_v = torch.sparse.mm(item_X, wv)
                else:
                    temp_u = torch.mm(user_X, wu)
                    temp_v = torch.mm(item_X, wv)
                all_embedding = torch.cat([temp_u, temp_v])

                # then multiply with adjacency matrices
                graph_A = self.support[i]
                all_emb = torch.sparse.mm(graph_A, all_embedding)
                embeddings.append(all_emb)

            embeddings = torch.stack(embeddings, dim=1)
            embeddings = torch.sum(embeddings, dim=1)
        else:
            for i in range(self.num_support):
                # multiply feature matrices with weights
                if self.sparse_feature:
                    temp_u = torch.sparse.mm(user_X, self.weights_u[i])
                    temp_v = torch.sparse.mm(item_X, self.weights_v[i])
                else:
                    temp_u = torch.mm(user_X, self.weights_u[i])
                    temp_v = torch.mm(item_X, self.weights_v[i])
                all_embedding = torch.cat([temp_u, temp_v])

                # then multiply with adjacency matrices
                graph_A = self.support[i]
                all_emb = torch.sparse.mm(graph_A, all_embedding)
                embeddings.append(all_emb)

            embeddings = torch.cat(embeddings, dim=1)

        users, items = torch.split(embeddings, [self.num_users, self.num_items])

        u_hidden = self.activate(users)
        v_hidden = self.activate(items)

        # ----------------------------------------Dense Layer----------------------------------------
        u_hidden = self.dropout(u_hidden)
        v_hidden = self.dropout(v_hidden)

        u_hidden = self.dense_layer_u(u_hidden)
        # use the item-side dense layer here: applying dense_layer_u to v_hidden
        # would be a bug whenever user/item weights are not shared
        v_hidden = self.dense_layer_v(v_hidden)

        u_outputs = self.dense_activate(u_hidden)
        v_outputs = self.dense_activate(v_hidden)

        return u_outputs, v_outputs
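
# Illustrative sketch (not part of the original module): how the 'sum' and
# 'stack' accumulations in ``GcEncoder.forward`` combine per-support
# embeddings. The random tensors stand in for the per-relation GCN outputs;
# all names and sizes here are hypothetical.
def _sketch_accumulation():
    num_nodes, num_support, gcn_output_dim = 5, 2, 8

    # 'stack': each support contributes its own slice of
    # gcn_output_dim // num_support dimensions, concatenated along features
    per_support = [torch.randn(num_nodes, gcn_output_dim // num_support)
                   for _ in range(num_support)]
    stacked = torch.cat(per_support, dim=1)
    print(stacked.shape)  # torch.Size([5, 8])

    # 'sum': each support produces a full gcn_output_dim embedding and they
    # are summed element-wise (stack + sum, mirroring the forward pass)
    per_support_full = [torch.randn(num_nodes, gcn_output_dim)
                        for _ in range(num_support)]
    summed = torch.stack(per_support_full, dim=1).sum(dim=1)
    print(summed.shape)  # torch.Size([5, 8])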
class BiDecoder(nn.Module):
    """Bilinear decoder

    BiDecoder takes pairs of node embeddings and predicts respective entries in the adjacency matrix.
    """

    def __init__(self, input_dim, output_dim, drop_prob, device, num_weights=3, act=lambda x: x):
        super(BiDecoder, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_weights = num_weights
        self.device = device

        self.activate = act
        self.dropout_prob = drop_prob
        self.dropout = nn.Dropout(p=self.dropout_prob)

        self.weights = nn.ParameterList([
            nn.Parameter(orthogonal([self.input_dim, self.input_dim]).to(self.device))
            for _ in range(self.num_weights)])
        self.dense_layer = nn.Linear(self.num_weights, self.output_dim, bias=False)

        self._init_weights()

    def _init_weights(self):
        dense_init_range = math.sqrt(self.output_dim / (self.num_weights + self.output_dim))
        self.dense_layer.weight.data.uniform_(-dense_init_range, dense_init_range)
    def forward(self, u_inputs, i_inputs, users, items=None):
        u_inputs = self.dropout(u_inputs)
        i_inputs = self.dropout(i_inputs)

        if items is not None:
            users_emb = u_inputs[users]
            items_emb = i_inputs[items]

            basis_outputs = []
            for i in range(self.num_weights):
                # bilinear score u^T Q_i v for each basis weight matrix Q_i
                users_emb_temp = torch.mm(users_emb, self.weights[i])
                scores = torch.mul(users_emb_temp, items_emb)
                scores = torch.sum(scores, dim=1)
                basis_outputs.append(scores)
        else:
            # full-sort mode: score each user against every item
            users_emb = u_inputs[users]
            items_emb = i_inputs

            basis_outputs = []
            for i in range(self.num_weights):
                users_emb_temp = torch.mm(users_emb, self.weights[i])
                scores = torch.mm(users_emb_temp, items_emb.transpose(0, 1))
                basis_outputs.append(scores.view(-1))

        basis_outputs = torch.stack(basis_outputs, dim=1)
        basis_outputs = self.dense_layer(basis_outputs)
        output = self.activate(basis_outputs)

        return output
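
# Illustrative sketch (not part of the original module): the bilinear scoring
# performed in ``BiDecoder.forward``. For each basis weight Q_s, the score of
# a (user, item) pair is u^T Q_s v; the per-basis scores are then mixed by a
# linear layer into class logits. All sizes below are hypothetical.
def _sketch_bilinear_decoder():
    emb_dim, num_weights, n_class, batch = 4, 3, 2, 6
    u = torch.randn(batch, emb_dim)
    v = torch.randn(batch, emb_dim)
    Q = [torch.randn(emb_dim, emb_dim) for _ in range(num_weights)]

    # per-basis bilinear scores: (u Q_s) . v, summed over the embedding axis
    basis_scores = torch.stack(
        [torch.sum(torch.mm(u, Q_s) * v, dim=1) for Q_s in Q], dim=1)
    print(basis_scores.shape)  # torch.Size([6, 3])

    # mix the num_weights basis scores into n_class logits
    dense = nn.Linear(num_weights, n_class, bias=False)
    logits = dense(basis_scores)
    print(logits.shape)  # torch.Size([6, 2])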
class SparseDropout(nn.Module):
    """
    This is a Module that executes Dropout on a PyTorch sparse tensor.
    """

    def __init__(self, p=0.5):
        super(SparseDropout, self).__init__()
        # p is the dropout ratio; convert it to the keep probability
        self.kprob = 1 - p
    def forward(self, x):
        # draw the keep mask on the same device as the input so GPU tensors work
        mask = ((torch.rand(x._values().size(), device=x.device) + self.kprob).floor()).type(torch.bool)
        rc = x._indices()[:, mask]
        val = x._values()[mask] * (1.0 / self.kprob)
        return torch.sparse_coo_tensor(rc, val, x.shape)
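
# Illustrative sketch (not part of the original module): ``SparseDropout``
# zeroes stored values with probability p and rescales survivors by
# 1 / (1 - p), so the expected value of each entry is preserved. The toy
# input below is hypothetical.
def _sketch_sparse_dropout():
    i = torch.LongTensor([[0, 1, 2], [0, 1, 2]])
    v = torch.FloatTensor([1.0, 1.0, 1.0])
    x = torch.sparse.FloatTensor(i, v, torch.Size([3, 3]))

    drop = SparseDropout(p=0.5)
    y = drop(x)
    # surviving values are scaled to 2.0 (= 1 / (1 - 0.5)); dropped ones vanish
    print(y._values())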
def orthogonal(shape, scale=1.1):
    """
    Initialization function for weights in class GCMC.
    From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)
    return torch.tensor(scale * q[:shape[0], :shape[1]], dtype=torch.float32)
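
# Illustrative sketch (not part of the original module): a quick check that
# ``orthogonal`` returns a scaled orthogonal matrix, i.e. Q^T Q is close to
# scale^2 * I for square shapes.
def _sketch_orthogonal_check():
    q = orthogonal([4, 4], scale=1.1)
    gram = torch.mm(q.t(), q)
    print(torch.allclose(gram, (1.1 ** 2) * torch.eye(4), atol=1e-5))  # True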