# -*- coding: utf-8 -*-
# @Time : 2020/9/1 14:00
# @Author : Changxin Tian
# @Email : cx.tian@outlook.com
# UPDATE
# @Time : 2020/10/1
# @Author : Changxin Tian
# @Email : cx.tian@outlook.com
r"""
GCMC
################################################
Reference:
van den Berg et al. "Graph Convolutional Matrix Completion." in SIGKDD 2018.
Reference code:
https://github.com/riannevdberg/gc-mc
"""
import math
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
from recbole.model.abstract_recommender import GeneralRecommender
from recbole.model.layers import SparseDropout
from recbole.utils import InputType
class GCMC(GeneralRecommender):
    r"""GCMC is a model that incorporates graph autoencoders for recommendation.

    Graph autoencoders are comprised of:

    1) a graph encoder model :math:`Z = f(X; A)`, which takes as input an :math:`N \times D` feature matrix X and
    a graph adjacency matrix A, and produces an :math:`N \times E` node embedding matrix
    :math:`Z = [z_1^T,..., z_N^T ]^T`;

    2) a pairwise decoder model :math:`\hat A = g(Z)`, which takes pairs of node embeddings :math:`(z_i, z_j)` and
    predicts respective entries :math:`\hat A_{ij}` in the adjacency matrix.

    Note that :math:`N` denotes the number of nodes, :math:`D` the number of input features,
    and :math:`E` the embedding size.

    We implement the model following the original author with a pairwise training mode.
    """

    input_type = InputType.PAIRWISE

    def __init__(self, config, dataset):
        super(GCMC, self).__init__(config, dataset)

        # load dataset info
        self.num_all = self.n_users + self.n_items
        # user-item interaction matrix in COO format
        self.interaction_matrix = dataset.inter_matrix(form="coo").astype(
            np.float32
        )  # coo

        # load parameters info
        self.dropout_prob = config["dropout_prob"]
        self.sparse_feature = config["sparse_feature"]
        self.gcn_output_dim = config["gcn_output_dim"]
        self.dense_output_dim = config["embedding_size"]
        self.n_class = config["class_num"]
        self.num_basis_functions = config["num_basis_functions"]

        # generate node feature: a (num_all x num_all) identity (one-hot) matrix,
        # split row-wise into a user part and an item part
        if self.sparse_feature:
            features = self.get_sparse_eye_mat(self.num_all)
            i = features._indices()
            v = features._values()
            self.user_features = torch.sparse.FloatTensor(
                i[:, : self.n_users],
                v[: self.n_users],
                torch.Size([self.n_users, self.num_all]),
            ).to(self.device)
            item_i = i[:, self.n_users :]
            # shift the row indices so the item block starts at row 0
            item_i[0, :] = item_i[0, :] - self.n_users
            self.item_features = torch.sparse.FloatTensor(
                item_i, v[self.n_users :], torch.Size([self.n_items, self.num_all])
            ).to(self.device)
        else:
            features = torch.eye(self.num_all).to(self.device)
            self.user_features, self.item_features = torch.split(
                features, [self.n_users, self.n_items]
            )
        self.input_dim = self.user_features.shape[1]

        # adj matrices for each relation are stored in self.support
        self.Graph = self.get_norm_adj_mat().to(self.device)
        self.support = [self.Graph]

        # accumulation operation: for "stack", the GCN output dim must be
        # divisible by the number of supports, so adjust it when necessary
        self.accum = config["accum"]
        if self.accum == "stack":
            div = self.gcn_output_dim // len(self.support)
            if self.gcn_output_dim % len(self.support) != 0:
                self.logger.warning(
                    "HIDDEN[0] (=%d) of stack layer is adjusted to %d (in %d splits)."
                    % (self.gcn_output_dim, len(self.support) * div, len(self.support))
                )
            self.gcn_output_dim = len(self.support) * div

        # define layers and loss
        self.GcEncoder = GcEncoder(
            accum=self.accum,
            num_user=self.n_users,
            num_item=self.n_items,
            support=self.support,
            input_dim=self.input_dim,
            gcn_output_dim=self.gcn_output_dim,
            dense_output_dim=self.dense_output_dim,
            drop_prob=self.dropout_prob,
            device=self.device,
            sparse_feature=self.sparse_feature,
        ).to(self.device)
        self.BiDecoder = BiDecoder(
            input_dim=self.dense_output_dim,
            output_dim=self.n_class,
            drop_prob=0.0,
            device=self.device,
            num_weights=self.num_basis_functions,
        ).to(self.device)
        self.loss_function = nn.CrossEntropyLoss()

    def get_sparse_eye_mat(self, num):
        r"""Get a sparse identity matrix used as one-hot node features.

        Args:
            num: the number of rows (and columns).

        Returns:
            Sparse ``num x num`` identity tensor.
        """
        idx = torch.arange(num, dtype=torch.long)
        i = torch.stack([idx, idx])
        val = torch.ones(num)
        # pass the size explicitly instead of letting it be inferred from indices
        return torch.sparse.FloatTensor(i, val, torch.Size([num, num]))

    def get_norm_adj_mat(self):
        r"""Get the normalized interaction matrix of users and items.

        Construct the square matrix from the training data and normalize it
        using the laplace matrix.

        .. math::
            A_{hat} = D^{-0.5} \times A \times D^{-0.5}

        Returns:
            Sparse tensor of the normalized interaction matrix.
        """
        # build adj matrix: users occupy rows/cols [0, n_users),
        # items occupy rows/cols [n_users, n_users + n_items)
        A = sp.dok_matrix(
            (self.n_users + self.n_items, self.n_users + self.n_items), dtype=np.float32
        )
        inter_M = self.interaction_matrix
        inter_M_t = self.interaction_matrix.transpose()
        data_dict = dict(
            zip(zip(inter_M.row, inter_M.col + self.n_users), [1] * inter_M.nnz)
        )
        data_dict.update(
            dict(
                zip(
                    zip(inter_M_t.row + self.n_users, inter_M_t.col),
                    [1] * inter_M_t.nnz,
                )
            )
        )
        # bulk-assign through the private dict update; much faster than
        # item-by-item assignment on a dok_matrix
        A._update(data_dict)
        # norm adj matrix
        sumArr = (A > 0).sum(axis=1)
        # add epsilon to avoid divide by zero Warning
        diag = np.array(sumArr.flatten())[0] + 1e-7
        diag = np.power(diag, -0.5)
        D = sp.diags(diag)
        L = D * A * D
        # covert norm_adj matrix to tensor
        L = sp.coo_matrix(L)
        row = L.row
        col = L.col
        i = torch.LongTensor([row, col])
        data = torch.FloatTensor(L.data)
        SparseL = torch.sparse.FloatTensor(i, data, torch.Size(L.shape))
        return SparseL

    def forward(self, user_X, item_X, user, item):
        # Graph autoencoders are comprised of a graph encoder model and a pairwise decoder model.
        user_embedding, item_embedding = self.GcEncoder(user_X, item_X)
        predict_score = self.BiDecoder(user_embedding, item_embedding, user, item)
        return predict_score

    def calculate_loss(self, interaction):
        """Cross-entropy loss over (positive, negative) pairs: the first half
        of the concatenated batch is labeled 1 (observed), the second half 0."""
        user = interaction[self.USER_ID]
        pos_item = interaction[self.ITEM_ID]
        neg_item = interaction[self.NEG_ITEM_ID]

        users = torch.cat((user, user))
        items = torch.cat((pos_item, neg_item))

        user_X, item_X = self.user_features, self.item_features
        predict = self.forward(user_X, item_X, users, items)
        target = torch.zeros(len(user) * 2, dtype=torch.long).to(self.device)
        target[: len(user)] = 1

        loss = self.loss_function(predict, target)
        return loss

    def predict(self, interaction):
        """Score the given (user, item) pairs with the positive-class logit."""
        user = interaction[self.USER_ID]
        item = interaction[self.ITEM_ID]

        user_X, item_X = self.user_features, self.item_features
        predict = self.forward(user_X, item_X, user, item)

        score = predict[:, 1]
        return score

    def full_sort_predict(self, interaction):
        """Score every item for each user (item=None triggers full-sort decoding)."""
        user = interaction[self.USER_ID]

        user_X, item_X = self.user_features, self.item_features
        predict = self.forward(user_X, item_X, user, None)

        score = predict[:, 1]
        return score
class GcEncoder(nn.Module):
    r"""Graph Convolutional Encoder.

    GcEncoder take as input an :math:`N \times D` feature matrix :math:`X` and a graph adjacency matrix :math:`A`,
    and produce an :math:`N \times E` node embedding matrix;
    Note that :math:`N` denotes the number of nodes, :math:`D` the number of input features,
    and :math:`E` the embedding size.
    """

    def __init__(
        self,
        accum,
        num_user,
        num_item,
        support,
        input_dim,
        gcn_output_dim,
        dense_output_dim,
        drop_prob,
        device,
        sparse_feature=True,
        act_dense=lambda x: x,
        share_user_item_weights=True,
        bias=False,
    ):
        super(GcEncoder, self).__init__()
        self.num_users = num_user
        self.num_items = num_item
        self.input_dim = input_dim
        self.gcn_output_dim = gcn_output_dim
        self.dense_output_dim = dense_output_dim
        self.accum = accum
        self.sparse_feature = sparse_feature

        self.device = device
        self.dropout_prob = drop_prob
        self.dropout = nn.Dropout(p=self.dropout_prob)
        # sparse inputs need a dropout that understands sparse tensors
        if self.sparse_feature:
            self.sparse_dropout = SparseDropout(p=self.dropout_prob)
        else:
            self.sparse_dropout = nn.Dropout(p=self.dropout_prob)

        self.dense_activate = act_dense
        self.activate = nn.ReLU()
        self.share_weights = share_user_item_weights
        self.bias = bias

        self.support = support
        self.num_support = len(support)

        # gcn layer: one weight matrix per support graph; "sum" accumulation
        # uses full-width weights, "stack" splits the output dim across supports
        if self.accum == "sum":
            per_support_dim = self.gcn_output_dim
        else:
            assert (
                self.gcn_output_dim % self.num_support == 0
            ), "output_dim must be multiple of num_support for stackGC"
            self.sub_hidden_dim = self.gcn_output_dim // self.num_support
            per_support_dim = self.sub_hidden_dim

        self.weights_u = self._fresh_weight_list(per_support_dim)
        if share_user_item_weights:
            self.weights_v = self.weights_u
        else:
            self.weights_v = self._fresh_weight_list(per_support_dim)

        # dense layer
        self.dense_layer_u = nn.Linear(
            self.gcn_output_dim, self.dense_output_dim, bias=self.bias
        )
        if share_user_item_weights:
            self.dense_layer_v = self.dense_layer_u
        else:
            self.dense_layer_v = nn.Linear(
                self.gcn_output_dim, self.dense_output_dim, bias=self.bias
            )

        self._init_weights()

    def _fresh_weight_list(self, out_dim):
        # One (input_dim x out_dim) trainable weight matrix per support graph.
        return nn.ParameterList(
            [
                nn.Parameter(
                    torch.FloatTensor(self.input_dim, out_dim).to(self.device),
                    requires_grad=True,
                )
                for _ in range(self.num_support)
            ]
        )

    def _init_weights(self):
        # Uniform init, range scaled by fan-in/fan-out of the GCN weights.
        init_range = math.sqrt(
            (self.num_support + 1) / (self.input_dim + self.gcn_output_dim)
        )
        for w_u in self.weights_u:
            w_u.data.uniform_(-init_range, init_range)
        if not self.share_weights:
            for w_v in self.weights_v:
                w_v.data.uniform_(-init_range, init_range)

        dense_init_range = math.sqrt(
            (self.num_support + 1) / (self.dense_output_dim + self.gcn_output_dim)
        )
        self.dense_layer_u.weight.data.uniform_(-dense_init_range, dense_init_range)
        if not self.share_weights:
            self.dense_layer_v.weight.data.uniform_(-dense_init_range, dense_init_range)

        if self.bias:
            self.dense_layer_u.bias.data.fill_(0)
            if not self.share_weights:
                self.dense_layer_v.bias.data.fill_(0)

    def forward(self, user_X, item_X):
        # ---------------------------------- GCN layer ----------------------------------
        user_X = self.sparse_dropout(user_X)
        item_X = self.sparse_dropout(item_X)

        # pick the matmul matching the feature representation once, up front
        feat_mm = torch.sparse.mm if self.sparse_feature else torch.mm

        embeddings = []
        if self.accum == "sum":
            # weight sharing: each support uses the running sum of weights
            wu = 0.0
            wv = 0.0
            for idx, graph_A in enumerate(self.support):
                wu = wu + self.weights_u[idx]
                wv = wv + self.weights_v[idx]
                # project features, then propagate over the adjacency matrix
                stacked = torch.cat([feat_mm(user_X, wu), feat_mm(item_X, wv)])
                embeddings.append(torch.sparse.mm(graph_A, stacked))
            embeddings = torch.stack(embeddings, dim=1).sum(dim=1)
        else:
            for idx, graph_A in enumerate(self.support):
                stacked = torch.cat(
                    [
                        feat_mm(user_X, self.weights_u[idx]),
                        feat_mm(item_X, self.weights_v[idx]),
                    ]
                )
                embeddings.append(torch.sparse.mm(graph_A, stacked))
            # "stack" concatenates per-support embeddings along the feature dim
            embeddings = torch.cat(embeddings, dim=1)

        users, items = torch.split(embeddings, [self.num_users, self.num_items])
        u_hidden = self.activate(users)
        v_hidden = self.activate(items)

        # --------------------------------- Dense Layer ---------------------------------
        u_hidden = self.dense_layer_u(self.dropout(u_hidden))
        v_hidden = self.dense_layer_v(self.dropout(v_hidden))

        return self.dense_activate(u_hidden), self.dense_activate(v_hidden)
class BiDecoder(nn.Module):
    """Bi-linear decoder.

    BiDecoder takes pairs of node embeddings and predicts respective entries in the adjacency matrix.
    """

    def __init__(
        self, input_dim, output_dim, drop_prob, device, num_weights=3, act=lambda x: x
    ):
        super(BiDecoder, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_weights = num_weights
        self.device = device

        self.activate = act
        self.dropout_prob = drop_prob
        self.dropout = nn.Dropout(p=self.dropout_prob)

        # one orthogonally-initialized (input_dim x input_dim) bilinear form
        # per basis function
        self.weights = nn.ParameterList(
            [
                nn.Parameter(
                    orthogonal([self.input_dim, self.input_dim]).to(self.device)
                )
                for _ in range(self.num_weights)
            ]
        )
        # mixes the per-basis scores into output_dim class logits
        self.dense_layer = nn.Linear(self.num_weights, self.output_dim, bias=False)
        self._init_weights()

    def _init_weights(self):
        # Uniform init for the mixing layer only; the bilinear weights are
        # already orthogonally initialized above.
        dense_init_range = math.sqrt(
            self.output_dim / (self.num_weights + self.output_dim)
        )
        self.dense_layer.weight.data.uniform_(-dense_init_range, dense_init_range)

    def forward(self, u_inputs, i_inputs, users, items=None):
        u_inputs = self.dropout(u_inputs)
        i_inputs = self.dropout(i_inputs)

        users_emb = u_inputs[users]
        basis_outputs = []
        if items is not None:
            # pairwise mode: one score per (user, item) pair per basis
            items_emb = i_inputs[items]
            for weight in self.weights:
                projected = torch.mm(users_emb, weight)
                basis_outputs.append((projected * items_emb).sum(dim=1))
        else:
            # full-sort mode: score every item for each user, then flatten
            for weight in self.weights:
                projected = torch.mm(users_emb, weight)
                basis_outputs.append(
                    torch.mm(projected, i_inputs.transpose(0, 1)).view(-1)
                )

        stacked = torch.stack(basis_outputs, dim=1)
        return self.activate(self.dense_layer(stacked))
def orthogonal(shape, scale=1.1):
    """Return a scaled orthogonal weight tensor of the given shape.

    Initialization function for weights in class GCMC.
    From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
    """
    rows, cols = shape[0], np.prod(shape[1:])
    gaussian = np.random.normal(0.0, 1.0, (rows, cols))
    u, _, v = np.linalg.svd(gaussian, full_matrices=False)
    # u has shape (rows, min(rows, cols)); it matches when cols <= rows,
    # otherwise v has the desired (rows, cols) shape
    ortho = u if u.shape == (rows, cols) else v
    ortho = ortho.reshape(shape)
    return torch.tensor(scale * ortho[: shape[0], : shape[1]], dtype=torch.float32)