# -*- encoding: utf-8 -*-
# @Time : 2020/08/04
# @Author : Kaiyuan Li
# @email : tsotfsk@outlook.com
# UPDATE
# @Time : 2021/01/07, 2020/08/11, 2020/12/18
# @Author : Kaiyuan Li, Yupeng Hou, Zhichao Feng
# @email : tsotfsk@outlook.com, houyupeng@ruc.edu.cn, fzcbupt@gmail.com
"""
recbole.evaluator.evaluators
#####################################
"""
from collections import ChainMap
import numpy as np
import torch
from recbole.evaluator.abstract_evaluator import GroupedEvaluator, IndividualEvaluator
from recbole.evaluator.metrics import metrics_dict
# Canonical names of metrics evaluated on truncated (top-k) ranking lists.
topk_metrics = dict((name.lower(), name) for name in ('Hit', 'Recall', 'MRR', 'Precision', 'NDCG', 'MAP'))
# Canonical names of metrics evaluated directly on prediction scores (loss-style).
loss_metrics = dict((name.lower(), name) for name in ('AUC', 'RMSE', 'MAE', 'LOGLOSS'))
# Canonical names of full-ranking metrics (currently only GAUC).
rank_metrics = dict((name.lower(), name) for name in ('GAUC',))
# Metrics computed per user group and then averaged across users.
group_metrics = ChainMap(topk_metrics, rank_metrics)
# Metrics computed over the whole prediction set, ignoring user grouping.
individual_metrics = ChainMap(loss_metrics)
class TopKEvaluator(GroupedEvaluator):
    r"""TopK Evaluator is mainly used in ranking tasks. Now, we support six topk metrics which
    contain `'Hit', 'Recall', 'MRR', 'Precision', 'NDCG', 'MAP'`.

    Note:
        The metrics used calculate group-based metrics which considers the metrics scores averaged
        across users. Some of them are also limited to k.
    """

    def __init__(self, config, metrics):
        super().__init__(config, metrics)
        # ``topk`` may be a single int or a list of ints; _check_args normalizes it to a list.
        self.topk = config['topk']
        self._check_args()

    def collect(self, interaction, scores_tensor):
        """collect the topk intermediate result of one batch, this function mainly
        implements padding and TopK finding. It is called at the end of each batch

        Args:
            interaction (Interaction): :class:`AbstractEvaluator` of the batch
            scores_tensor (tensor): the tensor of model output with size of `(N, )`

        Returns:
            torch.Tensor : a matrix whose first max(topk) columns are the topk index
            matrix and whose last column is the padded width of each user's score row
        """
        user_len_list = interaction.user_len_list
        scores_matrix = self.get_score_matrix(scores_tensor, user_len_list)
        # NOTE(review): each row is reversed before topk; presumably so positive items
        # (appended at the end of each user's list) land at recoverable positions —
        # confirm against get_score_matrix / _calculate_metrics.
        scores_matrix = torch.flip(scores_matrix, dims=[-1])
        # Record the padded row width so evaluate() can map indices back per user.
        shape_matrix = torch.full((len(user_len_list), 1), scores_matrix.shape[1], device=scores_matrix.device)

        # get topk
        _, topk_idx = torch.topk(scores_matrix, max(self.topk), dim=-1)  # n_users x k

        # pack top_idx and shape_matrix
        result = torch.cat((topk_idx, shape_matrix), dim=1)
        return result

    def evaluate(self, batch_matrix_list, eval_data):
        """calculate the metrics of all batches. It is called at the end of each epoch

        Args:
            batch_matrix_list (list): the results of all batches
            eval_data (Dataset): the class of test data

        Returns:
            dict: such as ``{'Hit@20': 0.3824, 'Recall@20': 0.0527, 'Hit@10': 0.3153, 'Recall@10': 0.0329}``
        """
        pos_len_list = eval_data.get_pos_len_list()
        batch_result = torch.cat(batch_matrix_list, dim=0).cpu().numpy()

        # unpack top_idx and shape_matrix packed together by collect()
        topk_idx = batch_result[:, :-1]
        shapes = batch_result[:, -1]

        assert len(pos_len_list) == len(topk_idx)
        # get metrics
        metric_dict = {}
        result_list = self._calculate_metrics(pos_len_list, topk_idx, shapes)
        for metric, value in zip(self.metrics, result_list):
            for k in self.topk:
                key = '{}@{}'.format(metric, k)
                # value holds one score per cutoff 1..max(topk); pick the k-th.
                metric_dict[key] = round(value[k - 1], self.precision)
        return metric_dict

    def _check_args(self):
        """Normalize ``self.topk`` to a list of ints and validate every entry is positive."""
        if isinstance(self.topk, (int, list)):
            if isinstance(self.topk, int):
                self.topk = [self.topk]
            for topk in self.topk:
                if topk <= 0:
                    raise ValueError(
                        'topk must be a positive integer or a list of positive integers, '
                        'but got `{}`'.format(topk)
                    )
        else:
            raise TypeError('The topk must be an integer or a list of integers')

    def _calculate_metrics(self, pos_len_list, topk_idx, shapes):
        """integrate the results of each batch and evaluate the topk metrics by users

        Args:
            pos_len_list (numpy.ndarray): a list of users' positive items
            topk_idx (numpy.ndarray): a matrix which contains the index of the topk items for users
            shapes (numpy.ndarray): a list which contains the columns of the padded batch matrix

        Returns:
            numpy.ndarray: a matrix which contains the metrics result
        """
        # A topk index counts as a hit when it falls into the last pos_len positions
        # of the (flipped) padded row, i.e. index >= row_width - pos_len.
        pos_idx_matrix = (topk_idx >= (shapes - pos_len_list).reshape(-1, 1))
        result_list = []
        for metric in self.metrics:
            metric_fuc = metrics_dict[metric.lower()]
            result = metric_fuc(pos_idx_matrix, pos_len_list)  # n_users x len(ranks)
            result_list.append(result)
        # stack to len(metrics) x n_users x len(ranks), then average over users
        result = np.stack(result_list, axis=0).mean(axis=1)  # len(metrics) x len(ranks)
        return result

    def __str__(self):
        msg = 'The TopK Evaluator Info:\n' + \
              '\tMetrics:[' + \
              ', '.join([topk_metrics[metric.lower()] for metric in self.metrics]) + \
              '], TopK:[' + \
              ', '.join(map(str, self.topk)) + \
              ']'
        return msg
class RankEvaluator(GroupedEvaluator):
    r"""Rank Evaluator is mainly used in ranking tasks except for topk tasks. Now, we support one
    rank metric containing `'GAUC'`.

    Note:
        The metrics used calculate group-based metrics which considers the metrics scores averaged
        across users except for top-k metrics.
    """

    def __init__(self, config, metrics):
        super().__init__(config, metrics)

    def get_user_pos_len_list(self, interaction, scores_tensor):
        """get number of positive items and all items in test set of each user

        Args:
            interaction (Interaction): :class:`AbstractEvaluator` of the batch
            scores_tensor (tensor): the tensor of model output with size of `(N, )`

        Returns:
            list: number of positive items,
            list: number of all items
        """
        pos_len_list = torch.Tensor(interaction.pos_len_list).to(scores_tensor.device)
        user_len_list = interaction.user_len_list
        return pos_len_list, user_len_list

    def average_rank(self, scores):
        """Get the ranking of an ordered tensor, and take the average of the ranking for positions with equal values.

        Args:
            scores(tensor): an ordered tensor, with size of `(N, )`

        Returns:
            torch.Tensor: average_rank

        Example:
            >>> average_rank(tensor([[1,2,2,2,3,3,6],[2,2,2,2,4,5,5]]))
            tensor([[1.0000, 3.0000, 3.0000, 3.0000, 5.5000, 5.5000, 7.0000],
                    [2.5000, 2.5000, 2.5000, 2.5000, 5.0000, 6.5000, 6.5000]])

        Reference:
            https://github.com/scipy/scipy/blob/v0.17.1/scipy/stats/stats.py#L5262-L5352
        """
        length, width = scores.shape
        device = scores.device
        true_tensor = torch.full((length, 1), True, dtype=torch.bool, device=device)

        # True where a new distinct value starts within each (sorted) row.
        obs = torch.cat([true_tensor, scores[:, 1:] != scores[:, :-1]], dim=1)
        # bias added to dense so the flattened rows do not interfere with each other
        bias = torch.arange(0, length, device=device).repeat(width).reshape(width, -1). \
            transpose(1, 0).reshape(-1)
        dense = obs.view(-1).cumsum(0) + bias

        # cumulative counts of each unique value
        count = torch.where(torch.cat([obs, true_tensor], dim=1))[1]

        # average of the first and the last rank inside each tie group
        avg_rank = .5 * (count[dense] + count[dense - 1] + 1).view(length, -1)
        return avg_rank

    def collect(self, interaction, scores_tensor):
        """collect the rank intermediate result of one batch, this function mainly implements ranking
        and calculating the sum of rank for positive items. It is called at the end of each batch

        Args:
            interaction (Interaction): :class:`AbstractEvaluator` of the batch
            scores_tensor (tensor): the tensor of model output with size of `(N, )`

        Returns:
            torch.Tensor: per-user sum of the average ranks of positive items, shape `(n_users, 1)`
        """
        pos_len_list, user_len_list = self.get_user_pos_len_list(interaction, scores_tensor)
        scores_matrix = self.get_score_matrix(scores_tensor, user_len_list)
        desc_scores, desc_index = torch.sort(scores_matrix, dim=-1, descending=True)

        # get the index of positive items in the ranking list
        # NOTE(review): this assumes the first pos_len items of each user's list are
        # the positives (so their original indices are < pos_len) — confirm upstream.
        pos_index = (desc_index < pos_len_list.reshape(-1, 1))

        avg_rank = self.average_rank(desc_scores)
        pos_rank_sum = torch.where(pos_index, avg_rank, torch.zeros_like(avg_rank)).sum(dim=-1).reshape(-1, 1)
        return pos_rank_sum

    def evaluate(self, batch_matrix_list, eval_data):
        """calculate the metrics of all batches. It is called at the end of each epoch

        Args:
            batch_matrix_list (list): the results of all batches
            eval_data (Dataset): the class of test data

        Returns:
            dict: such as ``{'GAUC': 0.9286}``
        """
        pos_len_list = eval_data.get_pos_len_list()
        user_len_list = eval_data.get_user_len_list()
        pos_rank_sum = torch.cat(batch_matrix_list, dim=0).cpu().numpy()
        assert len(pos_len_list) == len(pos_rank_sum)

        # get metrics
        metric_dict = {}
        result_list = self._calculate_metrics(user_len_list, pos_len_list, pos_rank_sum)
        for metric, value in zip(self.metrics, result_list):
            key = '{}'.format(metric)
            metric_dict[key] = round(value, self.precision)
        return metric_dict

    def _calculate_metrics(self, user_len_list, pos_len_list, pos_rank_sum):
        """integrate the results of each batch and evaluate the rank metrics by users

        Args:
            user_len_list (numpy.ndarray): number of items in each user's test list
            pos_len_list (numpy.ndarray): number of positive items for each user
            pos_rank_sum (numpy.ndarray): per-user sum of positive items' average ranks

        Returns:
            list: a list of metrics result
        """
        result_list = []
        for metric in self.metrics:
            metric_fuc = metrics_dict[metric.lower()]
            result = metric_fuc(user_len_list, pos_len_list, pos_rank_sum)
            result_list.append(result)
        return result_list

    def __str__(self):
        msg = 'The Rank Evaluator Info:\n' + \
              '\tMetrics:[' + \
              ', '.join([rank_metrics[metric.lower()] for metric in self.metrics]) + \
              ']'
        return msg
class LossEvaluator(IndividualEvaluator):
    r"""Loss Evaluator is mainly used in rating prediction and click through rate prediction. Now, we support four
    loss metrics which contain `'AUC', 'RMSE', 'MAE', 'LOGLOSS'`.

    Note:
        The metrics used do not calculate group-based metrics which considers the metrics scores averaged
        across users. They are also not limited to k. Instead, they calculate the scores on the entire
        prediction results regardless the users.
    """

    def __init__(self, config, metrics):
        super().__init__(config, metrics)
        # Name of the interaction field holding the ground-truth labels.
        self.label_field = config['LABEL_FIELD']

    def collect(self, interaction, pred_scores):
        """collect the loss intermediate result of one batch, this function mainly
        implements concatenating preds and trues. It is called at the end of each batch

        Args:
            interaction (Interaction): :class:`AbstractEvaluator` of the batch
            pred_scores (tensor): the tensor of model output with a size of `(N, )`

        Returns:
            tensor : a batch of scores with a size of `(N, 2)` — column 0 trues, column 1 preds
        """
        true_scores = interaction[self.label_field].to(pred_scores.device)
        assert len(true_scores) == len(pred_scores)
        return self.get_score_matrix(true_scores, pred_scores)

    def evaluate(self, batch_matrix_list, *args):
        """calculate the metrics of all batches. It is called at the end of each epoch

        Args:
            batch_matrix_list (list): the results of all batches

        Returns:
            dict: such as {'AUC': 0.83}
        """
        concat = torch.cat(batch_matrix_list, dim=0).cpu().numpy()

        trues = concat[:, 0]
        preds = concat[:, 1]

        # get metrics
        metric_dict = {}
        result_list = self._calculate_metrics(trues, preds)
        for metric, value in zip(self.metrics, result_list):
            key = '{}'.format(metric)
            metric_dict[key] = round(value, self.precision)
        return metric_dict

    def _calculate_metrics(self, trues, preds):
        """get metrics result

        Args:
            trues (numpy.ndarray): the true scores' list
            preds (numpy.ndarray): the predict scores' list

        Returns:
            list: a list of metrics result
        """
        result_list = []
        for metric in self.metrics:
            metric_fuc = metrics_dict[metric.lower()]
            result = metric_fuc(trues, preds)
            result_list.append(result)
        return result_list

    def __str__(self):
        msg = 'The Loss Evaluator Info:\n' + \
              '\tMetrics:[' + \
              ', '.join([loss_metrics[metric.lower()] for metric in self.metrics]) + \
              ']'
        return msg
# Binds each metric-name table to the evaluator class that computes those metrics;
# presumably consumed by a factory elsewhere to choose evaluators per requested metric — confirm at call site.
metric_eval_bind = [(topk_metrics, TopKEvaluator), (loss_metrics, LossEvaluator), (rank_metrics, RankEvaluator)]