# Source code for recbole.evaluator.utils

# -*- encoding: utf-8 -*-
# @Time    :   2020/08/04
# @Author  :   Kaiyuan Li
# @email   :   tsotfsk@outlook.com

# UPDATE
# @Time    :   2020/09/28, 2020/08/09
# @Author  :   Kaiyuan Li, Zhichao Feng
# @email   :   tsotfsk@outlook.com, fzcbupt@gmail.com

"""
recbole.evaluator.utils
################################
"""

import itertools

import numpy as np
import torch


def pad_sequence(sequences, len_list, pad_to=None, padding_value=0):
    """Pad variable-length sequences into a single 2-D matrix.

    Each sequence becomes one row; rows shorter than the target length are
    right-padded with ``padding_value``.

    Args:
        sequences (list): list of variable length sequences (``torch.Tensor``
            objects that are concatenated along dim 0).
        len_list (list): the length of each tensor in ``sequences``, in the
            same order.
        pad_to (int, optional): if not None, every row is padded to this
            length; otherwise rows are padded to the longest length found in
            ``len_list``.
        padding_value (int or float, optional): value for padded elements,
            e.g. ``-np.inf`` for score matrices fed to a top-k. Default: 0.

    Returns:
        torch.Tensor: [seq_num, max_len] or [seq_num, pad_to]

    Raises:
        ValueError: if any entry of ``len_list`` exceeds the target row
            length (only possible when ``pad_to`` is given).
    """
    longest = int(np.max(len_list))
    max_len = longest if pad_to is None else int(pad_to)
    if longest > max_len:
        raise ValueError(
            "pad_to ({}) is smaller than the longest sequence ({})".format(
                max_len, longest
            )
        )

    device = sequences[0].device
    if max_len == np.min(len_list):
        # Every sequence already has the target length: nothing to pad.
        return torch.cat(sequences, dim=0).view(-1, max_len)

    # Number of padding elements required after each individual sequence.
    extra_len_list = np.subtract(max_len, len_list).tolist()
    padding_nums = int(np.sum(extra_len_list))

    # Build all padding at once, then slice it into per-row chunks so the
    # rows can be assembled with a single torch.cat.
    padding_tensor = torch.tensor([padding_value], device=device).repeat(padding_nums)
    padding_list = torch.split(padding_tensor, extra_len_list)

    # Interleave: seq_0, pad_0, seq_1, pad_1, ... -> flat tensor -> matrix.
    interleaved = list(itertools.chain.from_iterable(zip(sequences, padding_list)))
    return torch.cat(interleaved).view(-1, max_len)
def trunc(scores, method):
    """Round the scores by using the given method.

    The method name is looked up as an attribute of the ``numpy`` module and
    applied to ``scores``.

    Args:
        scores (numpy.ndarray): scores
        method (str): one of ['ceil', 'floor', 'around']

    Raises:
        NotImplementedError: if ``numpy`` has no attribute named ``method``.

    Returns:
        numpy.ndarray: processed scores
    """
    try:
        cut_method = getattr(np, method)
    except AttributeError as err:
        # getattr signals a missing name with AttributeError; translate it to
        # the exception type this function documents.
        raise NotImplementedError(
            "module 'numpy' has no function named '{}'".format(method)
        ) from err
    return cut_method(scores)
def cutoff(scores, threshold):
    """Binarize the scores against a threshold.

    Args:
        scores (numpy.ndarray): scores
        threshold (float): between 0 and 1

    Returns:
        numpy.ndarray: 1 where the score is strictly greater than
        ``threshold``, 0 elsewhere.
    """
    above_threshold = scores > threshold
    binarized = np.where(above_threshold, 1, 0)
    return binarized
def _binary_clf_curve(trues, preds): """Calculate true and false positives per binary classification threshold Args: trues (numpy.ndarray): the true scores' list preds (numpy.ndarray): the predict scores' list Returns: fps (numpy.ndarray): A count of false positives, at index i being the number of negative samples assigned a score >= thresholds[i] preds (numpy.ndarray): An increasing count of true positives, at index i being the number of positive samples assigned a score >= thresholds[i]. Note: To improve efficiency, we referred to the source code(which is available at sklearn.metrics.roc_curve) in SkLearn and made some optimizations. """ trues = trues == 1 desc_idxs = np.argsort(preds)[::-1] preds = preds[desc_idxs] trues = trues[desc_idxs] unique_val_idxs = np.where(np.diff(preds))[0] threshold_idxs = np.r_[unique_val_idxs, trues.size - 1] tps = np.cumsum(trues)[threshold_idxs] fps = 1 + threshold_idxs - tps return fps, tps