Source code for GOOD.utils.evaluation

r"""
Evaluation: model evaluation functions.
"""

import numpy as np
import torch

from typing import List, Tuple
from GOOD.utils.config_reader import Union, CommonArgs, Munch


[docs]def eval_data_preprocess(y: torch.Tensor, raw_pred: torch.Tensor, mask: torch.Tensor, config: Union[CommonArgs, Munch] ) -> Tuple[Union[np.ndarray, List], Union[np.ndarray, List]]: r""" Preprocess data for evaluations by converting data into np.ndarray or List[np.ndarray] (Multi-task) format. When the task of the dataset is not multi-task, data is converted into np.ndarray. When it is multi-task, data is converted into List[np.ndarray] in which each np.ndarray in the list represents one task. For example, GOOD-PCBA is a 128-task binary classification dataset. Therefore, the output list will contain 128 elements. Args: y (torch.Tensor): Ground truth values. raw_pred (torch.Tensor): Raw prediction values without softmax or sigmoid. mask (torch.Tensor): Ground truth NAN mask for removing empty label. config (Union[CommonArgs, Munch]): The required config is ``config.metric.dataset_task`` Returns: Processed prediction values and ground truth values. """ if config.metric.dataset_task == 'Binary classification': pred_prob = raw_pred.sigmoid() if y.shape[1] > 1: # multi-task preds = [] targets = [] for i in range(y.shape[1]): # pred and target per task preds.append(pred_prob[:, i][mask[:, i]].detach().cpu().numpy()) targets.append(y[:, i][mask[:, i]].detach().cpu().numpy()) return preds, targets pred = pred_prob[mask].reshape(-1).detach().cpu().numpy() elif config.metric.dataset_task == 'Multi-label classification': pred_prob = raw_pred.softmax(dim=1) pred = pred_prob[mask].detach().cpu().numpy() elif 'Regression' in config.metric.dataset_task: pred = raw_pred[mask].reshape(-1).detach().cpu().numpy() else: raise ValueError('Dataset task value error.') target = y[mask].reshape(-1).detach().cpu().numpy() return pred, target
[docs]def eval_score(pred_all: Union[List[np.ndarray], List[List[np.ndarray]]], target_all: Union[List[np.ndarray], List[List[np.ndarray]]], config: Union[CommonArgs, Munch] ) -> Union[np.ndarray, float, float]: r""" Calculate metric scores given preprocessed prediction values and ground truth values. Args: pred_all (Union[List[np.ndarray], List[List[np.ndarray]]]): Prediction value list. It is a list of output pred of :func:`eval_data_preprocess`. target_all (Union[List[np.ndarray], List[List[np.ndarray]]]): Ground truth value list. It is a list of output target of :func:`eval_data_preprocess`. config (Union[CommonArgs, Munch]): The required config is ``config.metric.score_func`` that is a function for score calculation (*e.g.*, :func:`GOOD.utils.metric.Metric.acc`). Returns: A float score value. """ np.seterr(invalid='ignore') assert type(pred_all) is list, 'Wrong prediction input.' if type(pred_all[0]) is list: # multi-task all_task_preds = [] all_task_targets = [] for task_i in range(len(pred_all[0])): preds = [] targets = [] for pred, target in zip(pred_all, target_all): preds.append(pred[task_i]) targets.append(target[task_i]) all_task_preds.append(np.concatenate(preds)) all_task_targets.append(np.concatenate(targets)) scores = [] for i in range(len(all_task_preds)): if all_task_targets[i].shape[0] > 0: scores.append(np.nanmean(config.metric.score_func(all_task_targets[i], all_task_preds[i]))) score = np.nanmean(scores) else: pred_all = np.concatenate(pred_all) target_all = np.concatenate(target_all) score = np.nanmean(config.metric.score_func(target_all, pred_all)) return score