add metrics

KiddoZhu · KiddoZhu · commit 7b17e4de0261 · 2022-06-03T22:45:03.000-04:00
diff --git a/torchdrug/metrics/__init__.py b/torchdrug/metrics/__init__.py
@@ -1,12 +1,13 @@
 from .metric import area_under_roc, area_under_prc, r2, QED, logP, penalized_logP, SA, chemical_validity, \
-    variadic_accuracy
+    accuracy, matthews_corrcoef, pearsonr, spearmanr, variadic_accuracy
 
 # alias
 AUROC = area_under_roc
 AUPRC = area_under_prc
 
 __all__ = [
     "area_under_roc", "area_under_prc", "r2", "QED", "logP", "penalized_logP", "SA", "chemical_validity",
+    "accuracy", "matthews_corrcoef", "pearsonr", "spearmanr",
     "variadic_accuracy",
     "AUROC", "AUPRC",
-]
+]
diff --git a/torchdrug/metrics/metric.py b/torchdrug/metrics/metric.py
@@ -1,12 +1,9 @@
-import os
-import sys
-
 import torch
 from torch.nn import functional as F
-from torch_scatter import scatter_max
+from torch_scatter import scatter_add, scatter_mean, scatter_max
 import networkx as nx
 from rdkit import Chem
-from rdkit.Chem import RDConfig, Descriptors
+from rdkit.Chem import Descriptors
 
 from torchdrug import utils
 from torchdrug.layers import functional
@@ -23,6 +20,8 @@ def area_under_roc(pred, target):
         pred (Tensor): predictions of shape :math:`(n,)`
         target (Tensor): binary targets of shape :math:`(n,)`
     """
+    if target.dtype != torch.long:
+        raise TypeError("Expect `target` to be torch.long, but found %s" % target.dtype)
     order = pred.argsort(descending=True)
     target = target[order]
     hit = target.cumsum(0)
@@ -40,6 +39,8 @@ def area_under_prc(pred, target):
         pred (Tensor): predictions of shape :math:`(n,)`
         target (Tensor): binary targets of shape :math:`(n,)`
     """
+    if target.dtype != torch.long:
+        raise TypeError("Expect `target` to be torch.long, but found %s" % target.dtype)
     order = pred.argsort(descending=True)
     target = target[order]
     precision = target.cumsum(0) / torch.arange(1, len(target) + 1, device=target.device)
@@ -178,13 +179,103 @@ def chemical_validity(pred):
         validity.append(1 if mol else 0)
 
     return torch.tensor(validity, dtype=torch.float, device=pred.device)
+    
+
+@R.register("metrics.accuracy")
+def accuracy(pred, target):
+    """
+    Compute classification accuracy over :math:`N` sets that all have :math:`C` categories.
+
+    Returns the fraction of sets whose argmax category in ``pred`` equals ``target``, as a scalar tensor.
+
+    Parameters:
+        pred (Tensor): prediction of shape :math:`(N, C)`
+        target (Tensor): target category indices of shape :math:`(N,)`
+    """
+    return (pred.argmax(dim=-1) == target).float().mean()
+
+
+@R.register("metrics.mcc")
+def matthews_corrcoef(pred, target, eps=1e-6):
+    """
+    Matthews correlation coefficient between target and prediction, following sklearn's definition for K classes.
+
+    For details, see: 'https://scikit-learn.org/stable/modules/model_evaluation.html#matthews-corrcoef'
+
+    Parameters:
+        pred (Tensor): per-class prediction of shape :math:`(N, K)`; the argmax over the last dimension is used
+        target (Tensor): integer class indices of shape :math:`(N,)`
+        eps (float, optional): constant added under the square root so that the denominator is never zero
+    """
+    num_class = pred.size(-1)  # K, read off the last dimension of the prediction
+    pred = pred.argmax(-1)  # predicted class index per sample
+    ones = torch.ones(len(target), device=pred.device)
+    confusion_matrix = scatter_add(ones, target * num_class + pred, dim=0, dim_size=num_class ** 2)
+    confusion_matrix = confusion_matrix.view(num_class, num_class)  # rows: target class, columns: predicted class
+    t = confusion_matrix.sum(dim=1)  # number of samples per target class
+    p = confusion_matrix.sum(dim=0)  # number of samples per predicted class
+    c = confusion_matrix.trace()  # number of correctly predicted samples
+    s = confusion_matrix.sum()  # total number of samples
+    return (c * s - t @ p) / ((s * s - p @ p) * (s * s - t @ t) + eps).sqrt()  # eps sits inside the sqrt
+
+
+@R.register("metrics.pearsonr")
+def pearsonr(pred, target):
+    """
+    Pearson correlation between target and prediction.
+    Mimics `scipy.stats.pearsonr`, but returns only the correlation coefficient.
+
+    Parameters:
+        pred (Tensor): prediction of shape :math:`(N,)`
+        target (Tensor): target of shape :math:`(N,)`
+    """
+    pred_mean = pred.float().mean()
+    target_mean = target.float().mean()
+    pred_centered = pred - pred_mean
+    target_centered = target - target_mean
+    pred_normalized = pred_centered / pred_centered.norm(2)  # no eps here: a constant input gives 0 / 0
+    target_normalized = target_centered / target_centered.norm(2)
+    pearsonr = pred_normalized @ target_normalized  # dot product of the centered, L2-normalized vectors
+    return pearsonr
+
+
+@R.register("metrics.spearmanr")
+def spearmanr(pred, target, eps=1e-6):
+    """
+    Spearman correlation between target and prediction. Non-differentiable, use as a validation metric only.
+
+    Parameters:
+        pred (Tensor): prediction of shape :math:`(N,)`
+        target (Tensor): target of shape :math:`(N,)`
+        eps (float, optional): constant added to the product of standard deviations to avoid division by zero
+    """
+
+    def get_ranking(input):
+        input_set, input_inverse = input.unique(return_inverse=True)
+        order = input_inverse.argsort()  # sorts by index into the unique values, i.e. by input value
+        ranking = torch.zeros(len(input_inverse), device=input.device)  # filled with 1-based ranks below
+        ranking[order] = torch.arange(1, len(input) + 1, dtype=torch.float, device=input.device)
+
+        # for elements that have the same value, replace their rankings with the mean of their rankings
+        mean_ranking = scatter_mean(ranking, input_inverse, dim=0, dim_size=len(input_set))
+        ranking = mean_ranking[input_inverse]
+        return ranking
+
+    pred = get_ranking(pred)
+    target = get_ranking(target)
+    covariance = (pred * target).mean() - pred.mean() * target.mean()  # E[xy] - E[x] E[y] over the ranks
+    pred_std = pred.std(unbiased=False)  # unbiased=False, consistent with the mean-based covariance
+    target_std = target.std(unbiased=False)
+    spearmanr = covariance / (pred_std * target_std + eps)
+    return spearmanr
 
 
+@R.register("metrics.variadic_accuracy")
 def variadic_accuracy(input, target, size):
     """
     Compute classification accuracy over variadic sizes of categories.
 
-    Suppose there are :math:`N` samples, and the number of categories in all samples is summed to :math`B`.
+    Suppose there are :math:`N` samples, and the number of categories in all samples is summed to :math:`B`.
 
     Parameters:
         input (Tensor): prediction of shape :math:`(B,)`
@@ -196,4 +287,4 @@ def variadic_accuracy(input, target, size):
     input_class = scatter_max(input, index2graph)[1]
     target_index = target + size.cumsum(0) - size
     accuracy = (input_class == target_index).float()
-    return accuracy
+    return accuracy
diff --git a/torchdrug/tasks/__init__.py b/torchdrug/tasks/__init__.py
@@ -19,6 +19,7 @@
     "mse": "mean squared error",
     "rmse": "root mean squared error",
     "acc": "accuracy",
+    "mcc": "matthews correlation coefficient",
 }
 
 

Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,7 @@`
`19`	`19`	`"mse": "mean squared error",`
`20`	`20`	`"rmse": "root mean squared error",`
`21`	`21`	`"acc": "accuracy",`
	`22`	`+ "mcc": "matthews correlation coefficient",`
`22`	`23`	`}`
`23`	`24`
`24`	`25`