Source code for oddt.metrics

"""Metrics for estimating performance of drug discovery methods implemented in
ODDT"""

from __future__ import division

from math import ceil
import numpy as np
from scipy.stats import linregress
from sklearn.metrics import roc_curve as roc, auc, mean_squared_error

__all__ = ['roc', 'auc', 'roc_auc', 'roc_log_auc', 'enrichment_factor',
           'random_roc_log_auc', 'rmse', 'rie', 'bedroc']


def roc_auc(y_true, y_score, pos_label=None, ascending_score=True):
    """Computes ROC AUC score

    Parameters
    ----------
    y_true : array, shape=[n_samples]
        True binary labels, in range {0,1} or {-1,1}. If positive label is
        different than 1, it must be explicitly defined.

    y_score : array, shape=[n_samples]
        Scores for tested series of samples

    pos_label: int
        Positive label of samples (if other than 1)

    ascending_score: bool (default=True)
        Indicates if your score is ascending. An ascending score increases
        with decreasing activity, i.e. it ascends on the ranking list
        (where actives are on top).

    Returns
    -------
    roc_auc : float
        ROC AUC in range 0:1
    """
    if ascending_score:
        y_score = -y_score
    fpr, tpr, thresholds = roc(y_true, y_score, pos_label=pos_label)
    return auc(fpr, tpr)

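# Illustrative usage (not part of the original module; values are made up):
# with the default ascending_score=True, lower raw scores mean higher
# predicted activity, so a perfectly ranked screen yields an AUC of 1.0.
#
#     >>> import numpy as np
#     >>> y_true = np.array([1, 1, 1, 0, 0, 0, 0, 0])
#     >>> y_score = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
#     >>> roc_auc(y_true, y_score)  # perfect ranking -> 1.0
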
def rmse(y_true, y_pred):
    """Compute Root Mean Squared Error (RMSE)

    Parameters
    ----------
    y_true : array-like of shape = [n_samples] or [n_samples, n_outputs]
        Ground truth (correct) target values.

    y_pred : array-like of shape = [n_samples] or [n_samples, n_outputs]
        Estimated target values.

    Returns
    -------
    rmse : float
        A positive floating point value (the best value is 0.0).
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))

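# Illustrative usage (toy numbers): RMSE of predicted vs. true affinities.
#
#     >>> rmse(np.array([6.0, 7.4, 5.3]), np.array([6.2, 7.1, 5.5]))
#     # sqrt((0.2**2 + 0.3**2 + 0.2**2) / 3) ~= 0.238
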
def enrichment_factor(y_true, y_score, percentage=1, pos_label=None, kind='fold'):
    """Computes enrichment factor for given percentage, i.e. EF_1% is the
    enrichment factor for the first percent of given samples. This function
    assumes that results are already sorted and samples with the best
    predictions come first.

    Parameters
    ----------
    y_true : array, shape=[n_samples]
        True binary labels, in range {0,1} or {-1,1}. If positive label is
        different than 1, it must be explicitly defined.

    y_score : array, shape=[n_samples]
        Scores for tested series of samples

    percentage : int or float
        The percentage for which EF is being calculated

    pos_label: int
        Positive label of samples (if other than 1)

    kind: 'fold' or 'percentage' (default='fold')
        Two kinds of enrichment factor: fold and percentage. Fold shows the
        increase over random distribution (1 is random, the higher the EF the
        better the enrichment). Percentage returns the fraction of positive
        labels within the top x% of the dataset.

    Returns
    -------
    ef : float
        Enrichment Factor for given percentage (in range 0:1 for
        kind='percentage')
    """
    if pos_label is None:
        pos_label = 1
    labels = y_true == pos_label
    assert labels.sum() > 0, "There are no correct predictions. Double-check the pos_label"
    assert len(labels) > 0, "Sample size must be greater than 0"
    # calculate the fraction of positive labels within the top percentage
    n_perc = int(ceil(percentage / 100. * len(labels)))
    out = labels[:n_perc].sum() / n_perc
    if kind == 'fold':
        out /= (labels.sum() / len(labels))
    return out

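# Illustrative usage: note that y_score is not used by enrichment_factor --
# the input must already be sorted best-first. With 2 actives out of 10
# samples, both ranked in the top 20%, EF_20% = (2/2) / (2/10) = 5.0-fold.
#
#     >>> y_true = np.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
#     >>> enrichment_factor(y_true, None, percentage=20)  # -> 5.0
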
def roc_log_auc(y_true, y_score, pos_label=None, ascending_score=True,
                log_min=0.001, log_max=1.):
    """Computes area under semi-log ROC.

    Parameters
    ----------
    y_true : array, shape=[n_samples]
        True binary labels, in range {0,1} or {-1,1}. If positive label is
        different than 1, it must be explicitly defined.

    y_score : array, shape=[n_samples]
        Scores for tested series of samples

    pos_label: int
        Positive label of samples (if other than 1)

    ascending_score: bool (default=True)
        Indicates if your score is ascending. An ascending score increases
        with decreasing activity, i.e. it ascends on the ranking list
        (where actives are on top).

    log_min : float (default=0.001)
        Minimum value for estimating AUC. Lower values will be clipped for
        numerical stability.

    log_max : float (default=1.)
        Maximum value for estimating AUC. Higher values will be ignored.

    Returns
    -------
    auc : float
        semi-log ROC AUC
    """
    if ascending_score:
        y_score = -y_score
    fpr, tpr, t = roc(y_true, y_score, pos_label=pos_label)
    fpr = fpr.clip(log_min)
    idx = (fpr <= log_max)
    log_fpr = 1 - np.log10(fpr[idx]) / np.log10(log_min)
    return auc(log_fpr, tpr[idx])

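# Illustrative usage: the semi-log AUC weights the low-FPR (early recognition)
# region more heavily than standard ROC AUC; compare the result against the
# random baseline from random_roc_log_auc() below.
#
#     >>> y_true = np.array([1, 1, 1, 0, 0, 0, 0, 0])
#     >>> y_score = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
#     >>> roc_log_auc(y_true, y_score)
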
def random_roc_log_auc(log_min=0.001, log_max=1.):
    """Computes area under semi-log ROC for a random distribution.

    Parameters
    ----------
    log_min : float (default=0.001)
        Minimum logarithm value for estimating AUC

    log_max : float (default=1.)
        Maximum logarithm value for estimating AUC

    Returns
    -------
    auc : float
        semi-log ROC AUC for random distribution
    """
    return (log_max - log_min) / (np.log(10) * np.log10(log_max / log_min))

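# With the defaults the random baseline works out to
# (1. - 0.001) / (ln(10) * log10(1. / 0.001)) = 0.999 / (2.3026 * 3) ~= 0.1446:
#
#     >>> random_roc_log_auc()  # -> ~0.1446
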
def standard_deviation_error(y_true, y_pred):
    """Standard Deviation (SD) error implemented as used by Li et al.
    for CASF-2013 (http://dx.doi.org/10.1021/ci500081m).

    Parameters
    ----------
    y_true : array-like
        True values.

    y_pred : array-like
        Prediction to be scored.

    Returns
    -------
    sd : float
        Standard deviation of the prediction error after a linear fit.
    """
    y_true = np.atleast_1d(y_true).flatten()
    y_pred = np.atleast_1d(y_pred).flatten()
    A, B = linregress(y_pred, y_true)[:2]  # y = Ax + B
    y_ = A * y_pred + B
    sd = (((y_true - y_) ** 2).sum() / (len(y_pred) - 1)) ** 0.5
    return sd

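# Illustrative usage (toy numbers): SD is the residual standard deviation
# after regressing the true values on the predictions, so a near-linear
# relationship gives a small SD even if predictions are offset or scaled.
#
#     >>> standard_deviation_error([1.0, 2.0, 3.0, 4.0], [2.1, 4.0, 6.1, 7.9])
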
def rie(y_true, y_score, alpha=20, pos_label=None):
    """Computes Robust Initial Enhancement [1]_. This function assumes that
    results are already sorted and samples with the best predictions come
    first.

    Parameters
    ----------
    y_true : array, shape=[n_samples]
        True binary labels, in range {0,1} or {-1,1}. If positive label is
        different than 1, it must be explicitly defined.

    y_score : array, shape=[n_samples]
        Scores for tested series of samples

    alpha: float
        Alpha. 1/Alpha should be proportional to the percentage in EF.

    pos_label: int
        Positive label of samples (if other than 1)

    Returns
    -------
    rie_score : float
        Robust Initial Enhancement

    References
    ----------
    .. [1] Sheridan, R. P.; Singh, S. B.; Fluder, E. M.; Kearsley, S. K.
           Protocols for bridging the peptide to nonpeptide gap in topological
           similarity searches. J. Chem. Inf. Comput. Sci. 2001, 41, 1395-1406.
           DOI: 10.1021/ci0100144
    """
    if pos_label is None:
        pos_label = 1
    labels = y_true == pos_label
    N = len(labels)
    ra = labels.sum() / N
    ranks = np.argwhere(labels).astype(float) + 1  # need 1-based ranking
    observed = np.exp(-alpha * ranks / N).sum()
    expected = (ra * (1 - np.exp(-alpha)) /
                (np.exp(alpha / N) - 1))
    rie_score = observed / expected
    return rie_score

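# Illustrative usage: like enrichment_factor, rie ignores y_score and expects
# pre-sorted input; actives ranked early give RIE > 1, late give RIE < 1.
#
#     >>> y_true = np.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
#     >>> rie(y_true, None, alpha=20)  # actives on top -> RIE well above 1
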
def bedroc(y_true, y_score, alpha=20., pos_label=None):
    """Computes Boltzmann-Enhanced Discrimination of Receiver Operating
    Characteristic [1]_. This function assumes that results are already
    sorted and samples with the best predictions come first.

    Parameters
    ----------
    y_true : array, shape=[n_samples]
        True binary labels, in range {0,1} or {-1,1}. If positive label is
        different than 1, it must be explicitly defined.

    y_score : array, shape=[n_samples]
        Scores for tested series of samples

    alpha: float
        Alpha. 1/Alpha should be proportional to the percentage in EF.

    pos_label: int
        Positive label of samples (if other than 1)

    Returns
    -------
    bedroc_score : float
        Boltzmann-Enhanced Discrimination of Receiver Operating Characteristic

    References
    ----------
    .. [1] Truchon J-F, Bayly CI. Evaluating virtual screening methods: good
           and bad metrics for the "early recognition" problem. J Chem Inf
           Model. 2007;47: 488-508. DOI: 10.1021/ci600426e
    """
    if pos_label is None:
        pos_label = 1
    labels = y_true == pos_label
    ra = labels.sum() / len(labels)
    ri = 1 - ra
    rie_score = rie(y_true, y_score, alpha=alpha, pos_label=pos_label)
    bedroc_score = (rie_score * ra * np.sinh(alpha / 2) /
                    (np.cosh(alpha / 2) - np.cosh(alpha / 2 - alpha * ra)) +
                    1 / (1 - np.exp(alpha * ri)))
    return bedroc_score

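# Illustrative usage: BEDROC rescales RIE into the 0:1 range, again assuming
# pre-sorted input; per Truchon & Bayly, alpha=20 weights roughly the top 8%
# of the ranking most heavily.
#
#     >>> y_true = np.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
#     >>> bedroc(y_true, None, alpha=20.)  # near 1.0 when actives are on top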