# -*- coding: utf-8 -*-

"""
Classification metrics.

The metrics in this module assume the link prediction setting to be a (binary) classification of individual triples.

.. note ::
    Many metrics in this module use `scikit-learn` under the hood; cf. the classification metrics section of
    scikit-learn's model evaluation user guide.
"""
# see also:

from __future__ import annotations

import abc
import math
import warnings
from typing import ClassVar, Collection, Literal

import numpy
from class_resolver import ClassResolver
from docdata import parse_docdata
from sklearn import metrics

from .utils import Metric, ValueRange

# NOTE(review): the original entries of this list were lost (the literal was left unterminated).
# The resolver is the evident public entry point of this module; confirm whether more names belong here.
__all__ = [
    "classification_metric_resolver",
]

# Policy for divisions by zero: "warn" yields 0 and emits a warning; 0 or 1 silently yield that value.
ZeroDivisionPolicy = Literal["warn", 0, 1]

def safe_divide(numerator: float, denominator: float, zero_division: ZeroDivisionPolicy = "warn") -> float:
    """
    Perform division and handle divide-by-zero similar to scikit-learn.

    :param numerator:
        the numerator
    :param denominator:
        the denominator
    :param zero_division:
        the zero-division policy; If "warn", act like 0, but warn about the case.

    :return:
        the division result, or the value prescribed by the policy if the denominator is zero
    """
    # todo: do we need numpy support?
    if denominator != 0:
        return numerator / denominator
    if zero_division == "warn":
        # "warn" behaves like policy 0, but makes the degenerate case visible to the user
        zero_division = 0
        warnings.warn(
            message=f"Division by zero. Result set to {zero_division} according to policy.",
            category=UserWarning,
            stacklevel=2,
        )
    return float(zero_division)

def construct_indicator(*, y_score: numpy.ndarray, y_true: numpy.ndarray) -> numpy.ndarray:
    """Construct binary indicators from a list of scores.

    If there are $n$ positively labeled entries in ``y_true``, this function
    assigns the top $n$ highest scores in ``y_score`` as positive and remainder
    as negative.

    .. note ::
        Since the method uses the number of true labels to determine a threshold, the
        results will typically be overly optimistic estimates of the generalization performance.

    .. todo ::
        Add a method which estimates a threshold based on a validation set, and applies this
        threshold for binarization on the test set.

    :param y_score:
        A 1-D array of the score values
    :param y_true:
        A 1-D array of binary values (1 and 0)

    :return:
        A 1-D array of indicator values

    .. seealso::

        This implementation was inspired by an external reference (the link is missing here - TODO restore).
    """
    # TODO: re-consider threshold
    number_pos = numpy.sum(y_true, dtype=int)
    # indices of the scores, from highest to lowest
    y_sort = numpy.flip(numpy.argsort(y_score))
    y_pred = numpy.zeros_like(y_true, dtype=int)
    # mark exactly as many entries positive as there are true positives
    y_pred[y_sort[:number_pos]] = 1
    return y_pred

class ClassificationMetric(Metric, abc.ABC):
    """A base class for classification metrics."""

    def __call__(self, y_true: numpy.ndarray, y_score: numpy.ndarray, weights: numpy.ndarray | None = None) -> float:
        """Evaluate the metric.

        :param y_true: shape: (num_samples,)
            the true labels, either 0 or 1.
        :param y_score: shape: (num_samples,)
            the predictions, either continuous or binarized.
        :param weights: shape: (num_samples,)
            weights for individual predictions

            .. warning ::
                not all metrics support sample weights - check :attr:`supports_weights` first

        :return:
            the scalar metric value

        :raises ValueError:
            when weights are provided but the function does not support them.
        """
        if weights is None:
            return self.forward(y_true=y_true, y_score=y_score)
        # NOTE(review): `supports_weights` is not declared in this class; presumably inherited from `Metric`.
        if not self.supports_weights:
            raise ValueError(
                f"{self.__class__.__name__} does not support sample weights but received weights={weights}.",
            )
        return self.forward(y_true=y_true, y_score=y_score, sample_weight=weights)

    @abc.abstractmethod
    def forward(
        self, y_true: numpy.ndarray, y_score: numpy.ndarray, sample_weight: numpy.ndarray | None = None
    ) -> float:
        """
        Calculate the metric.

        :param y_true: shape: (num_samples,)
            the true label, either 0 or 1.
        :param y_score: shape: (num_samples,)
            the predictions, either as continuous scores, or as binarized prediction
            (depending on the concrete metric at hand).
        :param sample_weight: shape: (num_samples,)
            sample weights

        :return:
            a scalar metric value
        """
        # noqa: DAR202
@parse_docdata
class NumScores(ClassificationMetric):
    """
    The number of scores.

    Lower numbers may indicate unreliable results.

    ---
    description: The number of scores.
    link:
    """

    name: ClassVar[str] = "Number of Scores"
    value_range: ClassVar[ValueRange] = ValueRange(lower=0, lower_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("score_count",)

    # docstr-coverage: inherited
    def forward(
        self, y_true: numpy.ndarray, y_score: numpy.ndarray, weights: numpy.ndarray | None = None
    ) -> float:  # noqa: D102
        return y_score.size


class BinarizedClassificationMetric(ClassificationMetric, abc.ABC):
    """A classification metric which requires binarized predictions instead of scores."""

    binarize: ClassVar[bool] = True

    # docstr-coverage: inherited
    def __call__(
        self, y_true: numpy.ndarray, y_score: numpy.ndarray, weights: numpy.ndarray | None = None
    ) -> float:  # noqa: D102
        # binarize the scores before delegating to the generic evaluation
        return super().__call__(
            y_true=y_true, y_score=construct_indicator(y_score=y_score, y_true=y_true), weights=weights
        )


@parse_docdata
class BalancedAccuracyScore(BinarizedClassificationMetric):
    """
    The average of recall obtained on each class.

    ---
    link:
    description: The average of recall obtained on each class.
    """

    name = "Balanced Accuracy Score"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("b-acc", "bas")
    supports_weights: ClassVar[bool] = True

    # docstr-coverage: inherited
    def forward(
        self, y_true: numpy.ndarray, y_score: numpy.ndarray, sample_weight: numpy.ndarray | None = None
    ) -> float:  # noqa: D102
        return float(metrics.balanced_accuracy_score(y_true=y_true, y_pred=y_score, sample_weight=sample_weight))


@parse_docdata
class AveragePrecisionScore(ClassificationMetric):
    """The average precision from prediction scores.

    .. note ::
        this metric is different from the area under the precision-recall curve, which uses
        interpolation and can be too optimistic.

    ---
    link:
    description: The average precision across different thresholds.
    """

    # TODO: can we directly include sklearn's docstring here?
    name = "Average Precision Score"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("aps", "ap")
    supports_weights: ClassVar[bool] = True

    # docstr-coverage: inherited
    def forward(
        self, y_true: numpy.ndarray, y_score: numpy.ndarray, sample_weight: numpy.ndarray | None = None
    ) -> float:  # noqa: D102
        return float(metrics.average_precision_score(y_true=y_true, y_score=y_score, sample_weight=sample_weight))


@parse_docdata
class AreaUnderTheReceiverOperatingCharacteristicCurve(ClassificationMetric):
    """
    The area under the receiver operating characteristic curve.

    ---
    link:
    description: The area under the receiver operating characteristic curve.
    """

    name = "Area Under The Receiver Operating Characteristic Curve"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("roc-auc",)
    supports_weights: ClassVar[bool] = True

    # docstr-coverage: inherited
    def forward(
        self, y_true: numpy.ndarray, y_score: numpy.ndarray, sample_weight: numpy.ndarray | None = None
    ) -> float:  # noqa: D102
        return float(metrics.roc_auc_score(y_true=y_true, y_score=y_score, sample_weight=sample_weight))


def _confusion_counts(matrix: numpy.ndarray) -> tuple[float, float, float, float]:
    """Unpack a (2, 2) confusion table ``[[TP, FN], [FP, TN]]`` into Python scalars ``(tp, fn, fp, tn)``."""
    # tolist() yields plain Python numbers, so subsequent products cannot overflow a fixed-width integer
    tp, fn, fp, tn = matrix.ravel().tolist()
    return tp, fn, fp, tn


class ConfusionMatrixClassificationMetric(ClassificationMetric, abc.ABC):
    """A classification metric based on the confusion matrix."""

    binarize: ClassVar[bool] = True
    zero_division: ZeroDivisionPolicy = "warn"

    @abc.abstractmethod
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:
        """
        Calculate the metric from the confusion table.

        :param matrix: shape: (2, 2)
            the confusion table of the form::

                [[ TP, FN ]
                 [ FP, TN ]]

        :return:
            the scalar metric
        """
        # noqa: DAR202

    # todo: it would make sense to have a separate evaluator which constructs the confusion matrix only once

    # docstr-coverage: inherited
    def forward(
        self, y_true: numpy.ndarray, y_score: numpy.ndarray, weights: numpy.ndarray | None = None
    ) -> float:  # noqa: D102
        y_pred = construct_indicator(y_score=y_score, y_true=y_true)
        # scikit-learn orders rows/columns by sorted label by default, i.e., [[TN, FP], [FN, TP]].
        # Passing labels=[1, 0] yields the layout documented above, [[TP, FN], [FP, TN]], and also
        # guarantees a (2, 2) result when only one of the classes is present.
        matrix = metrics.confusion_matrix(
            y_true=y_true, y_pred=y_pred, labels=[1, 0], sample_weight=weights, normalize=None
        )
        return self.extract_from_confusion_matrix(matrix=matrix)


@parse_docdata
class TruePositiveRate(ConfusionMatrixClassificationMetric):
    """
    The true positive rate is the probability that the prediction is positive, given the triple is truly positive.

    .. math ::
        TPR = TP / (TP + FN)

    ---
    link:
    description: The probability that a truly positive triple is predicted positive.
    """

    name = "True Positive Rate"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("tpr", "sensitivity", "recall", "hit rate")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, _fp, _tn = _confusion_counts(matrix)
        return safe_divide(numerator=tp, denominator=tp + fn, zero_division=self.zero_division)


@parse_docdata
class TrueNegativeRate(ConfusionMatrixClassificationMetric):
    """
    The true negative rate is the probability that the prediction is negative, given the triple is truly negative.

    .. math ::
        TNR = TN / (TN + FP)

    .. warning ::
        most knowledge graph datasets do not have true negatives, i.e., verified false facts, but rather are
        collection of (mostly) true facts, where the missing ones are generally unknown rather than false.

    ---
    link:
    description: The probability that a truly false triple is predicted negative.
    """

    name = "True Negative Rate"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("tnr", "specificity", "selectivity")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        _tp, _fn, fp, tn = _confusion_counts(matrix)
        return safe_divide(numerator=tn, denominator=tn + fp, zero_division=self.zero_division)


@parse_docdata
class FalsePositiveRate(ConfusionMatrixClassificationMetric):
    """
    The false positive rate is the probability that the prediction is positive, given the triple is truly negative.

    .. math ::
        FPR = FP / (FP + TN)

    ---
    link:
    description: The probability that a truly negative triple is predicted positive.
    """

    name = "False Positive Rate"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = False
    synonyms: ClassVar[Collection[str]] = ("fpr", "fall-out", "false alarm ratio")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        _tp, _fn, fp, tn = _confusion_counts(matrix)
        return safe_divide(numerator=fp, denominator=fp + tn, zero_division=self.zero_division)


@parse_docdata
class FalseNegativeRate(ConfusionMatrixClassificationMetric):
    """
    The false negative rate is the probability that the prediction is negative, given the triple is truly positive.

    .. math ::
        FNR = FN / (FN + TP)

    ---
    link:
    description: The probability that a truly positive triple is predicted negative.
    """

    name = "False Negative Rate"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = False
    synonyms: ClassVar[Collection[str]] = ("fnr", "miss-rate")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, _fp, _tn = _confusion_counts(matrix)
        return safe_divide(numerator=fn, denominator=fn + tp, zero_division=self.zero_division)


@parse_docdata
class PositivePredictiveValue(ConfusionMatrixClassificationMetric):
    """
    The positive predictive value is the proportion of predicted positives which are true positive.

    .. math ::
        PPV = TP / (TP + FP)

    ---
    link:
    description: The proportion of predicted positives which are true positive.
    """

    name = "Positive Predictive Value"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("ppv",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, _fn, fp, _tn = _confusion_counts(matrix)
        return safe_divide(numerator=tp, denominator=tp + fp, zero_division=self.zero_division)


@parse_docdata
class NegativePredictiveValue(ConfusionMatrixClassificationMetric):
    """
    The negative predictive value is the proportion of predicted negatives which are true negative.

    .. math ::
        NPV = TN / (TN + FN)

    ---
    link:
    description: The proportion of predicted negatives which are true negatives.
    """

    name = "Negative Predictive Value"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("npv",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        _tp, fn, _fp, tn = _confusion_counts(matrix)
        return safe_divide(numerator=tn, denominator=tn + fn, zero_division=self.zero_division)


@parse_docdata
class FalseDiscoveryRate(ConfusionMatrixClassificationMetric):
    """
    The false discovery rate is the proportion of predicted positives which are truly negative.

    .. math ::
        FDR = FP / (FP + TP)

    ---
    link:
    description: The proportion of predicted positives which are truly negative.
    """

    name = "False Discovery Rate"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = False
    synonyms: ClassVar[Collection[str]] = ("fdr",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, _fn, fp, _tn = _confusion_counts(matrix)
        return safe_divide(numerator=fp, denominator=fp + tp, zero_division=self.zero_division)


@parse_docdata
class FalseOmissionRate(ConfusionMatrixClassificationMetric):
    """
    The false omission rate is the proportion of predicted negatives which are truly positive.

    .. math ::
        FOR = FN / (FN + TN)

    ---
    link:
    description: The proportion of predicted negatives which are truly positive.
    """

    name = "False Omission Rate"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = False
    synonyms: ClassVar[Collection[str]] = ("fom",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        _tp, fn, _fp, tn = _confusion_counts(matrix)
        return safe_divide(numerator=fn, denominator=fn + tn, zero_division=self.zero_division)


@parse_docdata
class PositiveLikelihoodRatio(ConfusionMatrixClassificationMetric):
    r"""
    The positive likelihood ratio is the ratio of true positive rate to false positive rate.

    .. math ::
        LR+ = TPR / FPR = \frac{TP / (TP + FN)}{FP / (FP + TN)} = \frac{TP \cdot (FP + TN)}{FP \cdot (TP + FN)}

    ---
    link:
    description: The ratio of true positive rate to false positive rate.
    """

    name = "Positive Likelihood Ratio"
    value_range = ValueRange(lower=0, lower_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("lr+",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, tn = _confusion_counts(matrix)
        return safe_divide(
            numerator=tp * (fp + tn),
            denominator=fp * (tp + fn),
            zero_division=self.zero_division,
        )


@parse_docdata
class NegativeLikelihoodRatio(ConfusionMatrixClassificationMetric):
    r"""
    The negative likelihood ratio is the ratio of false negative rate to true negative rate.

    .. math ::
        LR- = FNR / TNR = \frac{FN / (TP + FN)}{TN / (FP + TN)} = \frac{FN \cdot (FP + TN)}{TN \cdot (TP + FN)}

    ---
    link:
    description: The ratio of false negative rate to true negative rate.
    """

    name = "Negative Likelihood Ratio"
    value_range = ValueRange(lower=0, lower_inclusive=True)
    increasing: ClassVar[bool] = False
    synonyms: ClassVar[Collection[str]] = ("lr-",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, tn = _confusion_counts(matrix)
        return safe_divide(
            numerator=fn * (fp + tn),
            denominator=tn * (tp + fn),
            zero_division=self.zero_division,
        )


@parse_docdata
class DiagnosticOddsRatio(ConfusionMatrixClassificationMetric):
    r"""
    The ratio of positive and negative likelihood ratio.

    .. math ::
        DOR = \frac{LR+}{LR-} = \frac{TP \cdot TN}{FP \cdot FN}

    ---
    link:
    description: The ratio of positive and negative likelihood ratio.
    """

    name = "Diagnostic Odds Ratio"
    value_range = ValueRange(lower=0, lower_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("dor",)

    # todo:

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, tn = _confusion_counts(matrix)
        return safe_divide(
            numerator=tp * tn,
            denominator=fp * fn,
            zero_division=self.zero_division,
        )


@parse_docdata
class Accuracy(ConfusionMatrixClassificationMetric):
    r"""
    The ratio of the number of correct classifications to the total number.

    .. math ::
        ACC = (TP + TN) / (TP + TN + FP + FN)

    ---
    link:
    description: The ratio of the number of correct classifications to the total number.
    """

    name = "Accuracy"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("acc", "fraction correct", "fc")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, tn = _confusion_counts(matrix)
        # correct classifications are the *sum* of the diagonal, not its product
        return safe_divide(
            numerator=tp + tn,
            denominator=tp + fn + fp + tn,
            zero_division=self.zero_division,
        )


@parse_docdata
class F1Score(ConfusionMatrixClassificationMetric):
    r"""
    The harmonic mean of precision and recall.

    .. math ::
        F1 = 2TP / (2TP + FP + FN)

    ---
    link:
    description: The harmonic mean of precision and recall.
    """

    name = "F1 Score"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("f1",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, _tn = _confusion_counts(matrix)
        return safe_divide(
            numerator=2 * tp,
            denominator=2 * tp + fn + fp,
            zero_division=self.zero_division,
        )


@parse_docdata
class PrevalenceThreshold(ConfusionMatrixClassificationMetric):
    r"""
    The prevalence threshold.

    .. math ::
        PT = √FPR / (√TPR + √FPR)

    ---
    link:
    description: The prevalence threshold.
    """

    # todo: improve doc
    name = "Prevalence Threshold"
    value_range = ValueRange(lower=0, lower_inclusive=True)
    increasing: ClassVar[bool] = False
    synonyms: ClassVar[Collection[str]] = ("pt",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, tn = _confusion_counts(matrix)
        # compute the rates with this instance's zero-division policy instead of a fresh default instance
        fpr = safe_divide(numerator=fp, denominator=fp + tn, zero_division=self.zero_division)
        tpr = safe_divide(numerator=tp, denominator=tp + fn, zero_division=self.zero_division)
        sqrt_fpr = math.sqrt(fpr)
        return safe_divide(
            numerator=sqrt_fpr,
            denominator=sqrt_fpr + math.sqrt(tpr),
            zero_division=self.zero_division,
        )


@parse_docdata
class ThreatScore(ConfusionMatrixClassificationMetric):
    r"""
    The threat score.

    .. math ::
        TS = TP / (TP + FN + FP)

    ---
    link:
    description: The ratio of true positives to all triples which are truly or predicted positive.
    """

    name = "Threat Score"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("ts", "critical success index", "csi", "jaccard index")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, _tn = _confusion_counts(matrix)
        return safe_divide(
            numerator=tp,
            denominator=tp + fn + fp,
            zero_division=self.zero_division,
        )


@parse_docdata
class FowlkesMallowsIndex(ConfusionMatrixClassificationMetric):
    r"""
    The Fowlkes Mallows index, i.e., the geometric mean of precision and recall.

    .. math ::
        FM = \sqrt{PPV \cdot TPR} = \sqrt{\frac{TP^2}{(TP + FP) \cdot (TP + FN)}}

    ---
    link:
    description: The Fowlkes Mallows index.
    """

    name = "Fowlkes Mallows Index"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("fm", "fmi")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, _tn = _confusion_counts(matrix)
        # the denominator is the product (TP + FP) * (TP + FN); this keeps the result within [0, 1]
        return math.sqrt(
            safe_divide(
                numerator=tp**2,
                denominator=(tp + fp) * (tp + fn),
                zero_division=self.zero_division,
            )
        )


@parse_docdata
class Informedness(ConfusionMatrixClassificationMetric):
    r"""
    The informedness metric.

    .. math ::
        YI = TPR + TNR - 1 = TP / (TP + FN) + TN / (TN + FP) - 1

    ---
    link:
    description: The informedness metric.
    """

    name = "Informedness"
    value_range = ValueRange(lower=-1, lower_inclusive=True, upper=1, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("Youden's J", "Youden's Index", "yi")

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, tn = _confusion_counts(matrix)
        tpr = safe_divide(numerator=tp, denominator=tp + fn, zero_division=self.zero_division)
        tnr = safe_divide(numerator=tn, denominator=tn + fp, zero_division=self.zero_division)
        return tpr + tnr - 1


@parse_docdata
class MatthewsCorrelationCoefficient(ConfusionMatrixClassificationMetric):
    r"""
    The Matthews Correlation Coefficient (MCC).

    A balanced measure applicable even with class imbalance.

    .. math ::
        MCC = (TP * TN - FP * FN) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))

    ---
    link:
    description: The Matthews Correlation Coefficient (MCC).
    """

    name = "Matthews Correlation Coefficient"
    value_range: ClassVar[ValueRange] = ValueRange(lower=-1, upper=1, lower_inclusive=True, upper_inclusive=True)
    increasing: ClassVar[bool] = True
    synonyms: ClassVar[Collection[str]] = ("mcc",)

    # docstr-coverage: inherited
    def extract_from_confusion_matrix(self, matrix: numpy.ndarray) -> float:  # noqa: D102
        tp, fn, fp, tn = _confusion_counts(matrix)
        # the square root is required for the coefficient to lie in [-1, 1]
        return safe_divide(
            numerator=tp * tn - fp * fn,
            denominator=math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)),
            zero_division=self.zero_division,
        )


classification_metric_resolver: ClassResolver[ClassificationMetric] = ClassResolver.from_subclasses(
    base=ClassificationMetric,
    default=AveragePrecisionScore,
    skip={BinarizedClassificationMetric, ConfusionMatrixClassificationMetric},
)