Source code for pykeen.metrics.classification

# -*- coding: utf-8 -*-

"""Classification metrics."""

import inspect
from typing import Callable, ClassVar, MutableMapping, Optional, Type

import numpy as np
import rexmex.metrics.classification as rmc
from class_resolver import ClassResolver

from .utils import Metric, ValueRange

__all__ = [

def construct_indicator(*, y_score: np.ndarray, y_true: np.ndarray) -> np.ndarray:
    """Construct binary indicators from a list of scores.

    If there are $n$ positively labeled entries in ``y_true``, this function
    assigns the top $n$ highest scores in ``y_score`` as positive and remainder
    as negative.

    .. note ::
        Since the method uses the number of true labels to determine a threshold, the
        results will typically be overly optimistic estimates of the generalization performance.

    .. todo ::
        Add a method which estimates a threshold based on a validation set, and applies this
        threshold for binarization on the test set.

    :param y_score:
        A 1-D array of the score values
    :param y_true:
        A 1-D array of binary values (1 and 0)
        A 1-D array of indicator values

    .. seealso::

        This implementation was inspired by
    number_pos = np.sum(y_true, dtype=int)
    y_sort = np.flip(np.argsort(y_score))
    y_pred = np.zeros_like(y_true, dtype=int)
    y_pred[y_sort[np.arange(number_pos)]] = 1
    return y_pred

[docs]class ClassificationMetric(Metric): """A base class for classification metrics.""" #: A description of the metric description: ClassVar[str] #: The function that runs the metric func: ClassVar[Callable[[np.array, np.array], float]] # docstr-coverage: inherited
[docs] @classmethod def get_description(cls) -> str: # noqa: D102 return cls.description
[docs] @classmethod def score(cls, y_true, y_score) -> float: """Run the scoring function.""" return cls.func(y_true, construct_indicator(y_score=y_score, y_true=y_true) if cls.binarize else y_score)
#: Functions with the right signature in the :mod:`rexmex.metrics.classification` that are not themselves metrics EXCLUDE = { rmc.true_positive, rmc.true_negative, rmc.false_positive, rmc.false_negative, rmc.pr_auc_score, # for now there's an issue } #: This dictionary maps from duplicate functions to the canonical function in :mod:`rexmex.metrics.classification` DUPLICATE_CLASSIFIERS = { rmc.miss_rate: rmc.false_negative_rate, rmc.fall_out: rmc.false_positive_rate, rmc.selectivity: rmc.true_negative_rate, rmc.specificity: rmc.true_negative_rate, rmc.hit_rate: rmc.true_positive_rate, rmc.sensitivity: rmc.true_positive_rate, rmc.critical_success_index: rmc.threat_score, rmc.precision_score: rmc.positive_predictive_value, rmc.recall_score: rmc.true_positive_rate, } class MetricAnnotator: """A class for annotating metric functions.""" metrics: MutableMapping[str, Type[ClassificationMetric]] def __init__(self): """Initialize the annotator.""" self.metrics = {} def higher(self, func, **kwargs): """Annotate a function where higher values are better.""" return self.add(func, increasing=True, **kwargs) def lower(self, func, **kwargs): """Annotate a function where lower values are better.""" return self.add(func, increasing=False, **kwargs) def add( self, func, *, increasing: bool, description: str, link: str, name: Optional[str] = None, lower: Optional[float] = 0.0, lower_inclusive: bool = True, upper: Optional[float] = 1.0, upper_inclusive: bool = True, binarize: bool = False, ): """Annotate a function.""" title = func.__name__.replace("_", " ").title() metric_cls = type( title.replace(" ", ""), (ClassificationMetric,), dict( name=name or title, link=link, binarize=binarize, increasing=increasing, value_range=ValueRange( lower=lower, lower_inclusive=lower_inclusive, upper=upper, upper_inclusive=upper_inclusive, ), func=func, description=description, ), ) self.metrics[func.__name__] = metric_cls classifier_annotator = MetricAnnotator() classifier_annotator.higher( rmc.true_negative_rate, description="TN / (TN + FP)", link="", ) classifier_annotator.higher( rmc.true_positive_rate, description="TP / (TP + FN)", link="" ) classifier_annotator.higher( rmc.positive_predictive_value, description="TP / (TP + FP)", link="", ) classifier_annotator.higher( rmc.negative_predictive_value, description="TN / (TN + FN)", link="", ) classifier_annotator.lower( rmc.false_negative_rate, description="FN / (FN + TP)", link="", ) classifier_annotator.lower( rmc.false_positive_rate, description="FP / (FP + TN)", link="", ) classifier_annotator.lower( rmc.false_discovery_rate, description="FP / (FP + TP)", link="", ) classifier_annotator.lower( rmc.false_omission_rate, description="FN / (FN + TN)", link="", ) classifier_annotator.higher( rmc.positive_likelihood_ratio, lower=0.0, upper=None, description="TPR / FPR", link="", ) classifier_annotator.lower( rmc.negative_likelihood_ratio, lower=0.0, upper=None, description="FNR / TNR", link="", ) classifier_annotator.lower( rmc.prevalence_threshold, description="√FPR / (√TPR + √FPR)", link="", ) classifier_annotator.higher( rmc.threat_score, description="TP / (TP + FN + FP)", link="", ) classifier_annotator.higher( rmc.fowlkes_mallows_index, description="√PPV x √TPR", link="", ) classifier_annotator.higher( rmc.informedness, description="TPR + TNR - 1", link="" ) classifier_annotator.higher( rmc.markedness, description="PPV + NPV - 1", link="" ) classifier_annotator.higher( rmc.diagnostic_odds_ratio, lower=0.0, upper=None, description="LR+/LR-", link="", ) classifier_annotator.higher( rmc.roc_auc_score, name="AUC-ROC", description="Area Under the ROC Curve", link="", ) classifier_annotator.higher( rmc.accuracy_score, binarize=True, name="Accuracy", description="(TP + TN) / (TP + TN + FP + FN)", link="", ) classifier_annotator.higher( rmc.balanced_accuracy_score, binarize=True, name="Balanced Accuracy", description="An adjusted version of the accuracy for imbalanced datasets", link="", ) classifier_annotator.higher( rmc.f1_score, name="F1 Score", binarize=True, description="2TP / (2TP + FP + FN)", link="", ) classifier_annotator.higher( rmc.average_precision_score, name="Average Precision", description="A summary statistic over the precision-recall curve", link="", ) classifier_annotator.higher( rmc.matthews_correlation_coefficient, binarize=True, lower=-1.0, upper=1.0, description="A balanced measure applicable even with class imbalance", link="", ) # TODO there's something wrong with this, so add it later # classifier_annotator.higher( # rmc.pr_auc_score, # name="AUC-PR", # description="Area Under the Precision-Recall Curve", # link="", # ) classification_metric_resolver: ClassResolver[ClassificationMetric] = ClassResolver( list(classifier_annotator.metrics.values()), base=ClassificationMetric, ) def _check(): """Check that all functions in the classification module are annotated.""" for func in rmc.__dict__.values(): if not inspect.isfunction(func): continue parameters = inspect.signature(func).parameters if "y_true" not in parameters or "y_score" not in parameters: continue if func in EXCLUDE: if func in classifier_annotator.metrics: raise ValueError(f"should not include {func.__name__}") continue if func in DUPLICATE_CLASSIFIERS: if func in classifier_annotator.metrics: raise ValueError(f"{func.__name__} is a duplicate of {DUPLICATE_CLASSIFIERS[func].__name__}") continue if func.__name__ not in classifier_annotator.metrics: raise ValueError(f"missing rexmex classifier: {func.__name__}") if __name__ == "__main__": _check()