Source code for pykeen.metrics.ranking

# -*- coding: utf-8 -*-

"""
Ranking metrics.

This module comprises various rank-based metrics, which get an array of individual ranks as input, as summarize them
into a single-figure metric measuring different aspects of ranking performance.

We can generally distinguish:

Base Metrics
------------
These metrics directly operate on the ranks:

The following metrics measures summarize the central tendency of ranks

- :class:`pykeen.metrics.ranking.ArithmeticMeanRank`
- :class:`pykeen.metrics.ranking.GeometricMeanRank`
- :class:`pykeen.metrics.ranking.HarmonicMeanRank`
- :class:`pykeen.metrics.ranking.MedianRank`

The Hits at K metric is closely related to information retrieval and measures the fraction of times when the correct
result is in the top-$k$ ranked entries, i.e., the rank is at most $k$

- :class:`pykeen.metrics.ranking.HitsAtK`

The next metrics summarize the dispersion of ranks

- :class:`pykeen.metrics.ranking.MedianAbsoluteDeviation`
- :class:`pykeen.metrics.ranking.Variance`
- :class:`pykeen.metrics.ranking.StandardDeviation`

and finally there is a simple metric to store the number of ranks which where aggregated

- :class:`pykeen.metrics.ranking.Count`

Inverse Metrics
---------------
The inverse metrics are reciprocals of the central tendency measures. They offer the advantage of having a fixed value
range of $(0, 1]$, with a known optimal value of $1$:

- :class:`pykeen.metrics.ranking.InverseArithmeticMeanRank`
- :class:`pykeen.metrics.ranking.InverseGeometricMeanRank`
- :class:`pykeen.metrics.ranking.InverseHarmonicMeanRank`
- :class:`pykeen.metrics.ranking.InverseMedianRank`

Adjusted Metrics
----------------
Adjusted metrics build upon base metrics, but adjust them for chance, cf. [berrendorf2020]_ and [hoyt2022]_. All
adjusted metrics derive from :class:`pykeen.metrics.ranking.DerivedRankBasedMetric` and, for a given evaluation set,
are affine transformations of the base metric with dataset-dependent, but fixed transformation constants. Thus, they
can also be computed when the model predictions are not available anymore, but the evaluation set is known.

Expectation-Normalized Metrics
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
These metrics divide the metric by its expected value under random ordering. Thus, their expected value is always 1
irrespective of the evaluation set. They derive from :class:`pykeen.metrics.ranking.ExpectationNormalizedMetric`, and
there is currently only a single implementation:

- :class:`pykeen.metrics.ranking.AdjustedArithmeticMeanRank`

Re-indexed Metrics
~~~~~~~~~~~~~~~~~~
Re-indexed metrics subtract the expected value, and then normalize the optimal value to be 1. Thus, their expected value
under random ordering is 0, their optimal value is 1, and larger values indicate better results. The classes derive from
:class:`pykeen.metrics.ranking.ReindexedMetric`, and the following implementations are available:

- :class:`pykeen.metrics.ranking.AdjustedHitsAtK`
- :class:`pykeen.metrics.ranking.AdjustedArithmeticMeanRankIndex`
- :class:`pykeen.metrics.ranking.AdjustedGeometricMeanRankIndex`
- :class:`pykeen.metrics.ranking.AdjustedInverseHarmonicMeanRank`

z-Adjusted Metrics
~~~~~~~~~~~~~~~~~~
The final type of adjusted metrics uses the expected value as well as the variance of the metric under random ordering
to normalize the metrics similar to `z-score normalization <https://en.wikipedia.org/wiki/Standard_score>`_.
The z-score normalized metrics have an expected value of 0, and a variance of 1, and positive values indicate better
results. While their value range is unbound, it can be interpreted through the lens of the inverse cumulative
density function of the standard Gaussian distribution to retrieve a *p*-value. The classes derive from
:class:`pykeen.metrics.ranking.ZMetric`, and the following implementations are available:

- :class:`pykeen.metrics.ranking.ZArithmeticMeanRank`
- :class:`pykeen.metrics.ranking.ZGeometricMeanRank`
- :class:`pykeen.metrics.ranking.ZHitsAtK`
- :class:`pykeen.metrics.ranking.ZInverseHarmonicMeanRank`
"""
import math
from abc import ABC, abstractmethod
from typing import Callable, ClassVar, Collection, Iterable, NamedTuple, Optional, Tuple, Type, Union

import numpy as np
from class_resolver import ClassResolver, HintOrType
from docdata import parse_docdata
from scipy import stats

from .utils import (
    Metric,
    ValueRange,
    stable_product,
    weighted_harmonic_mean,
    weighted_mean_expectation,
    weighted_mean_variance,
    weighted_median,
)
from ..typing import RANK_REALISTIC, RANK_TYPES, RankType
from ..utils import logcumsumexp

__all__ = [
    "rank_based_metric_resolver",
    # Base classes
    "RankBasedMetric",
    "DerivedRankBasedMetric",
    "ExpectationNormalizedMetric",
    "ReindexedMetric",
    "ZMetric",
    # Concrete classes
    "ArithmeticMeanRank",
    "AdjustedArithmeticMeanRank",
    "AdjustedArithmeticMeanRankIndex",
    "ZArithmeticMeanRank",
    "InverseArithmeticMeanRank",
    #
    "GeometricMeanRank",
    "AdjustedGeometricMeanRankIndex",
    "ZGeometricMeanRank",
    "InverseGeometricMeanRank",
    #
    "HarmonicMeanRank",
    "InverseHarmonicMeanRank",
    "AdjustedInverseHarmonicMeanRank",
    "ZInverseHarmonicMeanRank",
    #
    "MedianRank",
    "InverseMedianRank",
    #
    "HitsAtK",
    "AdjustedHitsAtK",
    "ZHitsAtK",
    #
    "StandardDeviation",
    "Variance",
    "Count",
    # Misc
    "NoClosedFormError",
    "generate_ranks",
    "generate_num_candidates_and_ranks",
    "generalized_harmonic_numbers",
    "AffineTransformationParameters",
    "harmonic_variances",
    #
    "HITS_METRICS",
]

EPSILON = 1.0e-12


[docs]def generate_ranks(
    num_candidates: np.ndarray,
    prefix_shape: Tuple[int, ...] = tuple(),
    seed: Union[None, int, np.random.Generator] = None,
    dtype: Optional[Type[np.number]] = None,
) -> np.ndarray:
    """
    Generate random ranks from a given array of the number of candidates for each ranking task.

    :param num_candidates: shape: s
        the number of candidates
    :param prefix_shape:
        additional dimensions for broadcasted sampling
    :param seed:
        the random seed
    :param dtype:
        the data type

    :return: shape: dims + s
        an array of sampled rank values
    """
    if dtype is None:
        dtype = int
    generator = np.random.default_rng(seed=seed)
    return generator.integers(low=1, high=num_candidates + 1, size=prefix_shape + num_candidates.shape, dtype=dtype)


[docs]def generate_num_candidates_and_ranks(
    num_ranks: int,
    max_num_candidates: int,
    seed: Optional[int] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Generate random number of candidates, and coherent ranks.

    :param num_ranks:
        the number of ranks to generate
    :param max_num_candidates:
        the maximum number of candidates (e.g., the number of entities)
    :param seed:
        the random seed.

    :return: shape: (num_ranks,)
        a pair of integer arrays, ranks and num_candidates for each individual ranking task
    """
    generator = np.random.default_rng(seed=seed)
    num_candidates = generator.integers(low=1, high=max_num_candidates, size=(num_ranks,))
    ranks = generate_ranks(num_candidates=num_candidates, seed=generator)
    return ranks, num_candidates


[docs]class NoClosedFormError(ValueError):
    """The metric does not provide a closed-form implementation for the requested operation."""


[docs]class RankBasedMetric(Metric):
    """A base class for rank-based metrics."""

    # rank based metrics do not need binarized scores
    binarize: ClassVar[bool] = False

    #: the supported rank types. Most of the time equal to all rank types
    supported_rank_types: ClassVar[Collection[RankType]] = RANK_TYPES

    #: whether the metric requires the number of candidates for each ranking task
    needs_candidates: ClassVar[bool] = False

[docs]    @abstractmethod
    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:
        """
        Evaluate the metric.

        :param ranks: shape: s
            the individual ranks
        :param num_candidates: shape: s
            the number of candidates for each individual ranking task
        :param weights: shape: s
            the weights for the individual ranks
        """
        raise NotImplementedError

[docs]    def get_sampled_values(
        self,
        num_candidates: np.ndarray,
        num_samples: int,
        weights: Optional[np.ndarray] = None,
        generator: Optional[np.random.Generator] = None,
        memory_intense: bool = True,
    ) -> np.ndarray:
        """
        Calculate the metric on sampled rank arrays.

        :param num_candidates: shape: s
            the number of candidates for each ranking task
        :param num_samples:
            the number of samples
        :param weights: shape: s
            the weights for the individual ranking tasks
        :param generator:
            a random state for reproducibility
        :param memory_intense:
            whether to use a more memory-intense, but more time-efficient variant

        :return: shape: (num_samples,)
            the metric evaluated on `num_samples` sampled rank arrays
        """
        num_candidates = np.asarray(num_candidates)
        if generator is None:
            generator = np.random.default_rng()
        if memory_intense:
            return np.apply_along_axis(
                self,
                axis=1,
                arr=generate_ranks(prefix_shape=(num_samples,), num_candidates=num_candidates, seed=generator),
                num_candidates=num_candidates,
                weights=weights,
            )
        return np.asanyarray(
            a=[
                self(
                    ranks=generate_ranks(num_candidates=num_candidates, seed=generator),
                    num_candidates=num_candidates,
                    weights=weights,
                )
                for _ in range(num_samples)
            ]
        )

    def _bootstrap(
        self,
        func: Callable[[np.ndarray], np.ndarray],
        num_candidates: np.ndarray,
        num_samples: int,
        confidence_level: float = 95.0,
        n_boot: int = 1_000,
        generator: Optional[np.random.Generator] = None,
        **kwargs,
    ) -> np.ndarray:
        """Bootstrap a metric's confidence intervals."""
        # normalize confidence level
        if not (50 < confidence_level < 100):
            raise ValueError(f"Invalid confidence_level={confidence_level}. Should be in (50, 100).")
        p = 50 - confidence_level / 2, 50, 50 + confidence_level / 2

        # sample metric values
        generator = np.random.default_rng(generator)
        xs = self.get_sampled_values(
            num_candidates=num_candidates, num_samples=num_samples, generator=generator, **kwargs
        )

        # bootstrap estimator (i.e., compute on sample with replacement)
        n = xs.shape[0]
        vs = np.asanyarray([func(xs[generator.integers(n, size=(n,))]) for _ in range(n_boot)])
        return np.percentile(vs, p)

[docs]    def numeric_expected_value(self, **kwargs) -> float:
        r"""
        Compute expected metric value by summation.

        The expectation is computed under the assumption that each individual rank follows a discrete uniform
        distribution $\mathcal{U}\left(1, N_i\right)$, where $N_i$ denotes the number of candidates for
        ranking task $r_i$.

        :param kwargs:
            keyword-based parameters passed to :func:`get_sampled_values`

        :return:
            The estimated expected value of this metric

        .. warning ::

            Depending on the metric, the estimate may not be very accurate and converge slowly, cf.
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_discrete.expect.html
        """
        return self.get_sampled_values(**kwargs).mean().item()

[docs]    def numeric_expected_value_with_ci(self, **kwargs) -> np.ndarray:
        """Estimate expected value with confidence intervals."""
        return self._bootstrap(func=np.mean, **kwargs)

[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:
        r"""Compute expected metric value.

        The expectation is computed under the assumption that each individual rank follows a
        discrete uniform distribution $\mathcal{U}\left(1, N_i\right)$, where $N_i$ denotes
        the number of candidates for ranking task $r_i$.

        :param num_candidates:
            the number of candidates for each individual rank computation
        :param num_samples:
            the number of samples to use for simulation, if no closed form
            expected value is implemented
        :param weights: shape: s
            the weights for the individual ranking tasks
        :param kwargs:
            additional keyword-based parameters passed to :func:`get_sampled_values`,
            if no closed form solution is available

        :return:
            the expected value of this metric

        :raises NoClosedFormError:
            raised if a closed form expectation has not been implemented and no number of samples are given

        .. note::
            Prefers analytical solution, if available, but falls back to numeric
            estimation via summation, cf. :func:`RankBasedMetric.numeric_expected_value`.
        """
        if num_samples is None:
            raise NoClosedFormError("Numeric estimation requires to specify a number of samples.")
        return self.numeric_expected_value(
            num_candidates=num_candidates, num_samples=num_samples, weights=weights, **kwargs
        )

[docs]    def numeric_variance(self, **kwargs) -> float:
        r"""Compute variance by summation.

        The variance is computed under the assumption that each individual rank follows a discrete uniform
        distribution $\mathcal{U}\left(1, N_i\right)$, where $N_i$ denotes the number of candidates for
        ranking task $r_i$.

        :param kwargs:
            keyword-based parameters passed to :func:`get_sampled_values`

        :return:
            The estimated variance of this metric

        .. warning ::

            Depending on the metric, the estimate may not be very accurate and converge slowly, cf.
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_discrete.expect.html
        """
        return self.get_sampled_values(**kwargs).var(ddof=1).item()

[docs]    def numeric_variance_with_ci(self, **kwargs) -> np.ndarray:
        """Estimate variance with confidence intervals."""
        return self._bootstrap(func=np.var, **kwargs)

[docs]    def variance(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:
        r"""Compute variance.

        The variance is computed under the assumption that each individual rank follows a discrete uniform
        distribution $\mathcal{U}\left(1, N_i\right)$, where $N_i$ denotes the number of candidates for
        ranking task $r_i$.

        :param num_candidates:
            the number of candidates for each individual rank computation
        :param num_samples:
            the number of samples to use for simulation, if no closed form
            expected value is implemented
        :param weights: shape: s
            the weights for the individual ranking tasks
        :param kwargs:
            additional keyword-based parameters passed to :func:`get_sampled_values`,
            if no closed form solution is available

        :return:
            The variance of this metric

        :raises NoClosedFormError:
            Raised if a closed form variance has not been implemented and no
            number of samples are given

        .. note::
            Prefers analytical solution, if available, but falls back to numeric
            estimation via summation, cf. :func:`RankBasedMetric.numeric_variance`.
        """
        if num_samples is None:
            raise NoClosedFormError("Numeric estimation requires to specify a number of samples.")
        return self.numeric_variance(num_candidates=num_candidates, num_samples=num_samples, weights=weights, **kwargs)

[docs]    def std(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:
        """
        Compute the standard deviation.

        :param num_candidates:
            the number of candidates for each individual rank computation
        :param num_samples:
            the number of samples to use for simulation, if no closed form
            expected value is implemented
        :param weights: shape: s
            the weights for the individual ranking tasks
        :param kwargs:
            additional keyword-based parameters passed to :func:`variance`,

        :return:
            The standard deviation (i.e. the square root of the variance) of this metric

        For a detailed explanation, cf. :func:`RankBasedMetric.variance`.
        """
        return math.sqrt(
            self.variance(num_candidates=num_candidates, num_samples=num_samples, weights=weights, **kwargs)
        )


def _safe_divide(x: float, y: float) -> float:
    """Divide x by y making sure that abs(y) > epsilon."""
    # cf. https://stackoverflow.com/questions/1986152/why-doesnt-python-have-a-sign-function
    y_sign = math.copysign(1.0, y)
    y_abs = abs(y)
    y_abs = max(y_abs, EPSILON)
    y = y_abs * y_sign
    return x / y


[docs]class AffineTransformationParameters(NamedTuple):
    """The parameters of an affine transformation."""

    scale: float = 1.0
    offset: float = 0.0


[docs]class DerivedRankBasedMetric(RankBasedMetric, ABC):
    r"""
    A derived rank-based metric.

    The derivation is based on an affine transformation of the metric, where scale and bias may depend on the number
    of candidates. Since the transformation only depends on the number of candidates, but not the ranks of the
    predictions, this method can also be used to adjust published results without access to the trained models.
    Moreover, we can obtain closed form solutions for expected value and variance.

    Let $\alpha, \beta$ denote the scale and offset of the affine transformation, i.e.,

    .. math ::

        M^* = \alpha \cdot M + \beta

    Then we have for the expectation

    .. math ::

        \mathbb{E}[M^*] = \mathbb{E}[\alpha \cdot M + \beta]
                        = \alpha \cdot \mathbb{E}[M] + \beta

    and for the variance

    .. math ::

        \mathbb{V}[M^*] = \mathbb{V}[\alpha \cdot M + \beta]
                        = \alpha^2 \cdot \mathbb{V}[M]
    """

    base: RankBasedMetric
    needs_candidates: ClassVar[bool] = True

    #: The rank-based metric class that this derived metric extends
    base_cls: ClassVar[Optional[Type[RankBasedMetric]]] = None

    def __init__(
        self,
        base_cls: HintOrType[RankBasedMetric] = None,
        **kwargs,
    ):
        """
        Initialize the derived metric.

        :param base_cls:
            the base class, or a hint thereof. If None, use the class-attribute
        :param kwargs:
            additional keyword-based parameters used to instantiate the base metric
        """
        self.base = rank_based_metric_resolver.make(base_cls or self.base_cls, pos_kwargs=kwargs)

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        if num_candidates is None:
            raise ValueError(f"{self.__class__.__name__} requires number of candidates.")
        return self.adjust(
            base_metric_result=self.base(ranks=ranks, num_candidates=num_candidates, weights=weights),
            num_candidates=num_candidates,
            weights=weights,
        )

[docs]    def adjust(
        self, base_metric_result: float, num_candidates: np.ndarray, weights: Optional[np.ndarray] = None
    ) -> float:
        """
        Adjust base metric results based on the number of candidates.

        :param base_metric_result:
            the result of the base metric
        :param num_candidates:
            the number of candidates
        :param weights: shape: s
            the weights for the individual ranking tasks

        :return:
            the adjusted metric

        .. note ::

            since the adjustment only depends on the number of candidates, but not the ranks of the predictions, this
            method can also be used to adjust published results without access to the trained models.
        """
        parameters = self.get_coefficients(num_candidates=num_candidates, weights=weights)
        return parameters.scale * base_metric_result + parameters.offset

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        # since scale and offset are constant for a given number of candidates, we have
        # E[scale * M + offset] = scale * E[M] + offset
        return self.adjust(
            base_metric_result=self.base.expected_value(
                num_candidates=num_candidates, num_samples=num_samples, weights=weights, **kwargs
            ),
            num_candidates=num_candidates,
            weights=weights,
        )

    # docstr-coverage: inherited
[docs]    def variance(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        # since scale and offset are constant for a given number of candidates, we have
        # V[scale * M + offset] = scale^2 * V[M]
        parameters = self.get_coefficients(num_candidates=num_candidates, weights=weights)
        return parameters.scale**2.0 * self.base.variance(
            num_candidates=num_candidates, num_samples=num_samples, weights=weights, **kwargs
        )

[docs]    @abstractmethod
    def get_coefficients(
        self, num_candidates: np.ndarray, weights: Optional[np.ndarray] = None
    ) -> AffineTransformationParameters:
        """
        Compute the scaling coefficients.

        :param num_candidates:
            the number of candidates
        :param weights:
            the weights for the individual ranking tasks


        :return:
            a tuple (scale, offset)
        """
        raise NotImplementedError


[docs]class ZMetric(DerivedRankBasedMetric):
    r"""
    A z-score adjusted metrics.

    .. math ::

        \mathbb{M}^* = \frac{\mathbb{M} - \mathbb{E}[\mathbb{M}]}{\sqrt{\mathbb{V}[\mathbb{M}]}}

    In terms of the affine transformation from DerivedRankBasedMetric, we obtain the following coefficients:

    .. math ::

        \alpha &= \frac{1}{\sqrt{\mathbb{V}[\mathbb{M}]}} \\
        \beta  &= -\alpha \cdot \mathbb{E}[\mathbb{M}]

    .. note ::

        For non-increasing metrics, i.e., where larger values correspond to better results, we additionally change the
        sign of the result such that a larger z-value always corresponds to a better result irrespective of the base
        metric's direction.

    .. warning:: This requires a closed-form solution to the expected value and the variance
    """

    #: Z-adjusted metrics are formulated to be increasing
    increasing = True
    #: Z-adjusted metrics can only be applied to realistic ranks
    supported_rank_types = (RANK_REALISTIC,)
    value_range = ValueRange(lower=None, upper=None)
    closed_expectation: ClassVar[bool] = True
    closed_variance: ClassVar[bool] = True

    # docstr-coverage: inherited
[docs]    def get_coefficients(
        self, num_candidates: np.ndarray, weights: Optional[np.ndarray] = None
    ) -> AffineTransformationParameters:  # noqa: D102
        mean = self.base.expected_value(num_candidates=num_candidates, weights=weights)
        std = self.base.std(num_candidates=num_candidates, weights=weights)
        scale = _safe_divide(1.0, std)
        if not self.base.increasing:
            scale = -scale
        offset = -scale * mean
        return AffineTransformationParameters(scale=scale, offset=offset)

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        # should be exactly 0.0
        return 0.0  # centered

    # docstr-coverage: inherited
[docs]    def variance(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        # should be exactly 1.0
        return 1.0  # re-scaled


[docs]class ExpectationNormalizedMetric(DerivedRankBasedMetric):
    r"""An adjustment to create an expectation-normalized metric.

    .. math ::

        M^* = \frac{M}{\mathbb{E}[M]}

    In terms of the affine transformation from :class:`DerivedRankBasedMetric`, we obtain the following coefficients:

    .. math ::

        \alpha &= \frac{1}{\mathbb{E}[M]} \\
        \beta  &= 0

    .. warning:: This requires a closed-form solution to the expected value
    """

    closed_expectation: ClassVar[bool] = True

    # docstr-coverage: inherited
[docs]    def get_coefficients(
        self, num_candidates: np.ndarray, weights: Optional[np.ndarray] = None
    ) -> AffineTransformationParameters:  # noqa: D102
        return AffineTransformationParameters(
            scale=_safe_divide(1, self.base.expected_value(num_candidates=num_candidates, weights=weights))
        )

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        return 1.0  # centered


[docs]class ReindexedMetric(DerivedRankBasedMetric):
    r"""A mixin to create an expectation normalized metric with max of 1 and expectation of 0.

    .. math::

        \mathbb{M}^{*} = \frac{\mathbb{M} - \mathbb{E}[\mathbb{M}]}{1 - \mathbb{E}[\mathbb{M}]}

    In terms of the affine transformation from DerivedRankBasedMetric, we obtain the following coefficients:

    .. math ::

        \alpha &= \frac{1}{1 - \mathbb{E}[\mathbb{M}]} \\
        \beta  &= -\alpha \cdot \mathbb{E}[\mathbb{M}]

    .. warning:: This requires a closed-form solution to the expected value
    """

    #: Expectation/maximum reindexed metrics are formulated to be increasing
    increasing = True
    #: Expectation/maximum reindexed metrics can only be applied to realistic ranks
    supported_rank_types = (RANK_REALISTIC,)
    closed_expectation: ClassVar[bool] = True

    # docstr-coverage: inherited
[docs]    def get_coefficients(
        self, num_candidates: np.ndarray, weights: Optional[np.ndarray] = None
    ) -> AffineTransformationParameters:  # noqa: D102
        mean = self.base.expected_value(num_candidates=num_candidates, weights=weights)
        scale = _safe_divide(1.0, 1.0 - mean)
        offset = -scale * mean
        return AffineTransformationParameters(scale=scale, offset=offset)

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        # should be exactly 0.0
        return 0.0


[docs]@parse_docdata
class ArithmeticMeanRank(RankBasedMetric):
    r"""The (arithmetic) mean rank.

    The mean rank (MR) computes the arithmetic mean over all individual ranks.
    Denoting the set of individual ranks as $\mathcal{I}$, it is given as:

    .. math::

        MR =\frac{1}{|\mathcal{I}|} \sum \limits_{r \in \mathcal{I}} r

    It has the advantage over hits @ k that it is sensitive to any model performance changes, not only what occurs
    under a certain cutoff and therefore reflects average performance. With PyKEEN's standard 1-based indexing,
    the mean rank lies on the interval $[1, \infty)$ where lower is better.

    .. warning::

        While the arithmetic mean rank is interpretable, the mean rank is dependent on the number of candidates.
        A mean rank of 10 might indicate strong performance for a candidate set size of 1,000,000,
        but incredibly poor performance for a candidate set size of 20.

    For the expected value, we have

    .. math::

        \mathbb{E}[MR] &= \mathbb{E}[\frac{1}{n} \sum \limits_{i=1}^{n} r_i] \\
                       &= \frac{1}{n} \sum \limits_{i=1}^{n} \mathbb{E}[r_i] \\
                       &= \frac{1}{n} \sum \limits_{i=1}^{n} \frac{N_i + 1}{2}

    For the variance, we have

    .. math::

        \mathbb{V}[MR] &= \mathbb{V}[\frac{1}{n} \sum \limits_{i=1}^{n} r_i] \\
                       &= \frac{1}{n^2} \sum \limits_{i=1}^{n} \mathbb{V}[r_i] \\
                       &= \frac{1}{n^2} \sum \limits_{i=1}^{n} \frac{N_i^2 - 1}{12} \\
                       &= \frac{1}{12 n^2} \cdot \left(-n + \sum \limits_{i=1}^{n} N_i \right)

    ---
    link: https://pykeen.readthedocs.io/en/stable/tutorial/understanding_evaluation.html#mean-rank
    description: The arithmetic mean over all ranks.
    """

    name = "Mean Rank (MR)"
    value_range = ValueRange(lower=1, lower_inclusive=True, upper=math.inf)
    increasing: ClassVar[bool] = False
    synonyms: ClassVar[Collection[str]] = ("mean_rank", "mr")
    supports_weights: ClassVar[bool] = True
    closed_expectation: ClassVar[bool] = True
    closed_variance: ClassVar[bool] = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.average(np.asanyarray(ranks), weights=weights).item()

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        num_candidates = np.asanyarray(num_candidates)
        individual_expectation = 0.5 * (num_candidates + 1)
        return weighted_mean_expectation(individual=individual_expectation, weights=weights)

    # docstr-coverage: inherited
[docs]    def variance(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        num_candidates = np.asanyarray(num_candidates)
        individual_variance = (num_candidates**2 - 1) / 12.0
        return weighted_mean_variance(individual=individual_variance, weights=weights)


[docs]@parse_docdata
class ZArithmeticMeanRank(ZMetric):
    """The z-scored arithmetic mean rank.

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The z-scored mean rank
    """

    name = "z-Mean Rank (zMR)"
    synonyms: ClassVar[Collection[str]] = ("zamr", "zmr")
    base_cls = ArithmeticMeanRank
    supports_weights: ClassVar[bool] = ArithmeticMeanRank.supports_weights


[docs]@parse_docdata
class InverseArithmeticMeanRank(RankBasedMetric):
    """The inverse arithmetic mean rank.

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The inverse of the arithmetic mean over all ranks.
    """

    name = "Inverse Arithmetic Mean Rank (IAMR)"
    value_range = ValueRange(lower=0, lower_inclusive=False, upper=1, upper_inclusive=True)
    increasing = True
    synonyms: ClassVar[Collection[str]] = ("iamr",)
    supports_weights = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.reciprocal(np.average(np.asanyarray(ranks), weights=weights)).item()


[docs]@parse_docdata
class GeometricMeanRank(RankBasedMetric):
    r"""The (weighted) geometric mean rank.

    It is given by

    .. math::

        M = \left(\prod \limits_{i=1}^{m} r_i^{w_i}\right)^{1/w}

    with $w = \sum \limits_{i=1}^{m} w_i$. The unweighted GMR is obtained by setting $w_i = 1$.

    For computing the expected value, we first observe that

    .. math::

        \mathbb{E}[M] &= \mathbb{E}\left[\sqrt[w]{\prod \limits_{i=1}^{m} r_i^{w_i}}\right] \\
                      &= \prod \limits_{i=1}^{m} \mathbb{E}[r_i^{w_i/w}] \\
                      &= \exp \sum \limits_{i=1}^{m} \log \mathbb{E}[r_i^{w_i/w}]

    where the last steps permits a numerically more stable computation. Moreover, we have

    .. math::

        \log \mathbb{E}[r_i^{w_i/w}]
            &= \log \frac{1}{N_i} \sum \limits_{j=1}^{N_i} j^{w_i/w} \\
            &= -\log \frac{1}{N_i} + \log \sum \limits_{j=1}^{N_i} j^{w_i/w} \\
            &= -\log \frac{1}{N_i} + \log \sum \limits_{j=1}^{N_i} \exp \log j^{w_i/w} \\
            &= -\log \frac{1}{N_i} + \log \sum \limits_{j=1}^{N_i} \exp ( \frac{w_i}{w} \cdot \log j )

    For the second summand in the last line, we observe a log-sum-exp term, with known numerically stable
    implementation.

    Alternatively, we can write

    .. math::
        \log \mathbb{E}[r_i^{w_i/w}]
            &= \log \frac{1}{N_i} \sum \limits_{j=1}^{N_i} j^{w_i/w} \\
            &= \log \frac{H_{-w_i/w}(N_i)}{N_i} \\
            &= \log H_{-w_i/w}(N_i) - \log N_i

    .. math::
        \mathbb{E}[M]
            &= \exp \sum \limits_{i=1}^{m} \log \mathbb{E}[r_i^{w_i/w}] \\
            &= \exp \sum \limits_{i=1}^{m} (\log H_{-w_i/w}(N_i) - \log N_i) \\
            &= \exp \sum \limits_{i=1}^{m} \log H_{-w_i/w}(N_i) - \exp \sum \limits_{i=1}^{m} \log N_i

    where $H_p(n)$ denotes the generalized harmonic number, cf. :func:`generalized_harmonic_numbers`.
    ---
    link: https://arxiv.org/abs/2203.07544
    description: The geometric mean over all ranks.
    """

    name = "Geometric Mean Rank (GMR)"
    value_range = ValueRange(lower=1, lower_inclusive=True, upper=math.inf)
    increasing = False
    synonyms: ClassVar[Collection[str]] = ("gmr",)
    supports_weights = True
    closed_expectation: ClassVar[bool] = True
    closed_variance: ClassVar[bool] = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return stats.gmean(ranks, weights=weights).item()

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        is_log, individual = self._individual_expectation(num_candidates=num_candidates, weights=weights)
        return stable_product(individual, is_log=is_log).item()

    # docstr-coverage: inherited
[docs]    def variance(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        # V (prod x_i) = prod (V[x_i] - E[x_i]^2) - prod(E[x_i])^2
        is_log, individual_expectation = self._individual_expectation(num_candidates=num_candidates, weights=weights)
        if is_log:
            individual_expectation = np.exp(individual_expectation)
        individual_variance = self._individual_variance(
            num_candidates=num_candidates, weights=weights, individual_expectation=individual_expectation
        )
        return (
            stable_product(individual_variance + individual_expectation**2)
            - stable_product(individual_expectation) ** 2
        )

    @classmethod
    def _individual_variance(
        cls, num_candidates: np.ndarray, weights: np.ndarray, individual_expectation: np.ndarray
    ) -> np.ndarray:
        # use V[x] = E[x^2] - E[x]^2
        x2 = (
            np.exp(cls._log_individual_expectation_no_weight(num_candidates=num_candidates, factor=2.0))
            if weights is None
            else cls._individual_expectation_weighted(num_candidates=num_candidates, weights=weights, factor=2.0)
        )
        return x2 - individual_expectation**2

    @classmethod
    def _individual_expectation(
        cls, num_candidates: np.ndarray, weights: Optional[np.ndarray]
    ) -> Tuple[bool, np.ndarray]:
        if weights is None:
            return True, cls._log_individual_expectation_no_weight(num_candidates=num_candidates)
        return False, cls._individual_expectation_weighted(num_candidates=num_candidates, weights=weights)

    @staticmethod
    def _individual_expectation_weighted(
        num_candidates: np.ndarray, weights: np.ndarray, factor: float = 1.0
    ) -> np.ndarray:
        weights = factor * weights / weights.sum()
        x = np.empty_like(weights)
        # group by same weight -> compute H_w(n) for multiple n at once
        unique_weights, inverse = np.unique(weights, return_inverse=True)
        for i, w in enumerate(unique_weights):
            mask = inverse == i
            nc = num_candidates[mask]
            h = generalized_harmonic_numbers(nc.max(), p=w)
            x[mask] = h[nc - 1] / nc
        return x

    @staticmethod
    def _log_individual_expectation_no_weight(num_candidates: np.ndarray, factor: float = 1.0) -> np.ndarray:
        m = num_candidates.size
        # we compute log E[r_i^(1/m)] for all N_i = 1 ... max_N_i once
        max_val = num_candidates.max()
        x = np.arange(1, max_val + 1, dtype=float)
        x = factor * np.log(x) / m
        x = logcumsumexp(x)
        # now select from precomputed cumulative sums and aggregate
        x = x[num_candidates - 1] - np.log(num_candidates)
        return x


[docs]@parse_docdata
class InverseGeometricMeanRank(RankBasedMetric):
    r"""The inverse geometric mean rank.

    The mean rank corresponds to the arithmetic mean, and tends to be more affected by high rank values.
    The mean reciprocal rank corresponds to the harmonic mean, and tends to be more affected by low rank values.
    The remaining Pythagorean mean, the geometric mean, lies in the center and therefore could better balance these
    biases. Therefore, the inverse geometric mean rank (IGMR) is defined as:

    .. math::

        IGMR = \sqrt[\|\mathcal{I}\|]{\prod \limits_{r \in \mathcal{I}} r}

    .. note:: This metric is novel as of its implementation in PyKEEN and was proposed by Max Berrendorf

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The inverse of the geometric mean over all ranks.
    """

    name = "Inverse Geometric Mean Rank (IGMR)"
    value_range = ValueRange(lower=0, lower_inclusive=False, upper=1, upper_inclusive=True)
    increasing = True
    synonyms: ClassVar[Collection[str]] = ("igmr",)
    supports_weights = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.reciprocal(stats.gmean(ranks, weights=weights)).item()


[docs]@parse_docdata
class HarmonicMeanRank(RankBasedMetric):
    """The harmonic mean rank.

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The harmonic mean over all ranks.
    """

    name = "Harmonic Mean Rank (HMR)"
    value_range = ValueRange(lower=1, lower_inclusive=True, upper=math.inf)
    increasing = False
    synonyms: ClassVar[Collection[str]] = ("hmr",)
    supports_weights = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return weighted_harmonic_mean(a=ranks, weights=weights).item()


[docs]def generalized_harmonic_numbers(n: int, p: float = -1.0) -> np.ndarray:
    r"""
    Calculate the generalized harmonic numbers from 1 to n (both inclusive).

    .. math::

        H_p(n) = \sum \limits_{i=1}^{n} i^{-p}

    :param n:
        the maximum number for which the generalized harmonic numbers are calculated
    :param p:
        the power, typically negative

    :return: shape: (n,)
        the first $n$ generalized harmonic numbers

    .. seealso::
        https://en.wikipedia.org/wiki/Harmonic_number#Generalizations
    """
    return np.cumsum(np.power(np.arange(1, n + 1, dtype=float), p))


[docs]def harmonic_variances(n: int) -> np.ndarray:
    r"""
    Pre-calculate variances of inverse rank distributions.

    With

    .. math::

        H_p(n) = \sum \limits_{i=1}^{n} i^{-p}

    denoting the generalized harmonic numbers, and abbreviating $H(n) := H_1(n)$, we have

    .. math::

        \textit{V}[n]
            &= \frac{1}{n} \sum \limits_{i=1}^n \left( i^{-1} - \frac{H(n)}{n} \right)^2 \\
            &= \frac{n \cdot H_2(n) - H(n)^2}{n^2}

    :param n:
        the maximum rank number

    :return: shape: (n+1,)
        the variances for the discrete uniform distribution over $\{\frac{1}{1}, \dots, \frac{1}{k}\}$`
    """
    h = generalized_harmonic_numbers(n)
    h2 = generalized_harmonic_numbers(n, p=-2)
    n = np.arange(1, n + 1, dtype=float)
    v = (n * h2 - h**2) / n**2
    # ensure non-negativity, mathematically not necessary, but just to be safe from the numeric perspective
    # cf. https://en.wikipedia.org/wiki/Loss_of_significance#Subtraction
    v = np.maximum(v, 0.0)
    return v


[docs]@parse_docdata
class InverseHarmonicMeanRank(RankBasedMetric):
    r"""The inverse harmonic mean rank.

    The mean reciprocal rank (MRR) is the arithmetic mean of reciprocal ranks, and thus the inverse of the harmonic mean
    of the ranks. It is defined as:

    .. math::

        IHMR = MRR =\frac{1}{|\mathcal{I}|} \sum_{r \in \mathcal{I}} r^{-1}

    .. warning::

        It has been argued that the mean reciprocal rank has theoretical flaws by [fuhr2018]_. However, this opinion
        is not undisputed, cf. [sakai2021]_.

    Despite its flaws, MRR is still often used during early stopping due to its behavior related to low rank values.
    While the hits @ k ignores changes among high rank values completely and the mean rank changes uniformly
    across the full value range, the mean reciprocal rank is more affected by changes of low rank values than high ones
    (without disregarding them completely like hits @ k does for low rank values)
    Therefore, it can be considered as soft a version of hits @ k that is less sensitive to outliers.
    It is bound on $(0, 1]$ where closer to 1 is better.

    Let

    .. math::

        H_m(n) = \sum \limits_{i=1}^{n} i^{-m}

    denote the generalized harmonic number, with $H(n) := H_{1}(n)$ for brevity.
    Thus, we have

    .. math::

        \mathbb{E}\left[r_i^{-1}\right] = \frac{H(N_i)}{N_i}

    and hence

    .. math::

        \mathbb{E}\left[\textrm{MRR}\right]
            &= \mathbb{E}\left[\frac{1}{n} \sum \limits_{i=1}^n r_i^{-1}\right] \\
            &= \frac{1}{n} \sum \limits_{i=1}^n \mathbb{E}\left[r_i^{-1}\right] \\
            &= \frac{1}{n} \sum \limits_{i=1}^n \frac{H(N_i)}{N_i}

    For the variance, we have for the individual ranks

    .. math::

        \mathbb{V}\left[r_i^{-1}\right]
            &= \frac{1}{N_i} \sum \limits_{i=1}^{N_i} \left(\frac{H(N_i)}{N_i} - \frac{1}{i}\right)^2 \\
            &= \frac{N_i \cdot H_2(N_i) - H(N_i)^2}{N_i^2}

    and thus overall

    .. math::

        \mathbb{V}\left[\textrm{MRR}\right]
            &= \mathbb{V}\left[\frac{1}{n} \sum \limits_{i=1}^n r_i^{-1}\right] \\
            &= \frac{1}{n^2} \sum \limits_{i=1}^n \mathbb{V}\left[r_i^{-1}\right] \\
            &= \frac{1}{n^2} \sum \limits_{i=1}^n \frac{N_i \cdot H_2(N_i) - H(N_i)^2}{N_i^2} \\

    .. seealso::
        https://en.wikipedia.org/wiki/Inverse_distribution#Inverse_uniform_distribution

    ---
    link: https://en.wikipedia.org/wiki/Mean_reciprocal_rank
    description: The inverse of the harmonic mean over all ranks.
    """

    name = "Mean Reciprocal Rank (MRR)"
    value_range = ValueRange(lower=0, lower_inclusive=False, upper=1, upper_inclusive=True)
    synonyms: ClassVar[Collection[str]] = ("mean_reciprocal_rank", "mrr")
    increasing = True
    supports_weights = True
    closed_expectation: ClassVar[bool] = True
    closed_variance: ClassVar[bool] = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.reciprocal(weighted_harmonic_mean(a=ranks, weights=weights)).item()

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        num_candidates = np.asanyarray(num_candidates)
        n = num_candidates.max().item()
        expectation = generalized_harmonic_numbers(n, p=-1.0) / np.arange(1, n + 1)
        individual = expectation[num_candidates - 1]
        return weighted_mean_expectation(individual, weights)

    # docstr-coverage: inherited
[docs]    def variance(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        num_candidates = np.asanyarray(num_candidates)
        n = num_candidates.max().item()
        individual = harmonic_variances(n)[num_candidates - 1]
        return weighted_mean_variance(individual, weights)


[docs]@parse_docdata
class AdjustedInverseHarmonicMeanRank(ReindexedMetric):
    r"""The adjusted MRR index.

    .. note ::
        the actual lower bound is $\frac{-\mathbb{E}[\text{MRR}]}{1-\mathbb{E}[\text{MRR}]}$,
        and thus data dependent.

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The re-indexed adjusted MRR
    tight_lower: \frac{-E[f]}{1-E[f]}
    """

    name = "Adjusted Inverse Harmonic Mean Rank"
    synonyms: ClassVar[Collection[str]] = ("amrr", "aihmr", "adjusted_mrr", "adjusted_mean_reciprocal_rank")
    value_range = ValueRange(lower=None, lower_inclusive=False, upper=1, upper_inclusive=True)
    base_cls = InverseHarmonicMeanRank
    supports_weights: ClassVar[bool] = InverseHarmonicMeanRank.supports_weights


[docs]@parse_docdata
class ZInverseHarmonicMeanRank(ZMetric):
    """The z-inverse harmonic mean rank (ZIHMR).

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The z-scored mean reciprocal rank
    """

    name = "z-Mean Reciprocal Rank (zMRR)"
    synonyms: ClassVar[Collection[str]] = ("zmrr", "zihmr")
    base_cls = InverseHarmonicMeanRank
    supports_weights: ClassVar[bool] = InverseHarmonicMeanRank.supports_weights


[docs]@parse_docdata
class ZGeometricMeanRank(ZMetric):
    """The z geometric mean rank (zGMR).

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The z-scored geometric mean rank
    """

    name = "z-Geometric Mean Rank (zGMR)"
    synonyms: ClassVar[Collection[str]] = ("zgmr",)
    base_cls = GeometricMeanRank
    supports_weights: ClassVar[bool] = GeometricMeanRank.supports_weights


[docs]@parse_docdata
class MedianRank(RankBasedMetric):
    """The median rank.

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The median over all ranks.
    """

    name = "Median Rank"
    value_range = ValueRange(lower=1, lower_inclusive=True, upper=math.inf)
    increasing = False
    supports_weights = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        if weights is None:
            return np.median(ranks).item()

        return weighted_median(a=ranks, weights=weights).item()


[docs]@parse_docdata
class InverseMedianRank(RankBasedMetric):
    """The inverse median rank.

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The inverse of the median over all ranks.
    """

    name = "Inverse Median Rank"
    value_range = ValueRange(lower=0, lower_inclusive=False, upper=1, upper_inclusive=True)
    increasing = True
    supports_weights = True

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.reciprocal(weighted_median(a=ranks, weights=weights)).item()


[docs]@parse_docdata
class StandardDeviation(RankBasedMetric):
    """The ranks' standard deviation.

    ---
    link: https://pykeen.readthedocs.io/en/stable/tutorial/understanding_evaluation.html
    """

    name = "Standard Deviation (std)"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=math.inf)
    increasing = False
    synonyms: ClassVar[Collection[str]] = ("rank_std", "std")

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.asanyarray(ranks).std().item()


[docs]@parse_docdata
class Variance(RankBasedMetric):
    """The ranks' variance.

    ---
    link: https://pykeen.readthedocs.io/en/stable/tutorial/understanding_evaluation.html
    """

    name = "Variance"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=math.inf)
    increasing = False
    synonyms: ClassVar[Collection[str]] = ("rank_var", "var")

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.asanyarray(ranks).var().item()


@parse_docdata
class MedianAbsoluteDeviation(RankBasedMetric):
    """The ranks' median absolute deviation (MAD).

    ---
    link: https://pykeen.readthedocs.io/en/stable/tutorial/understanding_evaluation.html
    """

    name = "Median Absolute Deviation (MAD)"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=math.inf)
    increasing = False
    synonyms: ClassVar[Collection[str]] = ("rank_mad", "mad")
    supports_weights = True

    # docstr-coverage: inherited
    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        if weights is None:
            return stats.median_abs_deviation(ranks, scale="normal").item()

        return weighted_median(a=np.abs(ranks - weighted_median(a=ranks, weights=weights)), weights=weights).item()


[docs]@parse_docdata
class Count(RankBasedMetric):
    """The ranks' count.

    Lower numbers may indicate unreliable results.
    ---
    link: https://pykeen.readthedocs.io/en/stable/reference/evaluation.html
    """

    name = "Count"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=math.inf)
    increasing = True
    synonyms: ClassVar[Collection[str]] = ("rank_count",)

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        # TODO: should we return the sum of weights?
        return float(np.asanyarray(ranks).size)


[docs]@parse_docdata
class HitsAtK(RankBasedMetric):
    r"""The Hits @ k.

    The hits @ k describes the fraction of true entities that appear in the first $k$ entities of the sorted rank list.
    Denoting the set of individual ranks as $\mathcal{I}$, it is given as:

    .. math::

        H_k = \frac{1}{|\mathcal{I}|} \sum \limits_{r \in \mathcal{I}} \mathbb{I}[r \leq k]

    For example, if Google shows 20 results on the first page, then the percentage of results that are relevant is the
    hits @ 20. The hits @ k, regardless of $k$, lies on the $[0, 1]$ where closer to 1 is better.

    .. warning::

        This metric does not differentiate between cases when the rank is larger than $k$.
        This means that a miss with rank $k+1$ and $k+d$ where $d \gg 1$ have the same
        effect on the final score. Therefore, it is less suitable for the comparison of different
        models.

    For the expected values, we first note that

    .. math::

        \mathbb{I}[r_i \leq k] \sim \textit{Bernoulli}(p_i)

    with $p_i = \min\{\frac{k}{N_i}, 1\}$. Thus, we have

    .. math::

        \mathbb{E}[\mathbb{I}[r_i \leq k]] = p_i

    and

    .. math::

        \mathbb{V}[\mathbb{I}[r_i \leq k]] = p_i \cdot (1 - p_i)

    Hence, we obtain

    .. math::

        \mathbb{E}[Hits@k] &= \mathbb{E}\left[\frac{1}{n} \sum \limits_{i=1}^{n} \mathbb{I}[r_i \leq k]\right] \\
                           &= \frac{1}{n} \sum \limits_{i=1}^{n} \mathbb{E}[\mathbb{I}[r_i \leq k]] \\
                           &= \frac{1}{n} \sum \limits_{i=1}^{n} p_i

    For the variance, we have

    .. math::

        \mathbb{V}[Hits@k] &= \mathbb{V}\left[\frac{1}{n} \sum \limits_{i=1}^{n} \mathbb{I}[r_i \leq k]\right] \\
                           &= \frac{1}{n^2} \sum \limits_{i=1}^{n} \mathbb{V}\left[\mathbb{I}[r_i \leq k]\right] \\
                           &= \frac{1}{n^2} \sum \limits_{i=1}^{n} p_i(1 - p_i)
    ---
    description: The relative frequency of ranks not larger than a given k.
    link: https://pykeen.readthedocs.io/en/stable/tutorial/understanding_evaluation.html#hits-k
    """

    name = "Hits @ K"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=1, upper_inclusive=True)
    synonyms: ClassVar[Collection[str]] = ("h@k", "hits@k", "h@", "hits@", "hits_at_", "h_at_")
    increasing = True
    supports_weights = True
    closed_expectation: ClassVar[bool] = True
    closed_variance: ClassVar[bool] = True

    def __init__(self, k: int = 10) -> None:
        """
        Initialize the metric.

        :param k:
            the parameter $k$ of number of top entries to consider
        """
        super().__init__()
        self.k = k

    # docstr-coverage: inherited
[docs]    def iter_extra_repr(self) -> Iterable[str]:  # noqa: D102
        yield from super().iter_extra_repr()
        yield f"k={self.k}"

    # docstr-coverage: inherited
[docs]    def __call__(
        self, ranks: np.ndarray, num_candidates: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None
    ) -> float:  # noqa: D102
        return np.average(np.less_equal(ranks, self.k), weights=weights).item()

    # docstr-coverage: inherited
    @property
    def key(self) -> str:  # noqa: D102
        return super().key[:-1] + str(self.k)

    # docstr-coverage: inherited
[docs]    def expected_value(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        num_candidates = np.asanyarray(num_candidates, dtype=float)
        # for each individual ranking task, we have I[r_i <= k] ~ Bernoulli(k/N_i)
        individual = np.minimum(self.k / num_candidates, 1.0)
        return weighted_mean_expectation(individual=individual, weights=weights)

    # docstr-coverage: inherited
[docs]    def variance(
        self,
        num_candidates: np.ndarray,
        num_samples: Optional[int] = None,
        weights: Optional[np.ndarray] = None,
        **kwargs,
    ) -> float:  # noqa: D102
        # for each individual ranking task, we have I[r_i <= k] ~ Bernoulli(k/N_i)
        num_candidates = np.asanyarray(num_candidates, dtype=float)
        p = np.minimum(self.k / num_candidates, 1.0)
        individual_variance = p * (1 - p)
        return weighted_mean_variance(individual=individual_variance, weights=weights)


[docs]@parse_docdata
class AdjustedHitsAtK(ReindexedMetric):
    r"""The adjusted Hits at K ($AH_k$).

    .. note ::
        the actual lower bound is $\frac{-\mathbb{E}[H_k]}{1 - \mathbb{E}[H_k]}$, and thus data dependent.

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The re-indexed adjusted hits at K
    tight_lower: \frac{-E[f]}{1-E[f]}
    """

    name = "Adjusted Hits at K"
    synonyms: ClassVar[Collection[str]] = (
        "ahk",
        "ah@k",
        "ahits@k",
        "ah@",
        "ahits@",
        "ahits_at_",
        "ah_at_",
        "adjusted_hits_at_",
    )
    value_range = ValueRange(lower=None, lower_inclusive=False, upper=1, upper_inclusive=True)
    base_cls = HitsAtK
    supports_weights: ClassVar[bool] = HitsAtK.supports_weights


[docs]@parse_docdata
class ZHitsAtK(ZMetric):
    """The z-scored hits at k ($ZAH_k$).

    ---
    link: https://arxiv.org/abs/2203.07544
    description: The z-scored hits at K
    """

    name = "z-Hits at K"
    synonyms: ClassVar[Collection[str]] = ("z_hits_at_", "zahk")
    increasing = True
    supported_rank_types = (RANK_REALISTIC,)
    needs_candidates = True
    base_cls = HitsAtK
    supports_weights: ClassVar[bool] = HitsAtK.supports_weights


[docs]@parse_docdata
class AdjustedArithmeticMeanRank(ExpectationNormalizedMetric):
    """The adjusted arithmetic mean rank (AMR).

    The adjusted (arithmetic) mean rank (AMR) was introduced by [berrendorf2020]. It is defined as the ratio of the
    mean rank to the expected mean rank. It lies on the open interval $(0, 2)$ where lower is better.

    ---
    description: The mean over all ranks divided by its expected value.
    link: https://arxiv.org/abs/2002.06914
    """

    name = "Adjusted Arithmetic Mean Rank (AAMR)"
    value_range = ValueRange(lower=0, lower_inclusive=True, upper=2, upper_inclusive=False)
    synonyms: ClassVar[Collection[str]] = ("adjusted_mean_rank", "amr", "aamr")
    supported_rank_types = (RANK_REALISTIC,)
    needs_candidates = True
    increasing = False
    base_cls = ArithmeticMeanRank
    supports_weights: ClassVar[bool] = ArithmeticMeanRank.supports_weights


[docs]@parse_docdata
class AdjustedArithmeticMeanRankIndex(ReindexedMetric):
    """The adjusted arithmetic mean rank index (AMRI).

    The adjusted (arithmetic) mean rank index (AMRI) was introduced by [berrendorf2020] to make the AMR more intuitive.
    The AMRI has a bounded value range of $[-1, 1]$ where closer to 1 is better.

    ---
    link: https://arxiv.org/abs/2002.06914
    description: The re-indexed adjusted mean rank (AAMR)
    """

    name = "Adjusted Arithmetic Mean Rank Index (AAMRI)"
    value_range = ValueRange(lower=-1, lower_inclusive=True, upper=1, upper_inclusive=True)
    synonyms: ClassVar[Collection[str]] = ("adjusted_mean_rank_index", "amri", "aamri")
    base_cls = ArithmeticMeanRank
    supports_weights: ClassVar[bool] = ArithmeticMeanRank.supports_weights


[docs]@parse_docdata
class AdjustedGeometricMeanRankIndex(ReindexedMetric):
    r"""The adjusted geometric mean rank index (AGMRI).

    ---
    link: https://arxiv.org/abs/2002.06914
    description: The re-indexed adjusted geometric mean rank (AGMRI)
    tight_lower: \frac{-E[f]}{1-E[f]}
    """

    name = "Adjusted Geometric Mean Rank Index (AGMRI)"
    value_range = ValueRange(lower=None, lower_inclusive=False, upper=1, upper_inclusive=True)
    synonyms: ClassVar[Collection[str]] = ("gmri", "agmri")
    base_cls = GeometricMeanRank
    supports_weights: ClassVar[bool] = GeometricMeanRank.supports_weights


rank_based_metric_resolver: ClassResolver[RankBasedMetric] = ClassResolver.from_subclasses(
    base=RankBasedMetric,
    default=InverseHarmonicMeanRank,  # mrr
    skip={ExpectationNormalizedMetric, ReindexedMetric, ZMetric, DerivedRankBasedMetric},
)
"""The rank-based metric resolver allows for the lookup and instantiation of classes
deriving from :class:`RankBasedMetric` via the :mod:`class_resolver`.
"""

HITS_METRICS: Tuple[Type[RankBasedMetric], ...] = (HitsAtK, ZHitsAtK, AdjustedHitsAtK)