# -*- coding: utf-8 -*-
"""Similarity functions."""
import itertools
import math
import torch
from .compute_kernel import batched_dot
from ..typing import GaussianDistribution
from ..utils import at_least_eps, calculate_broadcasted_elementwise_result_shape, tensor_sum
__all__ = [
"expected_likelihood",
"kullback_leibler_similarity",
"KG2E_SIMILARITIES",
]
# TODO test
def expected_likelihood(
    h: GaussianDistribution,
    r: GaussianDistribution,
    t: GaussianDistribution,
    exact: bool = True,
) -> torch.FloatTensor:
    r"""Compute the similarity based on expected likelihood.

    .. math::

        D((\mu_e, \Sigma_e), (\mu_r, \Sigma_r))
        = \frac{1}{2} \left(
            (\mu_e - \mu_r)^T(\Sigma_e + \Sigma_r)^{-1}(\mu_e - \mu_r)
            + \log \det (\Sigma_e + \Sigma_r) + d \log (2 \pi)
        \right)
        = \frac{1}{2} \left(
            \mu^T\Sigma^{-1}\mu
            + \log \det \Sigma + d \log (2 \pi)
        \right)

    with :math:`\mu_e = \mu_h - \mu_t` and :math:`\Sigma_e = \Sigma_h + \Sigma_t`.

    :param h: shape: (batch_size, num_heads, 1, 1, d)
        The head entity Gaussian distribution.
    :param r: shape: (batch_size, 1, num_relations, 1, d)
        The relation Gaussian distribution.
    :param t: shape: (batch_size, 1, 1, num_tails, d)
        The tail entity Gaussian distribution.
    :param exact:
        Whether to return the exact similarity, or leave out constant offsets.

    :return: torch.Tensor, shape: (batch_size, num_heads, num_relations, num_tails)
        The similarity.
    """
    # combined covariance \Sigma = \Sigma_h + \Sigma_r + \Sigma_t and mean difference
    # \mu = \mu_h - \mu_t - \mu_r; shape: (batch_size, num_heads, num_relations, num_tails, d)
    var = tensor_sum(*(d.diagonal_covariance for d in (h, r, t)))
    mean = tensor_sum(h.mean, -t.mean, -r.mean)

    # capture the embedding dimension d *before* it is reduced away below
    dim = mean.shape[-1]

    #: a = \mu^T\Sigma^{-1}\mu
    safe_sigma = at_least_eps(var)
    sim = batched_dot(
        a=safe_sigma.reciprocal(),
        b=(mean**2),
    )

    #: b = \log \det \Sigma
    sim = sim + safe_sigma.log().sum(dim=-1)
    if exact:
        # BUGFIX: the constant offset is d \log(2\pi) with d the embedding dimension;
        # at this point sim.shape[-1] is num_tails (cf. the documented return shape),
        # so using sim.shape[-1] here computed the wrong constant.
        sim = sim + dim * math.log(2.0 * math.pi)
    return sim
def kullback_leibler_similarity(
    h: GaussianDistribution,
    r: GaussianDistribution,
    t: GaussianDistribution,
    exact: bool = True,
) -> torch.FloatTensor:
    r"""Compute the negative KL divergence between Gaussian distributions.

    Both distributions are given by a mean vector and a *diagonal* covariance matrix:

    .. math::

        D((\mu_0, \Sigma_0), (\mu_1, \Sigma_1)) = 0.5 \cdot (
            tr(\Sigma_1^{-1} \Sigma_0)
            + (\mu_1 - \mu_0)^T \Sigma_1^{-1} (\mu_1 - \mu_0)
            - k
            + \ln (\det(\Sigma_1) / \det(\Sigma_0))
        )

    with :math:`\mu_e = \mu_h - \mu_t` and :math:`\Sigma_e = \Sigma_h + \Sigma_t`.

    .. note ::
        This method assumes diagonal covariance matrices :math:`\Sigma`.

    .. seealso ::
        https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Kullback%E2%80%93Leibler_divergence

    :param h: shape: (batch_size, num_heads, 1, 1, d)
        The head entity Gaussian distribution.
    :param r: shape: (batch_size, 1, num_relations, 1, d)
        The relation Gaussian distribution.
    :param t: shape: (batch_size, 1, 1, num_tails, d)
        The tail entity Gaussian distribution.
    :param exact:
        Whether to return the exact similarity, or leave out constant offsets.

    :return: torch.Tensor, shape: (s_1, ..., s_k)
        The similarity.
    """
    # sanity check (debug builds only): diagonal covariances must be strictly positive
    assert all((dist.diagonal_covariance > 0).all() for dist in (h, r, t))
    divergence = _vectorized_kl_divergence(h=h, r=r, t=t, exact=exact)
    # similarity = negative divergence
    return -divergence
def _vectorized_kl_divergence(
    h: GaussianDistribution,
    r: GaussianDistribution,
    t: GaussianDistribution,
    exact: bool = True,
) -> torch.FloatTensor:
    r"""Vectorized implementation of KL-divergence.

    Computes the divergence between :math:`\mathcal{N}(\mu_e, \Sigma_e)` and
    :math:`\mathcal{N}(\mu_r, \Sigma_r)` given by

    .. math ::
        \mu_e = \mu_h - \mu_t

        \Sigma_e = \Sigma_h + \Sigma_t

    where all covariance matrices are diagonal, which allows the simplification

    .. math ::
        D(\mathcal{N}(\mu_e, \Sigma_e), \mathcal{N}(\mu_r, \Sigma_r))
        = 0.5 \cdot (
            \sum_i \Sigma_e[i] / \Sigma_r[i]
            + \sum_i \mu[i]^2 / \Sigma_r[i]
            + \sum_i \ln \Sigma_r[i]
            - \sum_i \ln \Sigma_e[i]
            - k
        )

    where :math:`\mu = \mu_r - \mu_e = \mu_r - \mu_h + \mu_t`.

    :param h: shape: (batch_size, num_heads, 1, 1, d)
        The head entity Gaussian distribution.
    :param r: shape: (batch_size, 1, num_relations, 1, d)
        The relation Gaussian distribution.
    :param t: shape: (batch_size, 1, 1, num_tails, d)
        The tail entity Gaussian distribution.
    :param exact:
        Whether to return the exact similarity, or leave out constant offsets.

    :return: torch.Tensor, shape: (s_1, ..., s_k)
        The KL-divergence.
    """
    # combined entity covariance, and relation covariance clamped away from zero
    e_var = h.diagonal_covariance + t.diagonal_covariance
    r_var_safe = at_least_eps(r.diagonal_covariance)
    r_var_inv = r_var_safe.reciprocal()

    summands = []

    # trace term: tr(Sigma_r^-1 Sigma_e) = sum_i Sigma_e[i] / Sigma_r[i]
    summands.append(batched_dot(e_var, r_var_inv))

    # quadratic term: with mu = mu_r - mu_e and Sigma_r diagonal,
    # (mu_r - mu_e)^T Sigma_r^-1 (mu_r - mu_e) = sum_i mu[i]^2 / Sigma_r[i]
    mu = tensor_sum(r.mean, -h.mean, t.mean)
    summands.append(batched_dot(mu**2, r_var_inv))

    # constant term: -k, with k the embedding dimension (constant offset, exact mode only)
    if exact:
        summands.append(-torch.as_tensor(data=[h.mean.shape[-1]], device=mu.device).squeeze())

    # log-determinant term: ln(det Sigma_r / det Sigma_e)
    # = sum_i ln Sigma_r[i] - sum_i ln Sigma_e[i]  (diagonal covariances)
    e_var_safe = at_least_eps(e_var)
    summands.append(r_var_safe.log().sum(dim=-1))
    summands.append(-e_var_safe.log().sum(dim=-1))

    total = tensor_sum(*summands)
    # the global 0.5 factor is itself a constant scaling, only applied in exact mode
    return 0.5 * total if exact else total
def _torch_kl_similarity(
    h: GaussianDistribution,
    r: GaussianDistribution,
    t: GaussianDistribution,
) -> torch.FloatTensor:
    """Compute KL similarity using torch.distributions (slow reference implementation).

    :param h: shape: (batch_size, num_heads, 1, 1, d)
        The head entity Gaussian distribution.
    :param r: shape: (batch_size, 1, num_relations, 1, d)
        The relation Gaussian distribution.
    :param t: shape: (batch_size, 1, 1, num_tails, d)
        The tail entity Gaussian distribution.

    :return: torch.Tensor, shape: (s_1, ..., s_k)
        The KL-divergence.

    .. warning ::
        Do not use this method in production code.
    """
    # entity distribution: mu_e = mu_h - mu_t, Sigma_e = Sigma_h + Sigma_t
    e_mean = h.mean - t.mean
    e_var = h.diagonal_covariance + t.diagonal_covariance

    # determine the broadcast output shape and allocate the result tensor
    dims = calculate_broadcasted_elementwise_result_shape(
        e_mean.shape,
        r.mean.shape,
    )[:-1]
    batch_size, num_heads, num_relations, num_tails = dims
    result = h.mean.new_empty(batch_size, num_heads, num_relations, num_tails)

    # explicit loop over every (batch, head, relation, tail) combination
    for b, i, j, k in itertools.product(*(range(d) for d in dims)):
        p = torch.distributions.MultivariateNormal(
            loc=e_mean[b, i, 0, k, :],
            covariance_matrix=torch.diag(e_var[b, i, 0, k, :]),
        )
        q = torch.distributions.MultivariateNormal(
            loc=r.mean[b, 0, j, 0, :],
            covariance_matrix=torch.diag(r.diagonal_covariance[b, 0, j, 0, :]),
        )
        result[b, i, j, k] = torch.distributions.kl_divergence(p=p, q=q).view(-1)

    # similarity = negative divergence
    return -result
#: Lookup table from similarity key to the corresponding similarity function:
#: "KL" = negative Kullback-Leibler divergence, "EL" = expected likelihood.
#: NOTE(review): presumably consumed by the KG2E interaction/model to select
#: its similarity by name — confirm against callers.
KG2E_SIMILARITIES = {
    "KL": kullback_leibler_similarity,
    "EL": expected_likelihood,
}