Source code for pykeen.pipeline.plot_utils

# -*- coding: utf-8 -*-

"""Plotting utilities for the pipeline results."""

import logging
from typing import Callable, Mapping, Optional, Set

from ..losses import loss_resolver
from ..models.nbase import ERModel
from ..nn.representation import Representation
from ..stoppers import EarlyStopper

__all__ = [
    "plot_losses",
    "plot_early_stopping",
    "plot_er",
    "plot",
]

logger = logging.getLogger(__name__)

REDUCER_RELATION_WHITELIST = {"PCA"}


[docs]def plot_losses(pipeline_result, *, ax=None):
    """Plot the losses per epoch."""
    import seaborn as sns

    sns.set_style("darkgrid")

    ax = _ensure_ax(ax)
    rv = sns.lineplot(x=range(len(pipeline_result.losses)), y=pipeline_result.losses, ax=ax)

    loss_name = loss_resolver.normalize_inst(pipeline_result.model.loss)
    ax.set_ylabel(f"{loss_name} Loss")
    ax.set_xlabel("Epoch")
    ax.set_title(pipeline_result.title if pipeline_result.title is not None else "Losses Plot")
    return rv


[docs]def plot_early_stopping(pipeline_result, *, ax=None, lineplot_kwargs=None):
    """Plot the evaluations during early stopping."""
    import seaborn as sns

    if not isinstance(pipeline_result.stopper, EarlyStopper) or not pipeline_result.stopper.results:
        raise ValueError

    ax = _ensure_ax(ax)

    x = [(1 + e) * pipeline_result.stopper.frequency for e in range(len(pipeline_result.stopper.results))]
    rv = sns.lineplot(x=x, y=pipeline_result.stopper.results, ax=ax, marker="o", **(lineplot_kwargs or {}))

    ax.set_ylabel(pipeline_result.stopper.metric)
    ax.set_xlabel("Epoch")
    ax.set_title(pipeline_result.title if pipeline_result.title is not None else "Early Stopper Evaluation Plot")
    return rv


def build_representation_getter(relation: bool = False, index: int = 0) -> Callable[[ERModel], Representation]:
    """
    Build a representation getter.

    :param relation:
        whether to get relation representations, or entity representations.
    :param index:
        the index of the representation to get

    :return:
        a function to get the representation.
    """

    def getter(model: ERModel) -> Representation:
        """
        Get a specific representation from model.

        :param model:
            the model

        :return:
            the representation
        """
        # cf. also https://github.com/pykeen/pykeen/issues/1071
        representations = model.relation_representations if relation else model.entity_representations
        return representations[index]

    return getter


[docs]def plot_er(  # noqa: C901
    pipeline_result,
    *,
    model: Optional[str] = None,
    entities: Optional[Set[str]] = None,
    relations: Optional[Set[str]] = None,
    apply_limits: bool = True,
    margin: float = 0.4,
    plot_entities: bool = True,
    plot_relations: Optional[bool] = None,
    annotation_x_offset: float = 0.02,
    annotation_y_offset: float = 0.03,
    entity_embedding_getter=None,
    relation_embedding_getter=None,
    ax=None,
    subtitle: Optional[str] = None,
    **kwargs,
):
    """Plot the reduced entities and relation vectors in 2D.

    :param pipeline_result: The result returned by :func:`pykeen.pipeline.pipeline`.
    :param model: The dimensionality reduction model from :mod:`sklearn`. Defaults to PCA.
        Can also use KPCA, GRP, SRP, TSNE, LLE, ISOMAP, MDS, or SE.
    :param entities: A subset of entities to plot
    :param relations: A subset of relations to plot
    :param apply_limits: Should the x and y limits be applied?
    :param margin: The margin size around the minimum/maximum x and y values
    :param plot_entities: If true, plot the entities based on their reduced embeddings
    :param plot_relations: By default, this is only enabled on translational distance models
        like :class:`pykeen.models.TransE`.
    :param annotation_x_offset: X offset of label from entity position
    :param annotation_y_offset: Y offset of label from entity position
    :param entity_embedding_getter: A function that takes a model and returns its entity embeddings. If none,
        defaults to :func:`_default_entity_embedding_getter`, which just gets ``model.entity_embeddings``. Note,
        the default only works with old-style PyKEEN models.
    :param relation_embedding_getter: A function that takes a model and returns its relation embeddings. If none,
        defaults to :func:`_default_relation_embedding_getter`, which just gets ``model.relation_embeddings``. Note,
        the default only works with old-style PyKEEN models.
    :param ax: The matplotlib axis, if pre-defined
    :param subtitle: A user-defined subtitle. Is inferred if not given. Pass an empty string to not use a subtitle.
    :param kwargs: The keyword arguments passed to `__init__()` of
        the reducer class (e.g., PCA, TSNE)
    :returns: The axis

    :raises ValueError: if entity plotting and relation plotting are both turned off

    .. warning::

        Plotting relations and entities on the same plot is only
        meaningful for translational distance models like TransE.
    """
    import seaborn as sns

    if not plot_entities and not plot_relations:
        raise ValueError

    if plot_relations is None:  # automatically set to true for translational models, false otherwise
        plot_relations = pipeline_result.model.__class__.__name__.lower().startswith("trans")

    if model is None:
        model = "PCA"
    reducer_cls, reducer_kwargs = _get_reducer_cls(model, **kwargs)
    if plot_relations and reducer_cls.__name__ not in REDUCER_RELATION_WHITELIST:
        raise ValueError(f"Can not use reducer {reducer_cls} when projecting relations. Will result in nonsense")
    reducer = reducer_cls(n_components=2, **reducer_kwargs)

    ax = _ensure_ax(ax)

    sns.set_style("whitegrid")

    if entity_embedding_getter is None:
        entity_embedding_getter = build_representation_getter(relation=False, index=0)
    if relation_embedding_getter is None:
        relation_embedding_getter = build_representation_getter(relation=True, index=0)

    if plot_relations and plot_entities:
        e_embeddings, e_reduced = _reduce_embeddings(entity_embedding_getter(pipeline_result.model), reducer, fit=True)
        r_embeddings, r_reduced = _reduce_embeddings(
            relation_embedding_getter(pipeline_result.model),
            reducer,
            fit=False,
        )

        xmax = max(r_embeddings[:, 0].max(), e_embeddings[:, 0].max()) + margin
        xmin = min(r_embeddings[:, 0].min(), e_embeddings[:, 0].min()) - margin
        ymax = max(r_embeddings[:, 1].max(), e_embeddings[:, 1].max()) + margin
        ymin = min(r_embeddings[:, 1].min(), e_embeddings[:, 1].min()) - margin
    elif plot_relations:
        e_embeddings, e_reduced = None, False
        r_embeddings, r_reduced = _reduce_embeddings(
            relation_embedding_getter(pipeline_result.model),
            reducer,
            fit=True,
        )

        xmax = r_embeddings[:, 0].max() + margin
        xmin = r_embeddings[:, 0].min() - margin
        ymax = r_embeddings[:, 1].max() + margin
        ymin = r_embeddings[:, 1].min() - margin
    elif plot_entities:
        e_embeddings, e_reduced = _reduce_embeddings(entity_embedding_getter(pipeline_result.model), reducer, fit=True)
        r_embeddings, r_reduced = None, False

        xmax = e_embeddings[:, 0].max() + margin
        xmin = e_embeddings[:, 0].min() - margin
        ymax = e_embeddings[:, 1].max() + margin
        ymin = e_embeddings[:, 1].min() - margin
    else:
        raise ValueError  # not even possible

    if subtitle is not None:
        pass  # a specific subtitle has been given
    elif not e_reduced and not r_reduced:
        subtitle = ""
    elif reducer_kwargs:
        _subtitle_ending = ", ".join(f"{key}={value}" for key, value in reducer_kwargs.items())
        subtitle = f" using {reducer_cls.__name__} ({_subtitle_ending})"
    else:
        subtitle = f" using {reducer_cls.__name__}"

    if plot_entities:
        entity_id_to_label = pipeline_result.training.entity_id_to_label
        for entity_id, entity_reduced_embedding in enumerate(e_embeddings):
            entity_label = entity_id_to_label[entity_id]
            if entities and entity_label not in entities:
                continue
            x, y = entity_reduced_embedding
            ax.scatter(x, y, color="black")
            ax.annotate(entity_label, (x + annotation_x_offset, y + annotation_y_offset))

    if plot_relations:
        relation_id_to_label = pipeline_result.training.relation_id_to_label
        for relation_id, relation_reduced_embedding in enumerate(r_embeddings):
            relation_label = relation_id_to_label[relation_id]
            if relations and relation_label not in relations:
                continue
            x, y = relation_reduced_embedding
            ax.arrow(0, 0, x, y, color="black")
            ax.annotate(relation_label, (x + annotation_x_offset, y + annotation_y_offset))

    if plot_entities and plot_relations:
        ax.set_title(f"Entity/Relation Plot{subtitle}")
    elif plot_entities:
        ax.set_title(f"Entity Plot{subtitle}")
    elif plot_relations:
        ax.set_title(f"Relation Plot{subtitle}")

    if apply_limits:
        ax.set_xlim([xmin, xmax])
        ax.set_ylim([ymin, ymax])

    return ax


def _ensure_ax(ax):
    if ax is not None:
        return ax

    import matplotlib.pyplot as plt

    return plt.gca()


def _reduce_embeddings(embedding: Representation, reducer, fit: bool = False):
    embeddings_numpy = embedding(indices=None).detach().cpu().numpy()
    if embeddings_numpy.shape[1] == 2:
        logger.debug("not reducing entity embeddings, already dim=2")
        return embeddings_numpy, False
    elif fit:
        return reducer.fit_transform(embeddings_numpy), True
    else:
        return reducer.transform(embeddings_numpy), True


def _get_reducer_cls(model: str, **kwargs):
    """Get the model class by name and default kwargs.

    :param model: The name of the model. Can choose from: PCA, KPCA, GRP,
        SRP, TSNE, LLE, ISOMAP, MDS, or SE.
    :param kwargs: Keyword arguments that will get passed through and modified based on the chosen model.
    :return: A pair of a reducer class from :mod:`sklearn` and the modified kwargs.

    :raises ValueError: if invalid model name is passed
    """
    # TODO: use a class-resolver?
    if model.upper() == "PCA":
        from sklearn.decomposition import PCA as Reducer  # noqa:N811
    elif model.upper() == "KPCA":
        kwargs.setdefault("kernel", "rbf")
        from sklearn.decomposition import KernelPCA as Reducer
    elif model.upper() == "GRP":
        from sklearn.random_projection import GaussianRandomProjection as Reducer
    elif model.upper() == "SRP":
        from sklearn.random_projection import SparseRandomProjection as Reducer
    elif model.upper() in {"T-SNE", "TSNE"}:
        from sklearn.manifold import TSNE as Reducer  # noqa:N811
    elif model.upper() in {"LLE", "LOCALLYLINEAREMBEDDING"}:
        from sklearn.manifold import LocallyLinearEmbedding as Reducer
    elif model.upper() == "ISOMAP":
        from sklearn.manifold import Isomap as Reducer
    elif model.upper() in {"MDS", "MULTIDIMENSIONALSCALING"}:
        from sklearn.manifold import MDS as Reducer  # noqa:N811
    elif model.upper() in {"SE", "SPECTRAL", "SPECTRALEMBEDDING"}:
        from sklearn.manifold import SpectralEmbedding as Reducer
    else:
        raise ValueError(f"invalid dimensionality reduction model: {model}")
    return Reducer, kwargs


[docs]def plot(pipeline_result, er_kwargs: Optional[Mapping[str, str]] = None, figsize=(10, 4)):
    """Plot all plots."""
    import matplotlib.pyplot as plt

    fig, axes = plt.subplots(1, 2, figsize=figsize)

    pipeline_result.plot_losses(ax=axes[0])
    pipeline_result.plot_er(ax=axes[1], **(er_kwargs or {}))

    plt.tight_layout()
    return fig, axes