Source code for pykeen.evaluation.evaluator

# -*- coding: utf-8 -*-

"""Basic structure of a evaluator."""

import gc
import logging
import timeit
from abc import ABC, abstractmethod
from contextlib import contextmanager
from math import ceil
from typing import Any, ClassVar, Collection, Iterable, List, Mapping, Optional, Tuple, Type, Union, cast

import numpy as np
import pandas
import torch
from tqdm.autonotebook import tqdm

from ..metrics.utils import Metric
from ..models import Model
from ..triples.triples_factory import restrict_triples
from ..triples.utils import get_entities, get_relations
from ..typing import LABEL_HEAD, LABEL_RELATION, LABEL_TAIL, InductiveMode, MappedTriples, Target
from ..utils import (

__all__ = [

logger = logging.getLogger(__name__)

def optional_context_manager(condition, context_manager):
    """Return an optional context manager based on the given condition."""
    if condition:
        with context_manager:
            yield context_manager

[docs]class MetricResults: """Results from computing metrics.""" metrics: ClassVar[Mapping[str, Type[Metric]]] def __init__(self, data: Mapping): """Initialize the result wrapper.""" = data def __getattr__(self, item): # noqa:D105 # TODO remove this, it makes code much harder to reason about if item not in raise AttributeError return[item]
[docs] @abstractmethod def get_metric(self, name: str) -> float: """Get the given metric from the results. :param name: The name of the metric :returns: The value for the metric """ raise NotImplementedError
[docs] def to_dict(self): """Get the results as a dictionary.""" return
[docs] def to_flat_dict(self) -> Mapping[str, Any]: """Get the results as a flattened dictionary.""" return self.to_dict()
[docs]class Evaluator(ABC): """An abstract evaluator for KGE models. The evaluator encapsulates the computation of evaluation metrics based on head and tail scores. To this end, it offers two methods to process a batch of triples together with the scores produced by some model. It maintains intermediate results in its state, and offers a method to obtain the final results once finished. """ def __init__( self, filtered: bool = False, requires_positive_mask: bool = False, batch_size: Optional[int] = None, slice_size: Optional[int] = None, automatic_memory_optimization: bool = True, mode: Optional[InductiveMode] = None, ): """Initialize the evaluator. :param filtered: Should filtered evaluation be performed? :param requires_positive_mask: Does the evaluator need access to the masks? :param batch_size: >0. Evaluation batch size. :param slice_size: >0. The divisor for the scoring function when using slicing :param automatic_memory_optimization: Whether to automatically optimize the sub-batch size during evaluation with regards to the hardware at hand. :param mode: the inductive mode, or None for transductive evaluation """ self.filtered = filtered self.requires_positive_mask = requires_positive_mask self.batch_size = batch_size self.slice_size = slice_size self.automatic_memory_optimization = automatic_memory_optimization self.mode = mode
[docs] @classmethod def get_normalized_name(cls) -> str: """Get the normalized name of the evaluator.""" return normalize_string(cls.__name__, suffix=Evaluator.__name__)
[docs] @abstractmethod def process_scores_( self, hrt_batch: MappedTriples, target: Target, scores: torch.FloatTensor, true_scores: Optional[torch.FloatTensor] = None, dense_positive_mask: Optional[torch.FloatTensor] = None, ) -> None: """Process a batch of triples with their computed scores for all entities. :param hrt_batch: shape: (batch_size, 3) :param target: the prediction target :param scores: shape: (batch_size, num_entities) :param true_scores: shape: (batch_size, 1) :param dense_positive_mask: shape: (batch_size, num_entities) An optional binary (0/1) tensor indicating other true entities. """ raise NotImplementedError
[docs] @abstractmethod def finalize(self) -> MetricResults: """Compute the final results, and clear buffers.""" raise NotImplementedError
[docs] def evaluate( self, model: Model, mapped_triples: MappedTriples, batch_size: Optional[int] = None, slice_size: Optional[int] = None, **kwargs, ) -> MetricResults: """ Run :func:`pykeen.evaluation.evaluate` with this evaluator. This method will re-use the stored optimized batch and slice size, as well as the evaluator's inductive mode. :param model: the model to evaluate. :param mapped_triples: shape: (n, 3) the ID-based evaluation triples :param batch_size: the batch size to use, or `None` to trigger automatic memory optimization :param slice_size: the slice size to use :param kwargs: the keyword-based parameters passed to :func:`pykeen.evaluation.evaluate` :return: the evaluation results """ # add mode parameter mode = kwargs.pop("mode", None) if mode is not None: logger.warning(f"Ignoring provided mode={mode}, and use the evaluator's mode={self.mode} instead") kwargs["mode"] = self.mode if batch_size is None and self.automatic_memory_optimization: # Using automatic memory optimization on CPU may result in undocumented crashes due to OS' OOM killer. if model.device.type == "cpu": "Currently automatic memory optimization only supports GPUs, but you're using a CPU. " "Therefore, the batch_size will be set to the default value.", ) else: batch_size, slice_size = self.batch_and_slice( model=model, mapped_triples=mapped_triples, batch_size=batch_size, **kwargs, ) # The batch_size and slice_size should be accessible to outside objects for re-use, e.g. early stoppers. self.batch_size = batch_size self.slice_size = slice_size # Clear the ranks from the current evaluator self.finalize() rv = evaluate( model=model, mapped_triples=mapped_triples, evaluator=self, batch_size=batch_size, slice_size=slice_size, **kwargs, ) # Since squeeze is true, we can expect that evaluate returns a MetricResult, but we need to tell MyPy that return cast(MetricResults, rv)
[docs] def batch_and_slice( self, model: Model, mapped_triples: MappedTriples, batch_size: Optional[int] = None, **kwargs, ) -> Tuple[int, Optional[int]]: """Find the maximum possible batch_size and slice_size for evaluation with the current setting. The speed of evaluation can be greatly increased when the batch_size is increased, therefore this function estimates the maximal possible batch_size for the evaluation by starting with the batch_size given as argument and increasing it until the hardware runs out-of-memory(OOM). In some cases, i.e. with very large models or very large datasets, even the batch_size 1 is too big for the hardware at hand. In these cases, this function will check if the model at hand allows slicing (this needs to be implemented for the affected scoring functions) and, if possible, will search the maximum possible slice_size that would still allow to calculate the model with the given parameters on the hardware at hand. :param model: The model to evaluate. :param mapped_triples: The triples on which to evaluate. :param batch_size: The initial batch size to start with. None defaults to number_of_triples. :param kwargs: additional keyword-based parameters passed to :func:`pykeen.evaluation.evaluate` :return: Maximum possible batch size and, if necessary, the slice_size, which defaults to None. :raises MemoryError: If it is not possible to evaluate the model on the hardware at hand with the given parameters. """ # do not display progress bar while searching kwargs["use_tqdm"] = False # start by searching for batch_size batch_size, evaluated_once = self._param_size_search( key="batch_size", start_value=batch_size, model=model, mapped_triples=mapped_triples, **kwargs, ) if evaluated_once: # slice_size = None return batch_size, None # We need to try slicing, if the evaluation for the batch_size search never succeeded # we do not need to repeat time-consuming checks kwargs["do_time_consuming_checks"] = False slice_size, evaluated_once = self._param_size_search( key="slice_size", # infer start value start_value=None, model=model, mapped_triples=mapped_triples, **kwargs, ) if not evaluated_once: raise MemoryError("The current model can't be trained on this hardware with these parameters.") return batch_size, slice_size
def _param_size_search( self, key: str, start_value: Optional[int], model: Model, mapped_triples: MappedTriples, **kwargs, ) -> Tuple[int, bool]: values_dict = {} maximum_triples = mapped_triples.shape[0] if key == "batch_size": if start_value is None: start_value = 256 if start_value > maximum_triples: start_value = maximum_triples values_dict[key] = start_value values_dict["slice_size"] = None elif key == "slice_size": if start_value is None: # Since the batch_size search with size 1, i.e. one tuple ((h, r), (h, t) or (r, t)) scored on all # entities/relations, must have failed to start slice_size search, we start with trying half the # entities/relations. targets: Collection[Target] = kwargs.get("targets", (LABEL_HEAD, LABEL_TAIL)) predict_entities = bool({LABEL_HEAD, LABEL_TAIL}.intersection(targets)) predict_relations = LABEL_RELATION in targets max_id = -1 if predict_entities: max_id = max(max_id, model.num_entities) if predict_relations: max_id = max(max_id, model.num_relations) start_value = ceil(max_id / 2) self._check_slicing_availability( model, batch_size=1, entities=predict_entities, relations=predict_relations ) values_dict[key] = start_value values_dict["batch_size"] = 1 else: raise AttributeError(f"The parameter {key} is unknown.") reached_max = False evaluated_once = False"Starting {key} search for evaluation now...") while True: logger.debug(f"Trying {key}={values_dict[key]}") try: # The cache of the previous run has to be freed to allow accurate memory availability estimates gc.collect() torch.cuda.empty_cache() evaluate( model=model, mapped_triples=mapped_triples, evaluator=self, only_size_probing=True, batch_size=values_dict.get("batch_size"), slice_size=values_dict.get("slice_size"), **kwargs, ) evaluated_once = True except RuntimeError as runtime_error: # Due to the caused OOM Runtime Error, the failed model has to be cleared to avoid memory leakage for p in model.parameters(): if p.grad is not None: del p.grad # free some memory # The cache of the previous run has to be freed to allow accurate memory availability estimates gc.collect() torch.cuda.empty_cache() if ( not is_cudnn_error(runtime_error) and not is_cuda_oom_error(runtime_error) and not is_nonzero_larger_than_maxint_error(runtime_error) ): raise runtime_error if values_dict[key] == 1: logger.debug( f"Even {key} {values_dict[key]} does not fit into your memory with these parameters.", ) break # values_dict[key] will always be an int at this point values_dict[key] //= 2 # type: ignore reached_max = True if evaluated_once:"Concluded {key} search with batch_size={values_dict[key]}.") break else: logger.debug(f"The {key} {values_dict[key]} was too big, trying less now") else: # The cache of the previous run has to be freed to allow accurate memory availability estimates gc.collect() torch.cuda.empty_cache() if not reached_max and values_dict["batch_size"] < maximum_triples: values_dict[key] *= 2 # type: ignore else:"Concluded {key} search with batch_size={values_dict[key]}.") break return cast(Tuple[int, bool], (values_dict[key], evaluated_once)) @staticmethod def _check_slicing_availability(model: Model, batch_size: int, entities: bool, relations: bool) -> None: """ Raise an error if the necessary slicing operations are not supported. :param model: the model :param batch_size: the batch-size; only used for creating the error message :param entities: whether entities need to be scored :param relations: whether relations need to be scored :raises MemoryError: if the necessary slicing operations are not supported by the model """ reasons = [] if entities: # if inverse triples are used, we only do score_t (TODO: by default; can this be changed?) if not model.can_slice_t: reasons.append("score_t") # otherwise, i.e., without inverse triples, we also need score_h if not model.use_inverse_triples and not model.can_slice_t: reasons.append("score_h") # if relations are to be predicted, we need to slice score_r if relations and not model.can_slice_r: reasons.append("score_r") # raise an error, if any of the required methods cannot slice if reasons: raise MemoryError( f"The current model can't be evaluated on this hardware with these parameters, as " f"evaluation batch_size={batch_size} is too big and slicing is not implemented for this " f"model yet (missing support for: {reasons})" )
def create_sparse_positive_filter_( hrt_batch: MappedTriples, all_pos_triples: torch.LongTensor, relation_filter: torch.BoolTensor = None, filter_col: int = 0, ) -> Tuple[torch.LongTensor, torch.BoolTensor]: """Compute indices of all positives. For simplicity, only the head-side is described, i.e. filter_col=0. The tail-side is processed alike. For each (h, r, t) triple in the batch, the entity identifiers are computed such that (h', r, t) exists in all positive triples. :param hrt_batch: shape: (batch_size, 3) A batch of triples. :param all_pos_triples: shape: (num_positive_triples, 3) All positive triples to base the filtering on. :param relation_filter: shape: (batch_size, num_positive_triples) A boolean mask R[i, j] which is True iff the j-th positive triple contains the same relation as the i-th triple in the batch. :param filter_col: The column along which to filter. Allowed are {0, 2}, where 0 corresponds to filtering head-based and 2 corresponds to filtering tail-based. :return: - positives, shape: (2, m) The indices of positives in format [(batch_index, entity_id)]. - the relation filter for re-usage. :raises NotImplementedError: if the `filter_col` is not in `{0, 2}` """ if filter_col not in {0, 2}: raise NotImplementedError( "This code has only been written for updating head (filter_col=0) or " f"tail (filter_col=2) mask, but filter_col={filter_col} was given.", ) if relation_filter is None: relations = hrt_batch[:, 1:2] relation_filter = (all_pos_triples[:, 1:2]).view(1, -1) == relations # Split batch other_col = 2 - filter_col entities = hrt_batch[:, other_col : other_col + 1] entity_filter_test = (all_pos_triples[:, other_col : other_col + 1]).view(1, -1) == entities filter_batch = (entity_filter_test & relation_filter).nonzero(as_tuple=False) filter_batch[:, 1] = all_pos_triples[:, filter_col : filter_col + 1].view(1, -1)[:, filter_batch[:, 1]] return filter_batch, relation_filter def create_dense_positive_mask_( zero_tensor: torch.FloatTensor, filter_batch: torch.LongTensor, ) -> torch.FloatTensor: """Construct dense positive mask. :param zero_tensor: shape: (batch_size, num_entities) A tensor of zeros of suitable shape. :param filter_batch: shape: (m, 2) The indices of all positives in format (batch_index, entity_id) :return: The dense positive mask with x[b, i] = 1 iff (b, i) in filter_batch. """ zero_tensor[filter_batch[:, 0], filter_batch[:, 1]] = 1 return zero_tensor def filter_scores_( scores: torch.FloatTensor, filter_batch: torch.LongTensor, ) -> torch.FloatTensor: """Filter scores by setting true scores to NaN. :param scores: shape: (batch_size, num_entities) The scores for all corrupted triples (including the currently considered true triple). Are modified *in-place*. :param filter_batch: (m, 2) The indices of all positives. :return: A reference to the scores, which have been updated in-place. """ # Bind shape batch_size, num_entities = scores.shape # Set all filtered triples to NaN to ensure their exclusion in subsequent calculations scores[filter_batch[:, 0], filter_batch[:, 1]] = float("nan") # Warn if all entities will be filtered # (scores != scores) yields true for all NaN instances (IEEE 754), thus allowing to count the filtered triples. if ((scores != scores).sum(dim=1) == num_entities).any(): logger.warning( "User selected filtered metric computation, but all corrupted triples exists also as positive " "triples", ) return scores # TODO: consider switching to torch.DataLoader where the preparation of masks/filter batches also takes place
[docs]def evaluate( model: Model, mapped_triples: MappedTriples, evaluator: Evaluator, only_size_probing: bool = False, batch_size: Optional[int] = None, slice_size: Optional[int] = None, device: Optional[torch.device] = None, use_tqdm: bool = True, tqdm_kwargs: Optional[Mapping[str, str]] = None, restrict_entities_to: Optional[Collection[int]] = None, restrict_relations_to: Optional[Collection[int]] = None, do_time_consuming_checks: bool = True, additional_filter_triples: Union[None, MappedTriples, List[MappedTriples]] = None, pre_filtered_triples: bool = True, targets: Collection[Target] = (LABEL_HEAD, LABEL_TAIL), *, mode: Optional[InductiveMode], ) -> MetricResults: """Evaluate metrics for model on mapped triples. The model is used to predict scores for all tails and all heads for each triple. Subsequently, each abstract evaluator is applied to the scores, also receiving the batch itself (e.g. to compute entity-specific metrics). Thereby, the (potentially) expensive score computation against all entities is done only once. The metric evaluators are expected to maintain their own internal buffers. They are returned after running the evaluation, and should offer a possibility to extract some final metrics. :param model: The model to evaluate. :param mapped_triples: The triples on which to evaluate. The mapped triples should never contain inverse triples - these are created by the model class on the fly. :param evaluator: The evaluator. :param only_size_probing: The evaluation is only performed for two batches to test the memory footprint, especially on GPUs. :param batch_size: >0 A positive integer used as batch size. Generally chosen as large as possible. Defaults to 1 if None. :param slice_size: >0 The divisor for the scoring function when using slicing. :param device: The device on which the evaluation shall be run. If None is given, use the model's device. :param use_tqdm: Should a progress bar be displayed? :param tqdm_kwargs: Additional keyword based arguments passed to the progress bar. :param restrict_entities_to: Optionally restrict the evaluation to the given entity IDs. This may be useful if one is only interested in a part of the entities, e.g. due to type constraints, but wants to train on all available data. For ranking the entities, we still compute all scores for all possible replacement entities to avoid irregular access patterns which might decrease performance, but the scores will afterwards be filtered to only keep those of interest. If provided, we assume by default that the triples are already filtered, such that it only contains the entities of interest. To explicitly filter within this method, pass `pre_filtered_triples=False`. :param restrict_relations_to: Optionally restrict the evaluation to the given relation IDs. This may be useful if one is only interested in a part of the relations, e.g. due to relation types, but wants to train on all available data. If provided, we assume by default that the triples are already filtered, such that it only contains the relations of interest. To explicitly filter within this method, pass `pre_filtered_triples=False`. :param do_time_consuming_checks: Whether to perform some time consuming checks on the provided arguments. Currently, this encompasses: - If restrict_entities_to or restrict_relations_to is not None, check whether the triples have been filtered. Disabling this option can accelerate the method. Only effective if pre_filtered_triples is set to True. :param pre_filtered_triples: Whether the triples have been pre-filtered to adhere to restrict_entities_to / restrict_relations_to. When set to True, and the triples have *not* been filtered, the results may be invalid. Pre-filtering the triples accelerates this method, and is recommended when evaluating multiple times on the same set of triples. :param additional_filter_triples: additional true triples to filter out during filtered evaluation. :param targets: the prediction targets :param mode: the inductive mode, or None for transductive evaluation :raises NotImplementedError: if relation prediction evaluation is requested :raises ValueError: if the pre_filtered_triples contain unwanted entities (can only be detected with the time-consuming checks). :return: the evaluation results """ if LABEL_RELATION in targets: raise NotImplementedError("cf.") start = timeit.default_timer() # verify that the triples have been filtered if pre_filtered_triples and do_time_consuming_checks: if restrict_entities_to is not None: present_entity_ids = get_entities(triples=mapped_triples) unwanted = present_entity_ids.difference(restrict_entities_to) if len(unwanted) > 0: raise ValueError( f"mapped_triples contains IDs of entities which are not contained in restrict_entities_to:" f"{unwanted}. This will invalidate the evaluation results.", ) if restrict_relations_to is not None: present_relation_ids = get_relations(triples=mapped_triples) unwanted = present_relation_ids.difference(restrict_relations_to) if len(unwanted): raise ValueError( f"mapped_triples contains IDs of relations which are not contained in restrict_relations_to:" f"{unwanted}. This will invalidate the evaluation results.", ) # Filter triples if necessary if not pre_filtered_triples and (restrict_entities_to is not None or restrict_relations_to is not None): old_num_triples = mapped_triples.shape[0] mapped_triples = restrict_triples( mapped_triples=mapped_triples, entities=restrict_entities_to, relations=restrict_relations_to, )"keeping {format_relative_comparison(mapped_triples.shape[0], old_num_triples)} triples.") # Send to device if device is not None: model = device = model.device # Ensure evaluation mode model.eval() # Prepare for result filtering if evaluator.filtered or evaluator.requires_positive_mask: all_pos_triples = prepare_filter_triples( mapped_triples=mapped_triples, additional_filter_triples=additional_filter_triples, ).to(device=device) else: all_pos_triples = None # Send tensors to device mapped_triples = # Prepare batches if batch_size is None: # This should be a reasonable default size that works on most setups while being faster than batch_size=1 batch_size = 32"No evaluation batch_size provided. Setting batch_size to '{batch_size}'.") batches = cast(Iterable[np.ndarray], split_list_in_batches_iter(input_list=mapped_triples, batch_size=batch_size)) # Show progressbar num_triples = mapped_triples.shape[0] # Flag to check when to quit the size probing evaluated_once = False # Disable gradient tracking _tqdm_kwargs = dict( desc=f"Evaluating on {model.device}", total=num_triples, unit="triple", unit_scale=True, # Choosing no progress bar (use_tqdm=False) would still show the initial progress bar without disable=True disable=not use_tqdm, ) if tqdm_kwargs: _tqdm_kwargs.update(tqdm_kwargs) with optional_context_manager(use_tqdm, tqdm(**_tqdm_kwargs)) as progress_bar, torch.inference_mode(): # batch-wise processing for batch in batches: batch_size = batch.shape[0] relation_filter = None for target in targets: relation_filter = _evaluate_batch( batch=batch, model=model, target=target, evaluator=evaluator, slice_size=slice_size, all_pos_triples=all_pos_triples, relation_filter=relation_filter, restrict_entities_to=restrict_entities_to, mode=mode, ) # If we only probe sizes we do not need more than one batch if only_size_probing and evaluated_once: break evaluated_once = True if use_tqdm: progress_bar.update(batch_size) # Finalize result = evaluator.finalize() stop = timeit.default_timer() if only_size_probing: logger.debug("Evaluation took %.2fs seconds", stop - start) else:"Evaluation took %.2fs seconds", stop - start) return result
def _evaluate_batch( batch: MappedTriples, model: Model, target: Target, evaluator: Evaluator, slice_size: Optional[int], all_pos_triples: Optional[MappedTriples], relation_filter: Optional[torch.BoolTensor], restrict_entities_to: Optional[torch.LongTensor], *, mode: Optional[InductiveMode], ) -> torch.BoolTensor: """ Evaluate ranking for batch. :param batch: shape: (batch_size, 3) The batch of currently evaluated triples. :param model: The model to evaluate. :param target: The prediction target. :param evaluator: The evaluator :param slice_size: An optional slice size for computing the scores. :param all_pos_triples: All positive triples (required if filtering is necessary). :param relation_filter: The relation filter. Can be re-used. :param restrict_entities_to: Restriction to evaluate only for these entities. :param mode: the inductive mode, or None for transductive evaluation :raises ValueError: if all positive triples are required (either due to filtered evaluation, or requiring dense masks). :return: The relation filter, which can be re-used for the same batch. """ scores = model.predict(hrt_batch=batch, target=target, slice_size=slice_size, mode=mode) if evaluator.filtered or evaluator.requires_positive_mask: column = TARGET_TO_INDEX[target] if all_pos_triples is None: raise ValueError( "If filtering_necessary of positive_masks_required is True, all_pos_triples has to be " "provided, but is None." ) # Create filter positive_filter, relation_filter = create_sparse_positive_filter_( hrt_batch=batch, all_pos_triples=all_pos_triples, relation_filter=relation_filter, filter_col=column, ) else: positive_filter = relation_filter = None if evaluator.filtered: assert positive_filter is not None # Select scores of true true_scores = scores[torch.arange(0, batch.shape[0]), batch[:, column]] # overwrite filtered scores scores = filter_scores_(scores=scores, filter_batch=positive_filter) # The scores for the true triples have to be rewritten to the scores tensor scores[torch.arange(0, batch.shape[0]), batch[:, column]] = true_scores # the rank-based evaluators needs the true scores with trailing 1-dim true_scores = true_scores.unsqueeze(dim=-1) else: true_scores = None # Create a positive mask with the size of the scores from the positive filter if evaluator.requires_positive_mask: assert positive_filter is not None positive_mask = create_dense_positive_mask_(zero_tensor=torch.zeros_like(scores), filter_batch=positive_filter) else: positive_mask = None # Restrict to entities of interest if restrict_entities_to is not None: scores = scores[:, restrict_entities_to] if positive_mask is not None: positive_mask = positive_mask[:, restrict_entities_to] # process scores evaluator.process_scores_( hrt_batch=batch, target=target, true_scores=true_scores, scores=scores, dense_positive_mask=positive_mask, ) return relation_filter def get_candidate_set_size( mapped_triples: MappedTriples, restrict_entities_to: Optional[Collection[int]] = None, restrict_relations_to: Optional[Collection[int]] = None, additional_filter_triples: Union[None, MappedTriples, List[MappedTriples]] = None, num_entities: Optional[int] = None, ) -> pandas.DataFrame: """ Calculate the candidate set sizes for head/tail prediction for the given triples. :param mapped_triples: shape: (n, 3) the evaluation triples :param restrict_entities_to: The entity IDs of interest. If None, defaults to all entities. cf. :func:`restrict_triples`. :param restrict_relations_to: The relations IDs of interest. If None, defaults to all relations. cf. :func:`restrict_triples`. :param additional_filter_triples: shape: (n, 3) additional filter triples besides the evaluation triples themselves. cf. `_prepare_filter_triples`. :param num_entities: the number of entities. If not given, this number is inferred from all triples :return: columns: "index" | "head" | "relation" | "tail" | "head_candidates" | "tail_candidates" a dataframe of all evaluation triples, with the number of head and tail candidates """ # optionally restrict triples (nop if no restriction) mapped_triples = restrict_triples( mapped_triples=mapped_triples, entities=restrict_entities_to, relations=restrict_relations_to, ) # evaluation triples as dataframe df_eval = pandas.DataFrame( data=mapped_triples.numpy(), columns=COLUMN_LABELS, ).reset_index() # determine filter triples filter_triples = prepare_filter_triples( mapped_triples=mapped_triples, additional_filter_triples=additional_filter_triples, ) # infer num_entities if not given if restrict_entities_to: num_entities = len(restrict_entities_to) else: # TODO: unique, or max ID + 1? num_entities = num_entities or filter_triples[:, [0, 2]].view(-1).unique().numel() # optionally restrict triples filter_triples = restrict_triples( mapped_triples=filter_triples, entities=restrict_entities_to, relations=restrict_relations_to, ) df_filter = pandas.DataFrame( data=filter_triples.numpy(), columns=COLUMN_LABELS, ) # compute candidate set sizes for different targets # TODO: extend to relations? for target in [LABEL_HEAD, LABEL_TAIL]: total = num_entities group_keys = TARGET_TO_KEY_LABELS[target] df_count = df_filter.groupby(by=group_keys).agg({target: "count"}) column = f"{target}_candidates" df_count[column] = total - df_count[target] df_count = df_count.drop(columns=target) df_eval = pandas.merge(df_eval, df_count, on=group_keys, how="left") df_eval[column] = df_eval[column].fillna(value=total) return df_eval