Source code for pykeen.models.unimodal.ermlpe

# -*- coding: utf-8 -*-

"""An implementation of the extension to ERMLP."""

from typing import Any, ClassVar, Mapping, Type

import torch
from torch import nn
from torch.nn.init import uniform_

from ..base import EntityRelationEmbeddingModel
from ...constants import DEFAULT_DROPOUT_HPO_RANGE, DEFAULT_EMBEDDING_HPO_EMBEDDING_DIM_RANGE
from ...losses import BCEAfterSigmoidLoss, Loss
from ...nn.emb import EmbeddingSpecification
from ...typing import Hint, Initializer

__all__ = [
    "ERMLPE",
]


class ERMLPE(EntityRelationEmbeddingModel):
    r"""An extension of :class:`pykeen.models.ERMLP` proposed by [sharifzadeh2019]_.

    This model uses a neural network-based approach similar to ER-MLP, with slight modifications.
    In ER-MLP, the model is:

    .. math::

        f(h, r, t) = \textbf{w}^{T} g(\textbf{W} [\textbf{h}; \textbf{r}; \textbf{t}])

    whereas in ER-MLP (E) the model is:

    .. math::

        f(h, r, t) = \textbf{t}^{T} f(\textbf{W} g(\textbf{W} [\textbf{h}; \textbf{r}]))

    including dropouts and batch-norms between each two hidden layers.
    ConvE can be seen as a special case of ER-MLP (E) that contains the unnecessary inductive bias of
    convolutional filters. The aim of this model is to show that lifting this bias from
    :class:`pykeen.models.ConvE` (which simply leaves us with a modified ER-MLP model) not only reduces
    the number of parameters but also improves performance.

    ---
    name: ER-MLP (E)
    citation:
        author: Sharifzadeh
        year: 2019
        link: https://github.com/pykeen/pykeen
        github: pykeen/pykeen
    """

    #: The default strategy for optimizing the model's hyper-parameters
    hpo_default: ClassVar[Mapping[str, Any]] = dict(
        embedding_dim=DEFAULT_EMBEDDING_HPO_EMBEDDING_DIM_RANGE,
        hidden_dim=dict(type=int, low=5, high=9, scale="power_two"),
        input_dropout=DEFAULT_DROPOUT_HPO_RANGE,
        hidden_dropout=DEFAULT_DROPOUT_HPO_RANGE,
    )
    #: The default loss function class
    loss_default: ClassVar[Type[Loss]] = BCEAfterSigmoidLoss
    #: The default parameters for the default loss function class
    loss_default_kwargs: ClassVar[Mapping[str, Any]] = {}

    def __init__(
        self,
        *,
        hidden_dim: int = 300,
        input_dropout: float = 0.2,
        hidden_dropout: float = 0.3,
        embedding_dim: int = 200,
        entity_initializer: Hint[Initializer] = uniform_,
        relation_initializer: Hint[Initializer] = uniform_,
        **kwargs,
    ) -> None:
        super().__init__(
            entity_representations=EmbeddingSpecification(
                embedding_dim=embedding_dim,
                initializer=entity_initializer,
            ),
            relation_representations=EmbeddingSpecification(
                embedding_dim=embedding_dim,
                initializer=relation_initializer,
            ),
            **kwargs,
        )
        self.hidden_dim = hidden_dim

        self.linear1 = nn.Linear(2 * self.embedding_dim, self.hidden_dim)
        self.linear2 = nn.Linear(self.hidden_dim, self.embedding_dim)
        self.input_dropout = nn.Dropout(input_dropout)
        self.bn1 = nn.BatchNorm1d(self.hidden_dim)
        self.bn2 = nn.BatchNorm1d(self.embedding_dim)
        self.mlp = nn.Sequential(
            self.linear1,
            nn.Dropout(hidden_dropout),
            self.bn1,
            nn.ReLU(),
            self.linear2,
            nn.Dropout(hidden_dropout),
            self.bn2,
            nn.ReLU(),
        )

    def _reset_parameters_(self):  # noqa: D102
        super()._reset_parameters_()
        for module in [
            self.linear1,
            self.linear2,
            self.bn1,
            self.bn2,
        ]:
            module.reset_parameters()
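
    # The scoring methods below all reuse ``self.mlp``; for orientation, the shape flow
    # of a single forward pass is (with d = embedding_dim, k = hidden_dim):
    #   concatenate [h; r]                 -> (batch, 2 * d)
    #   linear1 -> dropout -> bn1 -> ReLU  -> (batch, k)
    #   linear2 -> dropout -> bn2 -> ReLU  -> (batch, d)   # the predicted tail embedding x_t
    #   dot product with t                 -> (batch, 1)   # raw score; the sigmoid is handled by the loss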

    def score_hrt(self, hrt_batch: torch.LongTensor) -> torch.FloatTensor:  # noqa: D102
        # Get embeddings
        h = self.entity_embeddings(indices=hrt_batch[:, 0]).view(-1, self.embedding_dim)
        r = self.relation_embeddings(indices=hrt_batch[:, 1]).view(-1, self.embedding_dim)
        t = self.entity_embeddings(indices=hrt_batch[:, 2])

        # Embedding Regularization
        self.regularize_if_necessary(h, r, t)

        # Concatenate them
        x_s = torch.cat([h, r], dim=-1)
        x_s = self.input_dropout(x_s)

        # Predict t embedding
        x_t = self.mlp(x_s)

        # Compare with the corresponding t
        # For efficient calculation, each of the calculated [h, r] rows only has to be multiplied with one t row
        x = (x_t.view(-1, self.embedding_dim) * t).sum(dim=1, keepdim=True)
        # The application of the sigmoid during training is automatically handled by the default loss.
        return x

    def score_t(self, hr_batch: torch.LongTensor) -> torch.FloatTensor:  # noqa: D102
        h = self.entity_embeddings(indices=hr_batch[:, 0]).view(-1, self.embedding_dim)
        r = self.relation_embeddings(indices=hr_batch[:, 1]).view(-1, self.embedding_dim)
        t = self.entity_embeddings(indices=None).transpose(1, 0)

        # Embedding Regularization
        self.regularize_if_necessary(h, r, t)

        # Concatenate them
        x_s = torch.cat([h, r], dim=-1)
        x_s = self.input_dropout(x_s)

        # Predict t embedding
        x_t = self.mlp(x_s)
        x = x_t @ t
        # The application of the sigmoid during training is automatically handled by the default loss.
        return x

    def score_h(self, rt_batch: torch.LongTensor) -> torch.FloatTensor:  # noqa: D102
        h = self.entity_embeddings(indices=None)
        r = self.relation_embeddings(indices=rt_batch[:, 0]).view(-1, self.embedding_dim)
        t = self.entity_embeddings(indices=rt_batch[:, 1]).view(-1, self.embedding_dim)

        # Embedding Regularization
        self.regularize_if_necessary(h, r, t)

        rt_batch_size = t.shape[0]

        # Extend each "r" of the rt_batch from shape [rt_batch_size, dim] to [rt_batch_size * num_entities, dim]
        r = torch.repeat_interleave(r, self.num_entities, dim=0)

        # Extend the entity matrix "h" from shape [num_entities, dim] to [rt_batch_size * num_entities, dim],
        # cycling through all entities once per batch entry
        h = h.repeat(rt_batch_size, 1)

        # Extend t accordingly
        t = t.repeat_interleave(self.num_entities, dim=0)

        # Concatenate them
        x_s = torch.cat([h, r], dim=-1)
        x_s = self.input_dropout(x_s)

        # Predict t embedding
        x_t = self.mlp(x_s)

        # For efficient calculation, each of the calculated [h, r] rows only has to be multiplied with one t row
        x = (x_t.view(-1, self.embedding_dim) * t).sum(dim=1, keepdim=True)

        # The results have to be realigned with the expected output of the score_h function
        x = x.view(rt_batch_size, self.num_entities)
        # The application of the sigmoid during training is automatically handled by the default loss.
        return x
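
The following is a minimal usage sketch, not part of the module above. It assumes the standard pykeen.pipeline.pipeline entry point and the built-in "Nations" toy dataset; the hyper-parameter values shown are illustrative rather than tuned. The keyword arguments in model_kwargs are forwarded to ERMLPE.__init__.

from pykeen.pipeline import pipeline

result = pipeline(
    model="ERMLPE",
    dataset="Nations",  # small built-in dataset, convenient for a quick smoke test
    model_kwargs=dict(
        embedding_dim=200,  # entity/relation embedding size (the class default)
        hidden_dim=300,     # width of the hidden MLP layer
        input_dropout=0.2,
        hidden_dropout=0.3,
    ),
    training_kwargs=dict(num_epochs=5),
)
result.save_to_directory("ermlpe_nations")  # persists the trained model and evaluation results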