Source code for pykeen.datasets.inductive.ilp_teru

# -*- coding: utf-8 -*-

"""The inductive link prediction datasets from [teru2020]_.

- GitHub Repository: https://github.com/kkteru/grail
- Paper: https://arxiv.org/abs/1911.06962
"""

import click
from docdata import parse_docdata
from more_click import verbose_option

from .base import UnpackedRemoteDisjointInductiveDataset

__all__ = [
    "InductiveFB15k237",
    "InductiveWN18RR",
    "InductiveNELL",
]

BASE_URL = "https://raw.githubusercontent.com/kkteru/grail/master/data"

FB_TRAIN_URL = "{base_url}/fb237_{version}/train.txt"
FB_INDUCTIVE_INFERENCE_URL = "{base_url}/fb237_{version}_ind/train.txt"
FB_INDUCTIVE_VALIDATION_URL = "{base_url}/fb237_{version}_ind/valid.txt"
FB_INDUCTIVE_TEST_URL = "{base_url}/fb237_{version}_ind/test.txt"

WN_TRAIN_URL = "{base_url}/WN18RR_{version}/train.txt"
WN_INDUCTIVE_INFERENCE_URL = "{base_url}/WN18RR_{version}_ind/train.txt"
WN_INDUCTIVE_VALIDATION_URL = "{base_url}/WN18RR_{version}_ind/valid.txt"
WN_INDUCTIVE_TEST_URL = "{base_url}/WN18RR_{version}_ind/test.txt"

NELL_TRAIN_URL = "{base_url}/nell_{version}/train.txt"
NELL_INDUCTIVE_INFERENCE_URL = "{base_url}/nell_{version}_ind/train.txt"
NELL_INDUCTIVE_VALIDATION_URL = "{base_url}/nell_{version}_ind/valid.txt"
NELL_INDUCTIVE_TEST_URL = "{base_url}/nell_{version}_ind/test.txt"


# If GitHub ever gets upset from too many downloads, we can switch to
# the data posted at https://github.com/pykeen/pykeen/pull/154#issuecomment-730462039


[docs]@parse_docdata class InductiveFB15k237(UnpackedRemoteDisjointInductiveDataset): """The inductive FB15k-237 dataset in 4 versions. --- name: FB15k-237 citation: author: Teru year: 2020 link: https://arxiv.org/abs/1911.06962 github: kkteru/grail V1: transductive train entities: 1594 relations: 180 transductive train triples: 4245 inductive inference entities: 1093 inductive inference relations: 180 inductive inference triples: 1993 inductive validation triples: 206 inductive test triples: 205 V2: transductive train entities: 2608 relations: 200 transductive train triples: 9739 inductive inference entities: 1660 inductive inference relations: 200 inductive inference triples: 4145 inductive validation triples: 469 inductive test triples: 478 V3: transductive train entities: 3668 relations: 215 transductive train triples: 17986 inductive inference entities: 2501 inductive inference relations: 215 inductive inference triples: 7406 inductive validation triples: 866 inductive test triples: 865 V4: transductive train entities: 4707 relations: 219 transductive train triples: 27203 inductive inference entities: 3051 inductive inference relations: 219 inductive inference triples: 11714 inductive validation triples: 1416 inductive test triples: 1424 """ def __init__(self, version: str = "v1", **kwargs): """Initialize a particular version of a dataset (out of 4) from [teru2020]_. :param version: v1 / v2 / v3 / v4 , differ in the sizes of train and inductive inference graphs :param kwargs: keyword arguments passed to :class:`pykeen.datasets.base.UnpackedRemoteDataset`. """ super().__init__( transductive_training_url=FB_TRAIN_URL.format(base_url=BASE_URL, version=version), inductive_inference_url=FB_INDUCTIVE_INFERENCE_URL.format(base_url=BASE_URL, version=version), inductive_validation_url=FB_INDUCTIVE_VALIDATION_URL.format(base_url=BASE_URL, version=version), inductive_testing_url=FB_INDUCTIVE_TEST_URL.format(base_url=BASE_URL, version=version), version=version, eager=True, **kwargs, )
[docs]@parse_docdata class InductiveWN18RR(UnpackedRemoteDisjointInductiveDataset): """The inductive WN18RR dataset in 4 versions. --- name: WordNet-18 (RR) citation: author: Teru year: 2020 link: https://arxiv.org/abs/1911.06962 github: kkteru/grail V1: transductive train entities: 2746 relations: 9 transductive train triples: 5410 inductive inference entities: 922 inductive inference relations: 9 inductive inference triples: 1618 inductive validation triples: 185 inductive test triples: 188 V2: transductive train entities: 6954 relations: 10 transductive train triples: 15262 inductive inference entities: 2757 inductive inference relations: 10 inductive inference triples: 4011 inductive validation triples: 411 inductive test triples: 411 V3: transductive train entities: 12078 relations: 11 transductive train triples: 25901 inductive inference entities: 5084 inductive inference relations: 11 inductive inference triples: 6327 inductive validation triples: 538 inductive test triples: 605 V4: transductive train entities: 3861 relations: 9 transductive train triples: 7940 inductive inference entities: 7084 inductive inference relations: 9 inductive inference triples: 12334 inductive validation triples: 1394 inductive test triples: 1429 """ def __init__(self, version: str = "v1", **kwargs): """Initialize a particular version of a dataset (out of 4) from [teru2020]_. :param version: v1 / v2 / v3 / v4 , differ in the sizes of train and inductive inference graphs :param kwargs: keyword arguments passed to :class:`pykeen.datasets.base.UnpackedRemoteDataset`. """ super().__init__( transductive_training_url=WN_TRAIN_URL.format(base_url=BASE_URL, version=version), inductive_inference_url=WN_INDUCTIVE_INFERENCE_URL.format(base_url=BASE_URL, version=version), inductive_validation_url=WN_INDUCTIVE_VALIDATION_URL.format(base_url=BASE_URL, version=version), inductive_testing_url=WN_INDUCTIVE_TEST_URL.format(base_url=BASE_URL, version=version), version=version, eager=True, **kwargs, )
[docs]@parse_docdata class InductiveNELL(UnpackedRemoteDisjointInductiveDataset): """The inductive NELL dataset in 4 versions. --- name: NELL citation: author: Teru year: 2020 link: https://arxiv.org/abs/1911.06962 github: kkteru/grail V1: transductive train entities: 3103 relations: 14 transductive train triples: 4687 inductive inference entities: 225 inductive inference relations: 14 inductive inference triples: 833 inductive validation triples: 101 inductive test triples: 100 V2: transductive train entities: 2564 relations: 88 transductive train triples: 8219 inductive inference entities: 2086 inductive inference relations: 88 inductive inference triples: 4586 inductive validation triples: 459 inductive test triples: 476 V3: transductive train entities: 4647 relations: 142 transductive train triples: 16393 inductive inference entities: 3566 inductive inference relations: 142 inductive inference triples: 8048 inductive validation triples: 811 inductive test triples: 809 V4: transductive train entities: 2092 relations: 76 transductive train triples: 7546 inductive inference entities: 2795 inductive inference relations: 76 inductive inference triples: 7073 inductive validation triples: 716 inductive test triples: 731 """ def __init__(self, version: str = "v1", **kwargs): """Initialize a particular version of a dataset (out of 4) from [teru2020]_. :param version: v1 / v2 / v3 / v4 , differ in the sizes of train and inductive inference graphs :param kwargs: keyword arguments passed to :class:`pykeen.datasets.base.UnpackedRemoteDataset`. """ super().__init__( transductive_training_url=NELL_TRAIN_URL.format(base_url=BASE_URL, version=version), inductive_inference_url=NELL_INDUCTIVE_INFERENCE_URL.format(base_url=BASE_URL, version=version), inductive_validation_url=NELL_INDUCTIVE_VALIDATION_URL.format(base_url=BASE_URL, version=version), inductive_testing_url=NELL_INDUCTIVE_TEST_URL.format(base_url=BASE_URL, version=version), version=version, eager=True, **kwargs, )
@click.command() @verbose_option def _main(): for cls in [InductiveFB15k237, InductiveWN18RR, InductiveNELL]: click.secho(f"Loading {cls.__name__}", fg="green", bold=True) d = cls() d.summarize() if __name__ == "__main__": _main()