"""The Wikidata5m dataset from [wang2019]_.

Wikidata5m is a million-scale knowledge graph dataset with an aligned corpus.
This dataset integrates the Wikidata knowledge graph and Wikipedia pages.
Each entity in Wikidata5m is described by a corresponding Wikipedia page,
which enables the evaluation of link prediction over unseen entities.
"""
@parse_docdata
class Wikidata5M(TarFileRemoteDataset):
    """The Wikidata5M dataset from [wang2019]_.

    The training, testing, and validation triples are loaded from the
    ``wikidata5m_transductive_{train,test,valid}.txt`` files, whose paths are
    given relative to the tar archive referenced by ``TRANSDUCTIVE_URL``.

    Everything below the ``---`` marker is structured metadata consumed by
    the ``parse_docdata`` decorator, so it must remain valid YAML.

    ---
    name: Wikidata5M
    statistics:
        entities: 4594149
        relations: 822
        training: 20614279
        testing: 4977
        validation: 4983
        triples: 20624239
    citation:
        author: Wang
        year: 2019
        arxiv: 1911.06136
        link: https://arxiv.org/abs/1911.06136
    """

    def __init__(self, create_inverse_triples: bool = False, **kwargs) -> None:
        """Initialize the Wikidata5M dataset.

        :param create_inverse_triples: Should inverse triples be created? Defaults to false.
        :param kwargs: keyword arguments passed to :class:`pykeen.datasets.base.TarFileRemoteDataset`.
        """
        super().__init__(
            url=TRANSDUCTIVE_URL,
            # Locations of the three splits, relative to the archive contents.
            relative_training_path=pathlib.PurePath("wikidata5m_transductive_train.txt"),
            relative_testing_path=pathlib.PurePath("wikidata5m_transductive_test.txt"),
            relative_validation_path=pathlib.PurePath("wikidata5m_transductive_valid.txt"),
            create_inverse_triples=create_inverse_triples,
            **kwargs,
        )
# Script entry point: only runs when this module is executed directly.
# ``cli`` is not defined in this module; presumably it is inherited from the
# dataset base class.
if __name__ == "__main__":
    Wikidata5M.cli()