# Source code for pykeen.datasets.db100k

# -*- coding: utf-8 -*-

"""The DB100K dataset."""

from docdata import parse_docdata

from .base import UnpackedRemoteDataset

# Root URL of the DB100K folder in the iieir-km/ComplEx-NNE_AER GitHub repository (raw-content
# host); the per-split file names are appended to it when the download URLs are built.
BASE_URL = 'https://raw.githubusercontent.com/iieir-km/ComplEx-NNE_AER/master/datasets/DB100K'

# Names exported by a star-import of this module.
__all__ = [
    'DB100K',
]


# NOTE(review): the block after ``---`` in the class docstring is structured (YAML-style)
# metadata, presumably consumed by the ``parse_docdata`` decorator - keep its keys, values
# and nesting intact when editing the docstring.
@parse_docdata
class DB100K(UnpackedRemoteDataset):
    """The DB100K dataset from [ding2018]_.

    ---
    name: DB100K
    citation:
        author: Ding
        year: 2018
        link: https://arxiv.org/abs/1805.02408
        github: iieir-km/ComplEx-NNE_AER
    statistics:
        entities: 99604
        relations: 470
        training: 597482
        testing: 50000
        validation: 49997
        triples: 697479
    """

    def __init__(self, create_inverse_triples: bool = False, **kwargs) -> None:
        """Initialize the DB100K dataset.

        :param create_inverse_triples: Should inverse triples be created? Defaults to false.
        :param kwargs: keyword arguments passed to :class:`pykeen.datasets.base.UnpackedRemoteDataset`.
            If ``stream`` is not given, it is set to ``False`` before being forwarded.
        """
        # GitHub's raw.githubusercontent.com service rejects requests that are streamable. This is
        # normally the default for all of PyKEEN's remote datasets, so just switch the default here.
        # ``setdefault`` keeps an explicit ``stream=...`` from the caller untouched.
        kwargs.setdefault('stream', False)
        super().__init__(
            training_url=f'{BASE_URL}/_train.txt',
            testing_url=f'{BASE_URL}/_test.txt',
            validation_url=f'{BASE_URL}/_valid.txt',
            create_inverse_triples=create_inverse_triples,
            **kwargs,
        )
# When this module is executed as a script (not imported), call ``DB100K.cli()``.
if __name__ == '__main__':
    DB100K.cli()