Source code for pykeen.datasets.umls

# -*- coding: utf-8 -*-

"""Get triples from the UMLS dataset."""

import os

from docdata import parse_docdata

from ..base import PathDataset

# Names exported by ``from <this module> import *``: the three split-file
# paths defined below and the dataset class itself.
__all__ = [
    'UMLS_TRAIN_PATH',
    'UMLS_TEST_PATH',
    'UMLS_VALIDATE_PATH',
    'UMLS',
]

# Absolute path of the directory that contains this module; the split files
# referenced below are resolved relative to it.
HERE = os.path.abspath(os.path.dirname(__file__))

# Paths of the training, testing and validation files, in that order.
UMLS_TRAIN_PATH, UMLS_TEST_PATH, UMLS_VALIDATE_PATH = (
    os.path.join(HERE, split_file)
    for split_file in ('train.txt', 'test.txt', 'valid.txt')
)


# NOTE: ``parse_docdata`` reads the structured metadata that follows the
# ``---`` marker in the class docstring, so that part must stay valid,
# correctly indented YAML.
@parse_docdata
class UMLS(PathDataset):
    """The UMLS dataset.

    ---
    name: Unified Medical Language System
    statistics:
        entities: 135
        relations: 46
        training: 5216
        testing: 661
        validation: 652
        triples: 6529
    citation:
        author: Zhenfeng Lei
        year: 2017
        github: ZhenfengLei/KGDatasets
    """

    def __init__(self, create_inverse_triples: bool = False, **kwargs):
        """Initialize the UMLS dataset.

        :param create_inverse_triples: Should inverse triples be created? Defaults to false.
        :param kwargs: keyword arguments passed to :class:`pykeen.datasets.base.PathDataset`.
        """
        # The three split files live next to this module; everything else is
        # forwarded unchanged to the parent class.
        super().__init__(
            training_path=UMLS_TRAIN_PATH,
            testing_path=UMLS_TEST_PATH,
            validation_path=UMLS_VALIDATE_PATH,
            create_inverse_triples=create_inverse_triples,
            **kwargs,
        )
if __name__ == '__main__':
    # Script entry point: build the dataset with default arguments and call
    # its ``summarize()`` method.
    UMLS().summarize()