# Source code for pykeen.datasets.cskg

# -*- coding: utf-8 -*-

"""The `Common Sense Knowledge Graph <https://github.com/usc-isi-i2/cskg>`_ dataset.

- GitHub Repository: https://github.com/usc-isi-i2/cskg
- Paper: https://arxiv.org/pdf/2012.11490.pdf
- Data download: https://zenodo.org/record/4331372/files/cskg.tsv.gz
"""

import logging

from .base import SingleTabbedDataset
from ..typing import TorchRandomHint

__all__ = [
    'CSKG',
]

URL = 'https://zenodo.org/record/4331372/files/cskg.tsv.gz'


class CSKG(SingleTabbedDataset):
    """The CommonSense Knowledge Graph (CSKG) dataset.

    CSKG combines several knowledge graphs holding "common sense" knowledge
    into one graph. It contains 2,087,833 entities, 58 relations, and
    5,748,411 triples.

    .. [ilievski2020] Ilievski, F., Szekely, P., & Zhang, B. (2020).
       `CSKG: The CommonSense Knowledge Graph <http://arxiv.org/abs/2012.11490>`_.
       *arxiv*, 2012.11490.
    """

    def __init__(self, create_inverse_triples: bool = False, random_state: TorchRandomHint = 0, **kwargs):
        """Set up the `CSKG <https://github.com/usc-isi-i2/cskg>`_ dataset from [ilievski2020]_.

        :param create_inverse_triples: Whether inverse triples should be created. Defaults to false.
        :param random_state: The random seed used when splitting the dataset. Defaults to 0.
        :param kwargs: Additional keyword arguments forwarded unchanged to
            :class:`pykeen.datasets.base.SingleTabbedDataset`.
        """
        # Restrict parsing to the three columns that make up a triple; the
        # remaining columns of the TSV are not requested.
        # NOTE(review): presumably the base class hands these options to
        # pandas.read_csv - confirm against SingleTabbedDataset.
        triple_columns = ['node1', 'relation', 'node2']
        super().__init__(
            url=URL,
            create_inverse_triples=create_inverse_triples,
            random_state=random_state,
            read_csv_kwargs={'usecols': triple_columns},
            **kwargs,
        )
def _main():
    """Instantiate :class:`CSKG` with ``eager=True`` and call its ``summarize`` method."""
    CSKG(eager=True).summarize()


if __name__ == '__main__':
    # Configure root logging at INFO level before running the script entry point.
    logging.basicConfig(level=logging.INFO)
    _main()