|
56 | 56 | rename,
|
57 | 57 | )
|
58 | 58 | from .fingerprint import Hasher
|
59 |
| -from .info import DatasetInfo, DatasetInfosDict, PostProcessedInfo |
| 59 | +from .info import DatasetInfo, PostProcessedInfo |
60 | 60 | from .iterable_dataset import ArrowExamplesIterable, ExamplesIterable, IterableDataset
|
61 | 61 | from .keyhash import DuplicatedKeysError
|
62 | 62 | from .naming import INVALID_WINDOWS_CHARACTERS_IN_PATH, camelcase_to_snakecase
|
@@ -349,9 +349,7 @@ def __init__(
|
349 | 349 | # prepare info: DatasetInfo is a standardized dataclass across all datasets
|
350 | 350 | # Prefill datasetinfo
|
351 | 351 | if info is None:
|
352 |
| - # TODO FOR PACKAGED MODULES IT IMPORTS DATA FROM src/packaged_modules which doesn't make sense |
353 |
| - info = self.get_exported_dataset_info() |
354 |
| - info.update(self._info()) |
| 352 | + info = self._info() |
355 | 353 | info.builder_name = self.name
|
356 | 354 | info.dataset_name = self.dataset_name
|
357 | 355 | info.config_name = self.config.name
|
@@ -391,7 +389,7 @@ def __init__(
|
391 | 389 | if os.path.exists(self._cache_dir): # check if data exist
|
392 | 390 | if len(os.listdir(self._cache_dir)) > 0:
|
393 | 391 | if os.path.exists(os.path.join(self._cache_dir, config.DATASET_INFO_FILENAME)):
|
394 |
| - logger.info("Overwrite dataset info from restored data version if exists.") |
| 392 | + logger.debug("Overwrite dataset info from restored data version if exists.") |
395 | 393 | self.info = DatasetInfo.from_directory(self._cache_dir)
|
396 | 394 | else: # dir exists but no data, remove the empty dir as data aren't available anymore
|
397 | 395 | logger.warning(
|
@@ -503,35 +501,6 @@ def update_hash_with_config_parameters(hash: str, config_parameters: dict) -> st
|
503 | 501 | if os.path.isdir(legacy_cache_dir):
|
504 | 502 | return legacy_relative_data_dir
|
505 | 503 |
|
506 |
| - @classmethod |
507 |
| - def get_all_exported_dataset_infos(cls) -> DatasetInfosDict: |
508 |
| - """Empty dict if doesn't exist |
509 |
| -
|
510 |
| - Example: |
511 |
| -
|
512 |
| - ```py |
513 |
| - >>> from datasets import load_dataset_builder |
514 |
| - >>> ds_builder = load_dataset_builder('vivos') |
515 |
| - >>> ds_builder.get_all_exported_dataset_infos() |
516 |
| - {'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value('string'), 'path': Value('string'), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value('string')}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)} |
517 |
| - ``` |
518 |
| - """ |
519 |
| - return DatasetInfosDict.from_directory(cls.get_imported_module_dir()) |
520 |
| - |
521 |
| - def get_exported_dataset_info(self) -> DatasetInfo: |
522 |
| - """Empty `DatasetInfo` if doesn't exist |
523 |
| -
|
524 |
| - Example: |
525 |
| -
|
526 |
| - ```py |
527 |
| - >>> from datasets import load_dataset_builder |
528 |
| - >>> ds_builder = load_dataset_builder('cornell-movie-review-data/rotten_tomatoes') |
529 |
| - >>> ds_builder.get_exported_dataset_info() |
530 |
| - DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value('string'), 'path': Value('string'), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value('string')}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None) |
531 |
| - ``` |
532 |
| - """ |
533 |
| - return self.get_all_exported_dataset_infos().get(self.config.name, DatasetInfo()) |
534 |
| - |
535 | 504 | def _create_builder_config(
|
536 | 505 | self, config_name=None, custom_features=None, **config_kwargs
|
537 | 506 | ) -> tuple[BuilderConfig, str]:
|
|
0 commit comments