@@ -237,7 +237,10 @@ class BIOSCAN1M(VisionDataset):
237
237
238
238
modality : str or Iterable[str], default=("image", "dna")
239
239
Which data modalities to use. One of, or a list of:
240
- ``"image"``, ``"dna"``.
240
+ ``"image"``, ``"dna"``, or any column name in the metadata TSV file.
241
+
242
+ .. versionchanged:: 1.1.0
243
+ Added support for arbitrary modalities.
241
244
242
245
reduce_repeated_barcodes : bool, default=False
243
246
Whether to reduce the dataset to only one sample per barcode.
@@ -426,6 +429,13 @@ def __getitem__(self, index: int) -> Tuple[Any, ...]:
426
429
The DNA barcode, if the ``"dna"`` modality is requested, optionally
427
430
transformed by the ``dna_transform`` pipeline.
428
431
432
+ *modalities : Any
433
+ Any other modalities requested, as specified in the ``modality`` parameter.
434
+ The data is extracted from the appropriate column in the metadata TSV file,
435
+ without any transformations.
436
+
437
+ .. versionadded:: 1.1.0
438
+
429
439
target : int or Tuple[int, ...] or str or Tuple[str, ...] or None
430
440
The target(s), optionally transformed by the ``target_transform`` pipeline.
431
441
If ``target_format="index"``, the target(s) will be returned as integer
@@ -446,6 +456,8 @@ def __getitem__(self, index: int) -> Tuple[Any, ...]:
446
456
X = sample ["nucraw" ]
447
457
if self .dna_transform is not None :
448
458
X = self .dna_transform (X )
459
+ elif modality in self .metadata .columns :
460
+ X = sample [modality ]
449
461
else :
450
462
raise ValueError (f"Unfamiliar modality: { modality } " )
451
463
values .append (X )
@@ -499,7 +511,7 @@ def _check_exists(self, verbose=0) -> bool:
499
511
500
512
def _load_metadata (self ) -> pandas .DataFrame :
501
513
r"""
502
- Load metadata from CSV file and prepare it for training.
514
+ Load metadata from TSV file and prepare it for training.
503
515
"""
504
516
self .metadata = load_metadata (
505
517
self .metadata_path ,
0 commit comments