|
10 | 10 |
|
11 | 11 | import os
|
12 | 12 | from enum import Enum
|
13 |
| -from typing import Any, Tuple |
| 13 | +from typing import Any, Iterable, Tuple, Union |
14 | 14 |
|
| 15 | +import numpy as np |
| 16 | +import numpy.typing as npt |
15 | 17 | import pandas
|
16 | 18 | import PIL
|
17 | 19 | import torch
|
@@ -406,11 +408,88 @@ def __init__(
|
406 | 408 |
|
407 | 409 | self._load_metadata()
|
408 | 410 |
|
def index2label(self, column: str, index: Union[int, Iterable[int]]) -> Union[str, npt.NDArray[np.str_]]:
    r"""
    Convert target's integer index to text label.

    .. versionadded:: 1.1.0

    Parameters
    ----------
    column : str
        The dataset column name to map. This is the same as the ``target_type``.
    index : int or Iterable[int]
        The integer index or indices to map to labels.
        Any iterable is accepted, including generators and empty sequences.

    Returns
    -------
    str or numpy.array[str]
        The text label or labels corresponding to the integer index or indices
        in the specified column.
        Entries containing missing values, indicated by negative indices, are mapped
        to an empty string. Array outputs have ``dtype=object`` and the same
        shape as the input.

    Raises
    ------
    IndexError
        If a non-negative index is not smaller than the number of categories
        in the column.

    Examples
    --------
    >>> dataset.index2label("order", 4)
    'Diptera'
    >>> dataset.index2label("order", [4, 9, -1, 4])
    array(['Diptera', 'Lepidoptera', '', 'Diptera'], dtype=object)
    """
    if not hasattr(index, "__len__") and hasattr(index, "__iter__"):
        # Lazy iterables (e.g. generators) have no length; materialize them
        # so they follow the array path instead of the scalar path.
        index = list(index)
    if not hasattr(index, "__len__"):
        # Single index
        if index < 0:
            return ""
        return self.metadata[column].cat.categories[index]
    index = np.asarray(index)
    if index.size == 0:
        # An empty input defaults to float dtype, which cannot be used as an
        # indexer; there is nothing to look up anyway.
        return np.empty(index.shape, dtype=object)
    categories = np.asarray(self.metadata[column].cat.categories, dtype=object)
    # Start from "all missing" and fill in only the valid entries. Every
    # negative index is treated as missing, so none is ever used to index
    # the categories from the end or fall out of bounds.
    valid = index >= 0
    out = np.full(index.shape, "", dtype=object)
    out[valid] = categories[index[valid]]
    return out
| 449 | + |
def label2index(self, column: str, label: Union[str, Iterable[str]]) -> Union[int, npt.NDArray[np.int_]]:
    r"""
    Convert target's text label to integer index.

    .. versionadded:: 1.1.0

    Parameters
    ----------
    column : str
        The dataset column name to map. This is the same as the ``target_type``.
    label : str or Iterable[str]
        The text label or labels to map to integer indices.

    Returns
    -------
    int or numpy.array[int]
        The integer index or indices corresponding to the text label or labels
        in the specified column.
        Entries containing missing values, indicated by empty strings, are mapped
        to ``-1``. Array outputs always have an integer dtype, including for
        empty input.

    Raises
    ------
    KeyError
        If a non-empty label is not one of the categories of the column.

    Examples
    --------
    >>> dataset.label2index("order", "Diptera")
    4
    >>> dataset.label2index("order", ["Diptera", "Lepidoptera", "", "Diptera"])
    array([4, 9, -1, 4])
    """
    if isinstance(label, str):
        # Single label
        if label == "":
            return -1
        return self.metadata[column].cat.categories.get_loc(label)
    # Look the categories up once rather than once per label.
    categories = self.metadata[column].cat.categories
    indices = [-1 if lab == "" else categories.get_loc(lab) for lab in label]
    # An explicit integer dtype keeps empty input from becoming float64.
    return np.asarray(indices, dtype=np.int_)
| 487 | + |
def __len__(self):
    """Return the length of ``self.metadata``."""
    n_entries = len(self.metadata)
    return n_entries
|
411 | 490 |
|
412 | 491 | def __getitem__(self, index: int) -> Tuple[Any, ...]:
|
413 |
| - """ |
| 492 | + r""" |
414 | 493 | Get a sample from the dataset.
|
415 | 494 |
|
416 | 495 | Parameters
|
|
0 commit comments