Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions bioscan_dataset/bioscan1m.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,19 +891,19 @@ def label2index(
int or numpy.array[int]
The integer index or indices corresponding to the text label or labels
in the specified column.
Entries containing missing values, indicated by empty strings, are mapped
to ``-1``.
Entries containing missing values, indicated by empty strings or NaN values,
are mapped to ``-1``.
"""
if column is not None:
pass
elif len(self.target_type) == 1:
column = self.target_type[0]
else:
raise ValueError("column must be specified if there isn't a single target_type")
if isinstance(label, str):
if pandas.isna(label) or label == "":
# Single index
if label == "":
return -1
return -1
if isinstance(label, str):
try:
return self.metadata[column].cat.categories.get_loc(label)
except KeyError:
Expand All @@ -915,7 +915,10 @@ def label2index(
)
labels = label
try:
out = [-1 if lab == "" else self.metadata[column].cat.categories.get_loc(lab) for lab in labels]
out = [
-1 if lab == "" or pandas.isna(lab) else self.metadata[column].cat.categories.get_loc(lab)
for lab in labels
]
except KeyError:
raise KeyError(f"Label {repr(label)} not found in metadata column {repr(column)}") from None
out = np.asarray(out)
Expand Down
15 changes: 9 additions & 6 deletions bioscan_dataset/bioscan5m.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,8 +612,8 @@ def label2index(
int or numpy.array[int]
The integer index or indices corresponding to the text label or labels
in the specified column.
Entries containing missing values, indicated by empty strings, are mapped
to ``-1``.
Entries containing missing values, indicated by empty strings or NaN values,
are mapped to ``-1``.

Examples
--------
Expand All @@ -628,10 +628,10 @@ def label2index(
column = self.target_type[0]
else:
raise ValueError("column must be specified if there isn't a single target_type")
if isinstance(label, str):
if pandas.isna(label) or label == "":
# Single index
if label == "":
return -1
return -1
if isinstance(label, str):
try:
return self.metadata[column].cat.categories.get_loc(label)
except KeyError:
Expand All @@ -643,7 +643,10 @@ def label2index(
)
labels = label
try:
out = [-1 if lab == "" else self.metadata[column].cat.categories.get_loc(lab) for lab in labels]
out = [
-1 if lab == "" or pandas.isna(lab) else self.metadata[column].cat.categories.get_loc(lab)
for lab in labels
]
except KeyError:
raise KeyError(f"Label {repr(label)} not found in metadata column {repr(column)}") from None
out = np.asarray(out)
Expand Down