Skip to content

Commit bf1810f

Browse files
Fixed bug where pyarrow conversions were causing string accessor to fail in search (#718)
1 parent d481be4 commit bf1810f

File tree

5 files changed: 101 additions and 1 deletion.

intake_esm/_search.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,10 @@ def search(
4545
column_has_iterables = column in columns_with_iterables
4646
for value in values:
4747
if column_has_iterables:
48-
mask = df[column].str.contains(value, regex=False)
48+
try:
49+
mask = df[column].str.contains(value, regex=False)
50+
except AttributeError:
51+
mask = df[column].apply(tuple).str.contains(value, regex=False)
4952
elif column_is_stringtype and is_pattern(value):
5053
mask = df[column].str.contains(value, regex=True, case=True, flags=0)
5154
elif pd.isna(value):
tests/sample-catalogs/access-columns-with-iterables.csv.gz

1.21 KB
Binary file not shown.
tests/sample-catalogs/access-columns-with-iterables.json

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,81 @@
1+
{
2+
"esmcat_version": "0.0.1",
3+
"attributes": [
4+
{
5+
"column_name": "filename",
6+
"vocabulary": ""
7+
},
8+
{
9+
"column_name": "file_id",
10+
"vocabulary": ""
11+
},
12+
{
13+
"column_name": "path",
14+
"vocabulary": ""
15+
},
16+
{
17+
"column_name": "filename_timestamp",
18+
"vocabulary": ""
19+
},
20+
{
21+
"column_name": "frequency",
22+
"vocabulary": ""
23+
},
24+
{
25+
"column_name": "start_date",
26+
"vocabulary": ""
27+
},
28+
{
29+
"column_name": "end_date",
30+
"vocabulary": ""
31+
},
32+
{
33+
"column_name": "variable",
34+
"vocabulary": ""
35+
},
36+
{
37+
"column_name": "variable_long_name",
38+
"vocabulary": ""
39+
},
40+
{
41+
"column_name": "variable_standard_name",
42+
"vocabulary": ""
43+
},
44+
{
45+
"column_name": "variable_cell_methods",
46+
"vocabulary": ""
47+
},
48+
{
49+
"column_name": "variable_units",
50+
"vocabulary": ""
51+
},
52+
{
53+
"column_name": "realm",
54+
"vocabulary": ""
55+
}
56+
],
57+
"assets": {
58+
"column_name": "path",
59+
"format": "netcdf",
60+
"format_column_name": null
61+
},
62+
"aggregation_control": {
63+
"variable_column_name": "variable",
64+
"groupby_attrs": ["file_id", "frequency"],
65+
"aggregations": [
66+
{
67+
"type": "join_existing",
68+
"attribute_name": "start_date",
69+
"options": {
70+
"dim": "time",
71+
"combine": "by_coords"
72+
}
73+
}
74+
]
75+
},
76+
"id": "01deg_jra55v13_ryf9091",
77+
"description": "0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.3 RYF9091 repeat year forcing (May 1990 to Apr 1991)",
78+
"title": null,
79+
"last_updated": "2025-03-04T01:25:35Z",
80+
"catalog_file": "access-columns-with-iterables.csv.gz"
81+
}

tests/test_core.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import intake_esm
2020

2121
from .utils import (
22+
access_columns_with_iterables_cat,
2223
catalog_dict_records,
2324
cdf_cat_sample_cesmle,
2425
cdf_cat_sample_cmip5,
@@ -201,6 +202,18 @@ def test_catalog_search(path, query, expected_size):
201202
assert len(new_cat) == expected_size
202203

203204

205+
@pytest.mark.parametrize(
206+
'path, columns_with_iterables, query, expected_size',
207+
[
208+
(access_columns_with_iterables_cat, ['variable'], {'variable': ['aice_m']}, 1),
209+
],
210+
)
211+
def test_catalog_search_columns_with_iterables(path, columns_with_iterables, query, expected_size):
212+
cat = intake.open_esm_datastore(path, columns_with_iterables=columns_with_iterables)
213+
new_cat = cat.search(**query)
214+
assert len(new_cat) == expected_size
215+
216+
204217
def test_catalog_with_registry_search():
205218
cat = intake.open_esm_datastore(zarr_cat_aws_cesm, registry=registry)
206219
new_cat = cat.search(variable='FOO')

tests/utils.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,9 @@
1616
'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.json'
1717
)
1818
mixed_cat_sample_cmip6 = os.path.join(here, 'sample-catalogs/cmip6-bcc-mixed-formats.json')
19+
access_columns_with_iterables_cat = os.path.join(
20+
here, 'sample-catalogs/access-columns-with-iterables.json'
21+
)
1922

2023

2124
sample_df = pd.DataFrame(

0 commit comments

Comments
 (0)