Skip to content

Commit ff83475

Browse files
@aulemahal suggestion (edge case, single item iterable column)
1 parent 915aeae commit ff83475

File tree

5 files changed

+92
-1
lines changed

5 files changed

+92
-1
lines changed

intake_esm/cat.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ def pandas(self) -> pd.DataFrame:
516516

517517
if self.pl_df is not None:
518518
self.df = self.pl_df.to_pandas(use_pyarrow_extension_array=True)
519-
for colname in self.columns_with_iterables:
519+
for colname in self.columns_with_iterables: # TODO: can all iterable columns be converted to tuples in a single pass?
520520
self.df[colname] = self.df[colname].apply(tuple)
521521
return self.df
522522

@@ -660,6 +660,7 @@ def _read_csv_pl(self) -> FramesModel:
660660
pl.col(colname)
661661
.str.replace('^.', '[') # Replace first/last chars with [ or ].
662662
.str.replace('.$', ']') # set/tuple => list
663+
.str.replace(',]$', ']') # Remove trailing commas
663664
.str.replace_all("'", '"')
664665
.str.json_decode() # JSON strings must be double-quoted, hence the single-to-double quote replacement above
665666
for colname in converters.keys()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
filename,file_id,path,filename_timestamp,frequency,start_date,end_date,variable,variable_long_name,variable_standard_name,variable_cell_methods,variable_units,realm
2+
iceh.1900-01.nc,iceh_XXXX_XX,/g/data/ik11/outputs/access-om2-01/01deg_jra55v13_ryf9091/output000/ice/OUTPUT/iceh.1900-01.nc,1900-01,1mon,"1900-01-01, 00:00:00","1900-02-01, 00:00:00","('time',)","('model time',)","('',)","('',)","('days since 1900-01-01 00:00:00',)",seaIce
3+
iceh.1900-02.nc,iceh_XXXX_XX,/g/data/ik11/outputs/access-om2-01/01deg_jra55v13_ryf9091/output000/ice/OUTPUT/iceh.1900-02.nc,1900-02,1mon,"1900-02-01, 00:00:00","1900-03-01, 00:00:00","('time',)","('model time',)","('',)","('',)","('days since 1900-01-01 00:00:00',)",seaIce
4+
iceh.1900-03.nc,iceh_XXXX_XX,/g/data/ik11/outputs/access-om2-01/01deg_jra55v13_ryf9091/output000/ice/OUTPUT/iceh.1900-03.nc,1900-03,1mon,"1900-03-01, 00:00:00","1900-04-01, 00:00:00","('time',)","('model time',)","('',)","('',)","('days since 1900-01-01 00:00:00',)",seaIce
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"esmcat_version": "0.0.1",
3+
"attributes": [
4+
{
5+
"column_name": "filename",
6+
"vocabulary": ""
7+
},
8+
{
9+
"column_name": "file_id",
10+
"vocabulary": ""
11+
},
12+
{
13+
"column_name": "path",
14+
"vocabulary": ""
15+
},
16+
{
17+
"column_name": "filename_timestamp",
18+
"vocabulary": ""
19+
},
20+
{
21+
"column_name": "frequency",
22+
"vocabulary": ""
23+
},
24+
{
25+
"column_name": "start_date",
26+
"vocabulary": ""
27+
},
28+
{
29+
"column_name": "end_date",
30+
"vocabulary": ""
31+
},
32+
{
33+
"column_name": "variable",
34+
"vocabulary": ""
35+
},
36+
{
37+
"column_name": "variable_long_name",
38+
"vocabulary": ""
39+
},
40+
{
41+
"column_name": "variable_standard_name",
42+
"vocabulary": ""
43+
},
44+
{
45+
"column_name": "variable_cell_methods",
46+
"vocabulary": ""
47+
},
48+
{
49+
"column_name": "variable_units",
50+
"vocabulary": ""
51+
},
52+
{
53+
"column_name": "realm",
54+
"vocabulary": ""
55+
}
56+
],
57+
"assets": {
58+
"column_name": "path",
59+
"format": "netcdf",
60+
"format_column_name": null
61+
},
62+
"aggregation_control": {
63+
"variable_column_name": "variable",
64+
"groupby_attrs": ["file_id", "frequency"],
65+
"aggregations": [
66+
{
67+
"type": "join_existing",
68+
"attribute_name": "start_date",
69+
"options": {
70+
"dim": "time",
71+
"combine": "by_coords"
72+
}
73+
}
74+
]
75+
},
76+
"id": "01deg_jra55v13_ryf9091",
77+
"description": "0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.3 RYF9091 repeat year forcing (May 1990 to Apr 1991)",
78+
"title": null,
79+
"last_updated": "2025-03-04T01:25:35Z",
80+
"catalog_file": "access-single-item-iterables.csv"
81+
}

tests/test_cat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
access_columns_with_lists_cat,
1414
access_columns_with_sets_cat,
1515
access_columns_with_tuples_cat,
16+
access_single_item_iterables_cat,
1617
catalog_dict_records,
1718
cdf_cat_sample_cesmle,
1819
cdf_cat_sample_cmip5,
@@ -148,6 +149,7 @@ def test_esmcatmodel_unique_and_nunique(query, expected_unique_vals, expected_nu
148149
(access_columns_with_lists_cat, list),
149150
(access_columns_with_tuples_cat, tuple),
150151
(access_columns_with_sets_cat, set),
152+
(access_single_item_iterables_cat, tuple),
151153
],
152154
)
153155
def test_esmcatmodel_roundtrip_itercols_type_stable(catalog_file, expected_type):

tests/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
here, 'sample-catalogs/access-columns-with-tuples.json'
2424
)
2525
access_columns_with_sets_cat = os.path.join(here, 'sample-catalogs/access-columns-with-sets.json')
26+
access_single_item_iterables_cat = os.path.join(
27+
here, 'sample-catalogs/access-single-item-iterables.json'
28+
)
2629
zarr_v2_cat = os.path.join(here, 'sample-catalogs/cesm1-lens-zarr2.json')
2730
zarr_v3_cat = os.path.join(here, 'sample-catalogs/cesm1-lens-zarr2.json')
2831

0 commit comments

Comments
 (0)