@@ -321,9 +321,8 @@ def _df_from_file(
321
321
cat .catalog_file = csv_path
322
322
323
323
reader = CatalogFileDataReader (cat .catalog_file , storage_options , ** read_kwargs )
324
- read = reader ()
325
- self ._iterable_dtype_map = reader ._dtype_map
326
- return read
324
+ self ._iterable_dtype_map = reader .dtype_map
325
+ return reader .frames
327
326
328
327
@property
329
328
def lf (self ) -> pl .LazyFrame :
@@ -610,8 +609,8 @@ def __init__(
610
609
f'Expected one of { __filetypes__ } '
611
610
)
612
611
613
- # Set default dtype_map to tuple
614
- self ._dtype_map = { key : 'tuple' for key in self .read_kwargs . get ( 'converters' , {}). keys ()}
612
+ self . _dtype_map : dict [ str , str ] = {}
613
+ self .frames = self ._read ()
615
614
616
615
def _read_csv_pd (self ) -> FramesModel :
617
616
"""Read a catalog file stored as a csv using pandas"""
@@ -653,17 +652,14 @@ def _read_csv_pl(self) -> FramesModel:
653
652
)
654
653
.collect ()
655
654
.to_dicts ()
656
- ):
655
+ ): # to_dicts() returns an empty (falsy) list when there are no rows, so the walrus guards the [0] lookup below
657
656
self ._dtype_map = dtype_map [0 ]
658
657
659
658
lf = lf .with_columns (
660
659
[
661
660
pl .col (colname )
662
661
.str .replace ('^.' , '[' ) # Replace first/last chars with [ or ].
663
662
.str .replace ('.$' , ']' ) # set/tuple => list
664
- # ^ We also need to cache - probably as an attriubte on this class
665
- # what we found ie. '[' => list, '(' => tuple, etc., so we can write
666
- # the correct type back when we serialise the catalog. # TODO
667
663
.str .replace_all ("'" , '"' )
668
664
.str .json_decode () # JSON strings must be double-quoted, hence the single-to-double quote swap just above
669
665
for colname in converters .keys ()
@@ -680,7 +676,7 @@ def _read_parquet_pl(self) -> FramesModel:
680
676
)
681
677
return FramesModel (lf = lf )
682
678
683
- def __call__ (self ):
679
+ def _read (self ):
684
680
if self .driver == 'polars' :
685
681
if self .filetype == 'csv' :
686
682
return self ._read_csv_pl ()
@@ -694,3 +690,8 @@ def __call__(self):
694
690
return self ._read_csv_pd ()
695
691
else :
696
692
raise ValueError (f'Unsupported file type { self .filetype } for pandas reader' )
693
+
694
+ @property
695
+ def dtype_map (self ) -> dict [str , str ]:
696
+ """Return a map of column names to their dtypes for columns with iterables."""
697
+ return self ._dtype_map
0 commit comments