Skip to content

Map blocks unable to infer datatype of object arrays #58

@miguelcarcamov

Description

@miguelcarcamov

I'm trying to convert the underlying arrays of the following xarray dataset to cupy (GPU-backed) arrays using cupy-xarray:

<xarray.Dataset>
Dimensions:        (row: 119, xyz: 3)
Coordinates:
    ROWID          (row) int64 dask.array<chunksize=(119,), meta=np.ndarray>
Dimensions without coordinates: row, xyz
Data variables:
    POSITION       (row, xyz) float64 dask.array<chunksize=(119, 3), meta=np.ndarray>
    TYPE           (row) object dask.array<chunksize=(119,), meta=np.ndarray>
    NAME           (row) object dask.array<chunksize=(119,), meta=np.ndarray>
    MOUNT          (row) object dask.array<chunksize=(119,), meta=np.ndarray>
    OFFSET         (row, xyz) float64 dask.array<chunksize=(119, 3), meta=np.ndarray>
    FLAG_ROW       (row) bool dask.array<chunksize=(119,), meta=np.ndarray>
    DISH_DIAMETER  (row) float64 dask.array<chunksize=(119,), meta=np.ndarray>
    STATION        (row) object dask.array<chunksize=(119,), meta=np.ndarray>
Attributes:
    __daskms_partition_schema__:  ()

However, I'm getting the following error:

"name": "ValueError",
"message": "`dtype` inference failed in `map_blocks`.

Please specify the dtype explicitly using the `dtype` kwarg.

Original error is below:
------------------------
ValueError('Unsupported dtype object')

Traceback:
---------
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py\", line 456, in apply_infer_dtype
    o = func(*args, **kwargs)
        ^^^^^^^^^^^^^^^^^^^^^
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy/_creation/from_data.py\", line 88, in asarray
    return _core.array(a, dtype, False, order, blocking=blocking)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File \"cupy/_core/core.pyx\", line 2383, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2410, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2549, in cupy._core.core._array_default
",
	"stack": "---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[7], line 1
----> 1 dataset = x.read(filter_flag_column=False, calculate_psf=False)

File ~/Documents/pyralysis/src/pyralysis/io/daskms.py:134, in DaskMS.read(self, read_flagged_data, filter_flag_column, calculate_psf, taql_query, chunks)
    132 # Creating antenna object
    133 antennas = xds_from_table(self.ms_name_dask + \"ANTENNA\", taql_where=taql_query_flag_row)[0]
--> 134 antenna_obj = Antenna(dataset=antennas)
    136 if obs_obj.ntelescope > 1:
    137     # if there is more than one telescope in the dataset, allocate space for
    138     # one observation id per antenna
    139     antenna_obs_id = da.zeros_like(antenna_obj.dataset.ROWID, dtype=np.int32)

File <string>:4, in __init__(self, dataset)

File ~/Documents/pyralysis/src/pyralysis/base/antenna.py:36, in Antenna.__post_init__(self)
     33 self.logger.setLevel(logging.INFO)
     35 print(self.dataset)
---> 36 self.dataset = xarray_as_cupy(self.dataset)
     37 if self.dataset is not None:
     38     self.max_diameter = self.dataset.DISH_DIAMETER.data.max() * u.m

File ~/Documents/pyralysis/src/pyralysis/utils/xarray_cupy_transformer.py:16, in xarray_as_cupy(xarray_object)
     11 def xarray_as_cupy(
     12     xarray_object: Union[xarray.DataArray, xarray.Dataset] = None
     13 ) -> Union[xarray.DataArray, xarray.Dataset]:
     15     if cupy_xarray and dask.config.get(\"array.backend\") == \"cupy\" and xarray_object is not None:
---> 16         return xarray_object.as_cupy()
     17     else:
     18         return xarray_object

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:162, in _.<locals>.as_cupy(*args, **kwargs)
    161 def as_cupy(*args, **kwargs):
--> 162     return ds.cupy.as_cupy(*args, **kwargs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:119, in CupyDatasetAccessor.as_cupy(self)
    118 def as_cupy(self):
--> 119     data_vars = {var: da.as_cupy() for var, da in self.ds.data_vars.items()}
    120     return Dataset(data_vars=data_vars, coords=self.ds.coords, attrs=self.ds.attrs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:148, in _.<locals>.as_cupy(*args, **kwargs)
    147 def as_cupy(*args, **kwargs):
--> 148     return da.cupy.as_cupy(*args, **kwargs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy_xarray/accessors.py:56, in CupyDataArrayAccessor.as_cupy(self)
     32 \"\"\"
     33 Converts the DataArray's underlying array type to cupy.
     34 
   (...)
     52 
     53 \"\"\"
     54 if isinstance(self.da.data, dask_array_type):
     55     return DataArray(
---> 56         data=self.da.data.map_blocks(cp.asarray),
     57         coords=self.da.coords,
     58         dims=self.da.dims,
     59         name=self.da.name,
     60         attrs=self.da.attrs,
     61     )
     62 return DataArray(
     63     data=cp.asarray(self.da.data),
     64     coords=self.da.coords,
   (...)
     67     attrs=self.da.attrs,
     68 )

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py:2689, in Array.map_blocks(self, func, *args, **kwargs)
   2687 @wraps(map_blocks)
   2688 def map_blocks(self, func, *args, **kwargs):
-> 2689     return map_blocks(func, self, *args, **kwargs)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py:813, in map_blocks(func, name, token, dtype, chunks, drop_axis, new_axis, enforce_ndim, meta, *args, **kwargs)
    810     except Exception:
    811         pass
--> 813     dtype = apply_infer_dtype(func, args, original_kwargs, \"map_blocks\")
    815 if drop_axis:
    816     ndim_out = len(out_ind)

File ~/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py:481, in apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype, nout)
    479     msg = None
    480 if msg is not None:
--> 481     raise ValueError(msg)
    482 return getattr(o, \"dtype\", type(o)) if nout is None else tuple(e.dtype for e in o)

ValueError: `dtype` inference failed in `map_blocks`.

Please specify the dtype explicitly using the `dtype` kwarg.

Original error is below:
------------------------
ValueError('Unsupported dtype object')

Traceback:
---------
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/dask/array/core.py\", line 456, in apply_infer_dtype
    o = func(*args, **kwargs)
        ^^^^^^^^^^^^^^^^^^^^^
  File \"/home/miguel/.conda/envs/pyralysis-env/lib/python3.12/site-packages/cupy/_creation/from_data.py\", line 88, in asarray
    return _core.array(a, dtype, False, order, blocking=blocking)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File \"cupy/_core/core.pyx\", line 2383, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2410, in cupy._core.core.array
  File \"cupy/_core/core.pyx\", line 2549, in cupy._core.core._array_default
"

It seems that `map_blocks` dtype inference fails for the `object`-dtype (string) variables such as TYPE, NAME, MOUNT, and STATION, because cupy does not support `object` arrays. Could cupy-xarray avoid the inference step by passing an explicit `dtype` parameter to `map_blocks` (or skip converting `object`-dtype variables entirely, since they cannot live on the GPU)?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions