@@ -1925,15 +1925,26 @@ cdef class DenseArrayImpl(Array):
1925
1925
if self .view_attr :
1926
1926
result = self .subarray (selection , attrs = (self .view_attr ,))
1927
1927
return result [self .view_attr ]
1928
- else :
1929
- result = self .subarray (selection )
1930
- for i in range (self .schema .nattr ):
1931
- attr = self .schema .attr (i )
1932
- enum_label = attr .enum_label
1933
- if enum_label is not None :
1934
- values = self .enum (enum_label ).values ()
1935
- result [attr .name ] = np .array ([values [idx ] for idx in result [attr .name ]])
1936
- return result
1928
+
1929
+ result = self .subarray (selection )
1930
+ for i in range (self .schema .nattr ):
1931
+ attr = self .schema .attr (i )
1932
+ enum_label = attr .enum_label
1933
+ if enum_label is not None :
1934
+ values = self .enum (enum_label ).values ()
1935
+ if attr .isnullable :
1936
+ data = np .array ([values [idx ] for idx in result [attr .name ].data ])
1937
+ result [attr .name ] = np .ma .array (
1938
+ data , mask = ~ result [attr .name ].mask )
1939
+ else :
1940
+ result [attr .name ] = np .array (
1941
+ [values [idx ] for idx in result [attr .name ]])
1942
+ else :
1943
+ if attr .isnullable :
1944
+ result [attr .name ] = np .ma .array (result [attr .name ].data ,
1945
+ mask = ~ result [attr .name ].mask )
1946
+
1947
+ return result
1937
1948
1938
1949
def __repr__ (self ):
1939
1950
if self .isopen :
@@ -2182,6 +2193,10 @@ cdef class DenseArrayImpl(Array):
2182
2193
arr .shape = np .prod (output_shape )
2183
2194
2184
2195
out [name ] = arr
2196
+
2197
+ if self .schema .has_attr (name ) and self .attr (name ).isnullable :
2198
+ out [name ] = np .ma .array (out [name ], mask = results [name ][2 ].astype (bool ))
2199
+
2185
2200
return out
2186
2201
2187
2202
def __setitem__ (self , object selection , object val ):
@@ -2272,14 +2287,34 @@ cdef class DenseArrayImpl(Array):
2272
2287
# Create list of attribute names and values
2273
2288
for attr_idx in range (self .schema .nattr ):
2274
2289
attr = self .schema .attr (attr_idx )
2275
- k = attr .name
2276
- v = val [k ]
2277
- attr = self . schema . attr ( k )
2290
+ name = attr .name
2291
+ attr_val = val [name ]
2292
+
2278
2293
attributes .append (attr ._internal_name )
2279
2294
# object arrays are var-len and handled later
2280
- if type (v ) is np .ndarray and v .dtype is not np .dtype ('O' ):
2281
- v = np .ascontiguousarray (v , dtype = attr .dtype )
2282
- values .append (v )
2295
+ if type (attr_val ) is np .ndarray and attr_val .dtype is not np .dtype ('O' ):
2296
+ attr_val = np .ascontiguousarray (attr_val , dtype = attr .dtype )
2297
+
2298
+ try :
2299
+ if attr .isvar :
2300
+ # ensure that the value is array-convertible, for example: pandas.Series
2301
+ attr_val = np .asarray (attr_val )
2302
+ if attr .isnullable and name not in nullmaps :
2303
+ nullmaps [name ] = np .array ([int (v is not None ) for v in attr_val ], dtype = np .uint8 )
2304
+ else :
2305
+ if (np .issubdtype (attr .dtype , np .string_ ) and not
2306
+ (np .issubdtype (attr_val .dtype , np .string_ ) or attr_val .dtype == np .dtype ('O' ))):
2307
+ raise ValueError ("Cannot write a string value to non-string "
2308
+ "typed attribute '{}'!" .format (name ))
2309
+
2310
+ if attr .isnullable and name not in nullmaps :
2311
+ nullmaps [name ] = ~ np .ma .masked_invalid (attr_val ).mask
2312
+ attr_val = np .nan_to_num (attr_val )
2313
+ attr_val = np .ascontiguousarray (attr_val , dtype = attr .dtype )
2314
+ except Exception as exc :
2315
+ raise ValueError (f"NumPy array conversion check failed for attr '{ name } '" ) from exc
2316
+
2317
+ values .append (attr_val )
2283
2318
2284
2319
elif np .isscalar (val ):
2285
2320
for i in range (self .schema .nattr ):
@@ -2290,10 +2325,29 @@ cdef class DenseArrayImpl(Array):
2290
2325
values .append (A )
2291
2326
elif self .schema .nattr == 1 :
2292
2327
attr = self .schema .attr (0 )
2328
+ name = attr .name
2293
2329
attributes .append (attr ._internal_name )
2294
2330
# object arrays are var-len and handled later
2295
2331
if type (val ) is np .ndarray and val .dtype is not np .dtype ('O' ):
2296
2332
val = np .ascontiguousarray (val , dtype = attr .dtype )
2333
+ try :
2334
+ if attr .isvar :
2335
+ # ensure that the value is array-convertible, for example: pandas.Series
2336
+ val = np .asarray (val )
2337
+ if attr .isnullable and name not in nullmaps :
2338
+ nullmaps [name ] = np .array ([int (v is not None ) for v in val ], dtype = np .uint8 )
2339
+ else :
2340
+ if (np .issubdtype (attr .dtype , np .string_ ) and not
2341
+ (np .issubdtype (val .dtype , np .string_ ) or val .dtype == np .dtype ('O' ))):
2342
+ raise ValueError ("Cannot write a string value to non-string "
2343
+ "typed attribute '{}'!" .format (name ))
2344
+
2345
+ if attr .isnullable and name not in nullmaps :
2346
+ nullmaps [name ] = ~ np .ma .fix_invalid (val ).mask
2347
+ val = np .nan_to_num (val )
2348
+ val = np .ascontiguousarray (val , dtype = attr .dtype )
2349
+ except Exception as exc :
2350
+ raise ValueError (f"NumPy array conversion check failed for attr '{ name } '" ) from exc
2297
2351
values .append (val )
2298
2352
elif self .view_attr is not None :
2299
2353
# Support single-attribute assignment for multi-attr array
@@ -2329,9 +2383,6 @@ cdef class DenseArrayImpl(Array):
2329
2383
if not isinstance (val , np .ndarray ):
2330
2384
raise TypeError (f"Expected NumPy array for attribute '{ key } ' "
2331
2385
f"validity bitmap, got { type (val )} " )
2332
- if val .dtype != np .uint8 :
2333
- raise TypeError (f"Expected NumPy uint8 array for attribute '{ key } ' "
2334
- f"validity bitmap, got { val .dtype } " )
2335
2386
2336
2387
_write_array (
2337
2388
ctx_ptr ,
@@ -2769,17 +2820,19 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
2769
2820
if attr .isvar :
2770
2821
# ensure that the value is array-convertible, for example: pandas.Series
2771
2822
attr_val = np .asarray (attr_val )
2823
+ if attr .isnullable and name not in nullmaps :
2824
+ nullmaps [name ] = np .array ([int (v is not None ) for v in attr_val ], dtype = np .uint8 )
2772
2825
else :
2773
2826
if (np .issubdtype (attr .dtype , np .string_ ) and not
2774
2827
(np .issubdtype (attr_val .dtype , np .string_ ) or attr_val .dtype == np .dtype ('O' ))):
2775
2828
raise ValueError ("Cannot write a string value to non-string "
2776
2829
"typed attribute '{}'!" .format (name ))
2777
-
2830
+
2831
+ if attr .isnullable and name not in nullmaps :
2832
+ nullmaps [name ] = ~ np .ma .masked_invalid (attr_val ).mask
2833
+ attr_val = np .nan_to_num (attr_val )
2778
2834
attr_val = np .ascontiguousarray (attr_val , dtype = attr .dtype )
2779
2835
2780
- if attr .isnullable and attr .name not in nullmaps :
2781
- nullmaps [attr .name ] = np .array ([int (v is not None ) for v in attr_val ], dtype = np .uint8 )
2782
-
2783
2836
except Exception as exc :
2784
2837
raise ValueError (f"NumPy array conversion check failed for attr '{ name } '" ) from exc
2785
2838
@@ -2919,7 +2972,18 @@ cdef class SparseArrayImpl(Array):
2919
2972
enum_label = attr .enum_label
2920
2973
if enum_label is not None :
2921
2974
values = self .enum (enum_label ).values ()
2922
- result [attr .name ] = np .array ([values [idx ] for idx in result [attr .name ]])
2975
+ if attr .isnullable :
2976
+ data = np .array ([values [idx ] for idx in result [attr .name ].data ])
2977
+ result [attr .name ] = np .ma .array (
2978
+ data , mask = ~ result [attr .name ].mask )
2979
+ else :
2980
+ result [attr .name ] = np .array (
2981
+ [values [idx ] for idx in result [attr .name ]])
2982
+ else :
2983
+ if attr .isnullable :
2984
+ result [attr .name ] = np .ma .array (result [attr .name ].data ,
2985
+ mask = ~ result [attr .name ].mask )
2986
+
2923
2987
return result
2924
2988
2925
2989
def query (self , attrs = None , cond = None , attr_cond = None , dims = None ,
@@ -3207,6 +3271,9 @@ cdef class SparseArrayImpl(Array):
3207
3271
else :
3208
3272
arr .dtype = el_dtype
3209
3273
out [final_name ] = arr
3274
+
3275
+ if self .schema .has_attr (final_name ) and self .attr (final_name ).isnullable :
3276
+ out [final_name ] = np .ma .array (out [final_name ], mask = results [name ][2 ])
3210
3277
3211
3278
return out
3212
3279
0 commit comments