@@ -110,7 +110,7 @@ _tiledb_dtype_to_numpy_typeid_convert ={
110
110
TILEDB_INT16 : np .NPY_INT16 ,
111
111
TILEDB_UINT16 : np .NPY_UINT16 ,
112
112
TILEDB_CHAR : np .NPY_STRING ,
113
- TILEDB_STRING_ASCII : np .NPY_STRING ,
113
+ TILEDB_STRING_ASCII : np .NPY_UNICODE ,
114
114
TILEDB_STRING_UTF8 : np .NPY_UNICODE ,
115
115
}
116
116
IF LIBTILEDB_VERSION_MAJOR >= 2 :
@@ -133,7 +133,7 @@ _tiledb_dtype_to_numpy_dtype_convert = {
133
133
TILEDB_INT16 : np .int16 ,
134
134
TILEDB_UINT16 : np .uint16 ,
135
135
TILEDB_CHAR : np .dtype ('S1' ),
136
- TILEDB_STRING_ASCII : np .dtype ('S ' ),
136
+ TILEDB_STRING_ASCII : np .dtype ('U ' ),
137
137
TILEDB_STRING_UTF8 : np .dtype ('U1' ),
138
138
}
139
139
IF LIBTILEDB_VERSION_MAJOR >= 2 :
@@ -1824,10 +1824,8 @@ cdef class Attr(object):
1824
1824
filters_str += repr (f ) + ", "
1825
1825
filters_str += "])"
1826
1826
1827
- attr_dtype = "ascii" if self .isascii else self .dtype
1828
-
1829
1827
# filters_str must be last with no spaces
1830
- return (f"""Attr(name={ repr (self .name )} , dtype='{ attr_dtype !s} ', """
1828
+ return (f"""Attr(name={ repr (self .name )} , dtype='{ self . dtype !s} ', """
1831
1829
f"""var={ self .isvar !s} , nullable={ self .isnullable !s} """
1832
1830
f"""{ filters_str } )""" )
1833
1831
@@ -1852,7 +1850,7 @@ cdef class Attr(object):
1852
1850
1853
1851
output .write ("<tr>" )
1854
1852
output .write (f"<td>{ self .name } </td>" )
1855
- output .write (f"<td>{ 'ascii' if self .isascii else self . dtype } </td>" )
1853
+ output .write (f"<td>{ self .isascii } </td>" )
1856
1854
output .write (f"<td>{ self .isvar } </td>" )
1857
1855
output .write (f"<td>{ self .isnullable } </td>" )
1858
1856
output .write (f"<td>{ self .filters ._repr_html_ ()} </td>" )
@@ -1903,8 +1901,12 @@ cdef class Dim(object):
1903
1901
if not ctx :
1904
1902
ctx = default_ctx ()
1905
1903
1904
+ is_string = (
1905
+ isinstance (dtype , str ) and dtype == "ascii"
1906
+ ) or np .dtype (dtype ) in (np .str_ , np .bytes_ )
1907
+
1906
1908
if var is not None :
1907
- if var and np . dtype ( dtype ) not in ( np . str_ , np . bytes_ ) :
1909
+ if var and not is_string :
1908
1910
raise TypeError ("'var=True' specified for non-str/bytes dtype" )
1909
1911
1910
1912
if domain is not None and len (domain ) != 2 :
@@ -1919,12 +1921,14 @@ cdef class Dim(object):
1919
1921
cdef void * tile_size_ptr = NULL
1920
1922
cdef np .dtype domain_dtype
1921
1923
1922
- if ((isinstance (dtype , str ) and dtype == "ascii" ) or
1923
- dtype == np .dtype ('S' )):
1924
+ if is_string :
1924
1925
# Handle var-len domain type
1925
1926
# (currently only TILEDB_STRING_ASCII)
1926
1927
# The dimension's domain is implicitly formed as
1927
1928
# coordinates are written.
1929
+ if dtype != "ascii" :
1930
+ warnings .warn ("Use 'ascii' for string dimensions." )
1931
+ dtype = np .dtype ("|U0" )
1928
1932
dim_datatype = TILEDB_STRING_ASCII
1929
1933
else :
1930
1934
if domain is None or len (domain ) != 2 :
@@ -1985,17 +1989,19 @@ cdef class Dim(object):
1985
1989
self .ptr = dim_ptr
1986
1990
1987
1991
def __repr__(self):
    """Return a constructor-style string describing this dimension.

    The string has the form
    ``Dim(name=..., domain=..., tile=..., dtype='...'[, var=True][, filters=FilterList([...])])``.

    A dimension whose TileDB datatype is ``TILEDB_STRING_ASCII`` is shown
    with ``dtype='ascii'`` and ``var=True``; any other dimension is shown
    with its ``self.dtype`` and no ``var`` entry.  The ``filters`` part is
    only emitted when ``self.filters`` is truthy.

    :rtype: str
    """
    # Decide once, from the TileDB datatype, whether this is an ASCII
    # string dimension.  Reusing this flag below avoids comparing an
    # ``np.dtype`` against the plain string "ascii": that comparison depends
    # on how NumPy handles strings that are not valid dtype specs and may
    # raise ``TypeError`` on older NumPy releases.
    is_ascii = self._get_type() == TILEDB_STRING_ASCII
    dtype = "ascii" if is_ascii else self.dtype

    filters = ""
    if self.filters:
        # Each filter repr is followed by ", " (including the last one),
        # which keeps the existing output format unchanged.
        filters = ", filters=FilterList(["
        for f in self.filters:
            filters += repr(f) + ", "
        filters += "])"

    # for consistency, print `var=True` for string-like types
    varlen = ", var=True" if is_ascii else ""
    return f"Dim(name={self.name!r}, domain={self.domain}, tile={self.tile!r}, dtype='{dtype}'{varlen}{filters})"
2004
+
1999
2005
2000
2006
def _repr_html_ (self ) -> str :
2001
2007
output = io .StringIO ()
@@ -2022,7 +2028,7 @@ cdef class Dim(object):
2022
2028
output .write (f"<td>{ self .domain } </td>" )
2023
2029
output .write (f"<td>{ self .tile } </td>" )
2024
2030
output .write (f"<td>{ self .dtype } </td>" )
2025
- output .write (f"<td>{ self .dtype in ( np . str_ , np . bytes_ ) } </td>" )
2031
+ output .write (f"<td>{ self .dtype == 'ascii' } </td>" )
2026
2032
output .write (f"<td>{ self .filters ._repr_html_ ()} </td>" )
2027
2033
output .write ("</tr>" )
2028
2034
@@ -2222,7 +2228,7 @@ cdef class Dim(object):
2222
2228
:rtype: tuple(numpy scalar, numpy scalar)
2223
2229
2224
2230
"""
2225
- if self .dtype == np .dtype ('S ' ):
2231
+ if self .dtype == np .dtype ('U ' ):
2226
2232
return None , None
2227
2233
cdef const void * domain_ptr = NULL
2228
2234
check_error (self .ctx ,
@@ -3864,9 +3870,8 @@ cdef class Array(object):
3864
3870
results .append ((None , None ))
3865
3871
continue
3866
3872
3867
- buf_dtype = 'S'
3868
- start_buf = np .empty (start_size , 'S' + str (start_size ))
3869
- end_buf = np .empty (end_size , 'S' + str (end_size ))
3873
+ start_buf = np .empty (start_size , f"S{ start_size } " )
3874
+ end_buf = np .empty (end_size , f"S{ end_size } " )
3870
3875
start_buf_ptr = np .PyArray_DATA (start_buf )
3871
3876
end_buf_ptr = np .PyArray_DATA (end_buf )
3872
3877
else :
@@ -3884,7 +3889,8 @@ cdef class Array(object):
3884
3889
return None
3885
3890
3886
3891
if start_size > 0 and end_size > 0 :
3887
- results .append ((start_buf .item (0 ), end_buf .item (0 )))
3892
+ results .append ((start_buf .item (0 ).decode ("UTF-8" ),
3893
+ end_buf .item (0 ).decode ("UTF-8" )))
3888
3894
else :
3889
3895
results .append ((None , None ))
3890
3896
else :
@@ -4918,7 +4924,7 @@ def index_domain_coords(dom: Domain, idx: tuple, check_ndim: bool):
4918
4924
# ensure strings contain only ASCII characters
4919
4925
domain_coords .append (np .array (sel , dtype = np .bytes_ , ndmin = 1 ))
4920
4926
except Exception as exc :
4921
- raise TileDBError (f'Dim \' strings may only contain ASCII characters' )
4927
+ raise TileDBError ('Dimension strings may only contain ASCII characters' )
4922
4928
else :
4923
4929
domain_coords .append (np .array (sel , dtype = dim .dtype , ndmin = 1 ))
4924
4930
0 commit comments