2323 #
2424 # IMPORTANT: ALL non-primitive types supported by TileDB must be in this table.
2525 #
26- pa .string (): np .dtype (
27- "S"
28- ), # XXX TODO: temporary work-around until UTF8 support is native. GH #338.
26+ pa .string (): "ascii" , # XXX TODO: temporary work-around until UTF8 support is native. GH #338.
2927 pa .binary (): np .dtype ("S" ),
3028 pa .timestamp ("s" ): "datetime64[s]" ,
3129 pa .timestamp ("ms" ): "datetime64[ms]" ,
3937}
4038
4139
42- def tiledb_type_from_arrow_type (t : pa .DataType ) -> Union [type , np .dtype ]:
40+ def tiledb_type_from_arrow_type (t : pa .DataType ) -> Union [type , np .dtype , str ]:
4341 """
4442 Given an Arrow type, return the corresponding TileDB type: a Numpy dtype, or the string "ascii" for Arrow string types.
4543 Building block for Arrow-to-TileDB schema translation.
@@ -61,7 +59,10 @@ def tiledb_type_from_arrow_type(t: pa.DataType) -> Union[type, np.dtype]:
6159 arrow_type = ARROW_TO_TDB [t ]
6260 if isinstance (arrow_type , Exception ):
6361 raise arrow_type
64- return np .dtype (arrow_type )
62+ if arrow_type == "ascii" :
63+ return arrow_type
64+ else :
65+ return np .dtype (arrow_type )
6566
6667 if not pa .types .is_primitive (t ):
6768 raise TypeError (f"Type { str (t )} - unsupported type" )
@@ -83,11 +84,11 @@ def tiledb_type_from_arrow_type(t: pa.DataType) -> Union[type, np.dtype]:
8384 raise TypeError ("Unsupported Arrow type" ) from exc
8485
8586
86- def get_arrow_type_from_tiledb_dtype (tiledb_dtype : np .dtype ) -> pa .DataType :
87+ def get_arrow_type_from_tiledb_dtype (tiledb_dtype : Union [ str , np .dtype ] ) -> pa .DataType :
8788 """
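8889 Given a TileDB dtype (a Numpy dtype, or the string "ascii"), return the corresponding Arrow type. The "ascii" and bytes dtypes both map to Arrow strings.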
8990 """
90- if tiledb_dtype .name == "bytes" :
91+ if tiledb_dtype == "ascii" or tiledb_dtype .name == "bytes" :
9192 # XXX TODO: temporary work-around until UTF8 support is native. GH #338.
9293 return pa .string ()
9394 else :
@@ -119,22 +120,3 @@ def get_arrow_schema_from_tiledb_uri(
119120 arrow_schema_dict [name ] = get_arrow_type_from_tiledb_dtype (attr .dtype )
120121
121122 return pa .schema (arrow_schema_dict )
122-
123-
def ascii_to_unicode_pyarrow_readback(table: pa.Table) -> pa.Table:
    """
    Implements the 'decode on read' part of our ASCII/Unicode logic.

    Every column whose first element is a ``pa.LargeBinaryScalar`` is rebuilt as an
    Arrow string column by UTF-8-decoding each element; null elements stay null.
    All other columns (including empty ones) are passed through unchanged.

    :param table: Arrow table whose large-binary columns are to be decoded as UTF-8.
    :return: A new table with the same column names, in the same order.
    :raises UnicodeDecodeError: If a binary element is not valid UTF-8.
    """
    names = [field.name for field in table.schema]
    columns = []
    for name in names:
        column = table[name]
        # Only the first element is inspected, so an empty column is never converted.
        if len(column) > 0 and isinstance(column[0], pa.LargeBinaryScalar):
            raw_values = (element.as_py() for element in column)
            column = pa.array(
                # A null scalar yields None from as_py(); keep it null rather than
                # calling .decode on it.
                [None if raw is None else raw.decode("utf-8") for raw in raw_values],
                # Pin the type so an all-null column does not infer Arrow's null type.
                type=pa.string(),
            )
        columns.append(column)
    return pa.Table.from_arrays(columns, names=names)
0 commit comments