11from concurrent .futures import ThreadPoolExecutor
22from typing import Optional , Sequence , Set , Tuple , Union
33
4- import numpy as np
54import pandas as pd
65import pyarrow as pa
76import tiledb
@@ -86,9 +85,10 @@ def ids(self) -> Sequence[str]:
8685 self .timing_end (s1 )
8786 self .dim_name = A .domain .dim (0 ).name
8887
89- # TileDB string dims are ASCII not UTF-8. Decode them so they readback
90- # not like `b"AKR1C3"` but rather like `"AKR1C3"`.
9188 s2 = self .timing_start ("ids" , "tiledb_query" )
89+ # TileDB string dims are ASCII, not UTF-8. Decode them so they read back not like
90+ # `b"AKR1C3"` but rather like `"AKR1C3"`. Update: as of
91+ # https://github.com/TileDB-Inc/TileDB-Py/pull/1304 these dims will read back OK.
9292 retval = A .query (attrs = [], dims = [self .dim_name ])[:][self .dim_name ].tolist ()
9393 self .timing_end (s2 )
9494
@@ -97,7 +97,12 @@ def ids(self) -> Sequence[str]:
9797 self .timing_end (s3 )
9898
9999 self .timing_end (s0 )
100- return list (retval ) # coerce to list to appease the linter
100+
101+ if len (retval ) > 0 and isinstance (retval [0 ], bytes ):
102+ return [e .decode () for e in retval ]
103+ else :
104+ # list(...) is there to appease the linter which thinks we're returning `Any`
105+ return list (retval )
101106
102107 # ----------------------------------------------------------------
103108 def __repr__ (self ) -> str :
@@ -268,14 +273,8 @@ def _query_aux(
268273 # (('__pandas_index_dims', '{"obs_id": "<U0"}'),)
269274 # so the set_index is already done for us.
270275 #
271- < << << << HEAD :apis / python / src / tiledbsoma / annotation_dataframe .py
272276 # However if the data was written somehow else (e.g. by tiledbsoma-r) then we do.
273- | | | | | | | parent of 19963 aa (tiledbsc - py stats experiment ):apis / python / src / tiledbsc / annotation_dataframe .py
274- # However if the data was written somehow else (e.g. by tiledbscr-r) then we do.
275- == == == =
276- # However if the data was written somehow else (e.g. by tiledbscr-r) then we do.
277277 s3 = self .timing_start ("query" , "set_index" )
278- >> >> >> > 19963 aa (tiledbsc - py stats experiment ):apis / python / src / tiledbsc / annotation_dataframe .py
279278 if not return_arrow :
280279 if isinstance (df .index , pd .RangeIndex ) and self .dim_name in df .columns :
281280 df .set_index (self .dim_name , inplace = True )
@@ -451,7 +450,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
451450 dfc = dataframe [column_name ]
452451 if len (dfc ) > 0 and type (dfc [0 ]) == str :
453452 # Force ASCII storage if string, in order to make obs/var columns queryable.
454- column_types [column_name ] = np . dtype ( "S" )
453+ column_types [column_name ] = "ascii"
455454
456455 tiledb .from_pandas (
457456 uri = self .uri ,
0 commit comments