11from concurrent .futures import ThreadPoolExecutor
22from typing import Optional , Sequence , Set , Tuple , Union
33
4- import numpy as np
54import pandas as pd
65import pyarrow as pa
76import tiledb
@@ -86,9 +85,10 @@ def ids(self) -> Sequence[str]:
8685 self .timing_end (s1 )
8786 self .dim_name = A .domain .dim (0 ).name
8887
89- # TileDB string dims are ASCII not UTF-8. Decode them so they readback
90- # not like `b"AKR1C3"` but rather like `"AKR1C3"`.
9188 s2 = self .timing_start ("ids" , "tiledb_query" )
89+ # TileDB string dims are ASCII, not UTF-8. Decode them so they read back not like
90+ # `b"AKR1C3"` but rather like `"AKR1C3"`. Update: as of
91+ # https://github.com/TileDB-Inc/TileDB-Py/pull/1304 these dims will read back OK.
9292 retval = A .query (attrs = [], dims = [self .dim_name ])[:][self .dim_name ].tolist ()
9393 self .timing_end (s2 )
9494
@@ -97,7 +97,12 @@ def ids(self) -> Sequence[str]:
9797 self .timing_end (s3 )
9898
9999 self .timing_end (s0 )
100- return list (retval ) # coerce to list to appease the linter
100+
101+ if len (retval ) > 0 and isinstance (retval [0 ], bytes ):
102+ return [e .decode () for e in retval ]
103+ else :
104+ # list(...) is there to appease the linter which thinks we're returning `Any`
105+ return list (retval )
101106
102107 # ----------------------------------------------------------------
103108 def __repr__ (self ) -> str :
@@ -445,7 +450,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
445450 dfc = dataframe [column_name ]
446451 if len (dfc ) > 0 and type (dfc [0 ]) == str :
447452 # Force ASCII storage if string, in order to make obs/var columns queryable.
448- column_types [column_name ] = np . dtype ( "S" )
453+ column_types [column_name ] = "ascii"
449454
450455 tiledb .from_pandas (
451456 uri = self .uri ,
0 commit comments