Skip to content

Commit 4abbe3f

Browse files
committed
Update ASCII storage for dataframes
1 parent 4e620ef commit 4abbe3f

1 file changed

Lines changed: 9 additions & 5 deletions

File tree

apis/python/src/tiledbsc/annotation_dataframe.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from concurrent.futures import ThreadPoolExecutor
22
from typing import Optional, Sequence, Set, Tuple, Union
33

4-
import numpy as np
54
import pandas as pd
65
import pyarrow as pa
76
import tiledb
@@ -74,10 +73,15 @@ def ids(self) -> Sequence[str]:
7473
Returns the `obs_ids` in the matrix (for `obs`) or the `var_ids` (for `var`).
7574
"""
7675
with self._open("r") as A:
77-
# TileDB string dims are ASCII not UTF-8. Decode them so they readback
78-
# not like `b"AKR1C3"` but rather like `"AKR1C3"`.
76+
# TileDB string dims are ASCII not UTF-8. Decode them so they read back not like
77+
# `b"AKR1C3"` but rather like `"AKR1C3"`. Update: as of
78+
# https://github.com/TileDB-Inc/TileDB-Py/pull/1304 these dims will read back OK.
7979
retval = A.query(attrs=[], dims=[self.dim_name])[:][self.dim_name].tolist()
80-
return [e.decode() for e in retval]
80+
if len(retval) > 0 and isinstance(retval[0], bytes):
81+
return [e.decode() for e in retval]
82+
else:
83+
# list(...) is there to appease the linter which thinks we're returning `Any`
84+
return list(retval)
8185

8286
# ----------------------------------------------------------------
8387
def __repr__(self) -> str:
@@ -377,7 +381,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
377381
dfc = dataframe[column_name]
378382
if len(dfc) > 0 and type(dfc[0]) == str:
379383
# Force ASCII storage if string, in order to make obs/var columns queryable.
380-
column_types[column_name] = np.dtype("S")
384+
column_types[column_name] = "ascii"
381385

382386
tiledb.from_pandas(
383387
uri=self.uri,

0 commit comments

Comments
 (0)