|
1 | 1 | from concurrent.futures import ThreadPoolExecutor |
2 | 2 | from typing import Optional, Sequence, Set, Tuple, Union |
3 | 3 |
|
4 | | -import numpy as np |
5 | 4 | import pandas as pd |
6 | 5 | import pyarrow as pa |
7 | 6 | import tiledb |
@@ -74,10 +73,15 @@ def ids(self) -> Sequence[str]: |
74 | 73 | Returns the `obs_ids` in the matrix (for `obs`) or the `var_ids` (for `var`). |
75 | 74 | """ |
76 | 75 | with self._open("r") as A: |
77 | | - # TileDB string dims are ASCII not UTF-8. Decode them so they readback |
78 | | - # not like `b"AKR1C3"` but rather like `"AKR1C3"`. |
| 76 | + # TileDB string dims are ASCII, not UTF-8. Decode them so they read back not as |
| 77 | + # `b"AKR1C3"` but as `"AKR1C3"`. Update: as of |
| 78 | + # https://github.com/TileDB-Inc/TileDB-Py/pull/1304, these dims read back OK. |
79 | 79 | retval = A.query(attrs=[], dims=[self.dim_name])[:][self.dim_name].tolist() |
80 | | - return [e.decode() for e in retval] |
| 80 | + if len(retval) > 0 and isinstance(retval[0], bytes): |
| 81 | + return [e.decode() for e in retval] |
| 82 | + else: |
| 83 | + # list(...) is there to appease the linter which thinks we're returning `Any` |
| 84 | + return list(retval) |
81 | 85 |
|
82 | 86 | # ---------------------------------------------------------------- |
83 | 87 | def __repr__(self) -> str: |
@@ -377,7 +381,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None: |
377 | 381 | dfc = dataframe[column_name] |
378 | 382 | if len(dfc) > 0 and type(dfc[0]) == str: |
379 | 383 | # Force ASCII storage if string, in order to make obs/var columns queryable. |
380 | | - column_types[column_name] = np.dtype("S") |
| 384 | + column_types[column_name] = "ascii" |
381 | 385 |
|
382 | 386 | tiledb.from_pandas( |
383 | 387 | uri=self.uri, |
|
0 commit comments