|
4 | 4 | import pandas as pd |
5 | 5 | import pyarrow as pa |
6 | 6 | import tiledb |
| 7 | +import tiledbsoma.libtiledbsoma as clib |
7 | 8 |
|
8 | 9 | from . import util, util_arrow, util_tiledb |
9 | 10 | from .logging import log_io |
@@ -157,6 +158,55 @@ def is_indexed(self) -> Literal[False]: |
def get_index_column_names(self) -> Sequence[str]:
    """
    Return the names of the index columns.

    This implementation always returns a new, empty list.
    """
    return list()
159 | 160 |
|
def read_using_lib_temp(
    self,
    *,
    # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
    # ids: Optional[Union[Sequence[int], Slice]] = None,
    ids: Optional[Any] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[str] = None,
    # TODO: batch_size
    # TODO: partition,
    # TODO: platform_config,
) -> Iterator[pa.Table]:
    """
    Temporary read path that goes through ``clib.SOMAReader``.

    Opens the array for reading, builds a ``clib.SOMAReader`` on ``self._uri``,
    submits the query, and yields each object returned by ``read_next()`` until
    that call returns a falsy value.

    :param ids: Accepted for interface parity; not used by this method yet.
    :param value_filter: Optional filter expression. When given, it is wrapped in
        ``QueryCondition`` and handed to the reader.
    :param column_names: Optional column names to read. ``None`` is forwarded to
        the reader as an empty list.
    :param result_order: Accepted for interface parity; not used by this method yet.

    TODO: copy the text
    """

    with self._tiledb_open("r") as A:
        # The split result is not consumed below; the call is kept as-is.
        # NOTE(review): presumably this also validates ``column_names`` against
        # the schema -- confirm before removing.
        _dim_names, _attr_names = util_tiledb.split_column_names(
            A.schema, column_names
        )

        condition = (
            QueryCondition(value_filter) if value_filter is not None else None
        )

        # As an arg to this method, `column_names` is optional-None. For the pybind11
        # code it's optional-[].
        reader_columns = column_names if column_names is not None else []

        reader = clib.SOMAReader(
            self._uri,
            name=self.name,
            schema=A.schema,  # query_condition needs this
            column_names=reader_columns,
            query_condition=condition,
        )

        # TODO: platform_config
        # TODO: batch_size
        # TODO: result_order

        reader.submit()

        # NOTE(review): iteration ends on the first falsy value from read_next().
        # If an empty (zero-row) table is falsy and can appear mid-stream, this
        # stops early -- confirm the reader's end-of-data contract.
        while True:
            batch = reader.read_next()
            if not batch:
                break
            yield batch  # XXX what other post-processing
160 | 210 | def read( |
161 | 211 | self, |
162 | 212 | *, |
|
0 commit comments