Skip to content

Commit f6bb262

Browse files
committed
Iterating from SOMADataFrame
1 parent c89409a commit f6bb262

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,6 @@ apis/python/src/tiledbsoma/libtiledb.*
5252
apis/python/src/tiledbsoma/libtiledbsoma.*
5353

5454
/.quarto/
55+
/tags
5556

5657
/NOTES/

apis/python/src/tiledbsoma/soma_dataframe.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pandas as pd
55
import pyarrow as pa
66
import tiledb
7+
import tiledbsoma.libtiledbsoma as clib
78

89
from . import util, util_arrow, util_tiledb
910
from .logging import log_io
@@ -157,6 +158,55 @@ def is_indexed(self) -> Literal[False]:
157158
def get_index_column_names(self) -> Sequence[str]:
    """Return the names of the index columns; this is always an empty list."""
    no_index_columns: Sequence[str] = []
    return no_index_columns
159160

161+
def read_using_lib_temp(
    self,
    *,
    # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
    # ids: Optional[Union[Sequence[int], Slice]] = None,
    ids: Optional[Any] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[str] = None,
    # TODO: batch_size
    # TODO: partition,
    # TODO: platform_config,
) -> Iterator[pa.Table]:
    """
    Read the dataframe through ``clib.SOMAReader``, yielding one result per
    ``read_next()`` call until the reader reports that it is exhausted.

    This is a generator: the array opened via ``self._tiledb_open("r")`` stays
    open for as long as the caller keeps iterating.

    TODO: copy the full parameter text from the ``read`` method.

    :param ids: Accepted for signature compatibility; not used by this
        implementation yet.
    :param value_filter: Optional filter expression. When given, it is wrapped
        in a ``QueryCondition`` and handed to the reader.
    :param column_names: Optional names of the columns to read. ``None`` is
        passed to the reader as an empty list.
    :param result_order: Accepted for signature compatibility; not applied by
        this implementation yet.
    :return: An iterator over the objects returned by ``SOMAReader.read_next()``
        (annotated as Arrow tables).
    """
    with self._tiledb_open("r") as A:
        # NOTE(review): ``dim_names`` / ``attr_names`` are not used below. The
        # call is retained unchanged in case ``split_column_names`` validates
        # its input -- confirm, then drop it if it has no side effects.
        dim_names, attr_names = util_tiledb.split_column_names(
            A.schema, column_names
        )

        query_condition = None
        if value_filter is not None:
            # NOTE(review): this was previously spelled ``tiledb.QueryCondition``;
            # confirm the unqualified ``QueryCondition`` is imported in this module.
            query_condition = QueryCondition(value_filter)

        # As an arg to this method, `column_names` is optional-None. For the pybind11
        # code it's optional-[].
        lib_column_names = [] if column_names is None else column_names

        sr = clib.SOMAReader(
            self._uri,
            name=self.name,
            schema=A.schema,  # query_condition needs this
            column_names=lib_column_names,
            query_condition=query_condition,
        )

        # TODO: platform_config
        # TODO: batch_size
        # TODO: result_order

        sr.submit()

        # Compare against None explicitly rather than relying on truthiness:
        # a pyarrow Table with zero rows has len() == 0 and is therefore falsy,
        # which would end the iteration early and drop any later batches.
        # Assumes ``read_next()`` returns None once exhausted -- TODO confirm
        # against the libtiledbsoma bindings.
        while (arrow_table := sr.read_next()) is not None:
            # TODO: decide what post-processing is needed here, e.g.
            # util_arrow.ascii_to_unicode_pyarrow_readback.
            yield arrow_table
209+
160210
def read(
161211
self,
162212
*,

apis/python/tests/test_soma_dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def _check_tbl(tbl, col_names, ids):
192192
print(r)
193193
print("INPUT SCHEMA")
194194
print(i)
195-
print("CMP", r==i)
195+
print("CMP", r == i)
196196

197197
print("")
198198

0 commit comments

Comments
 (0)