Skip to content

Commit 07c55cf

Browse files
committed
Iterating from SOMADataFrame
1 parent c89409a commit 07c55cf

File tree

3 files changed

+54
-1
lines changed

3 files changed

+54
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,6 @@ apis/python/src/tiledbsoma/libtiledb.*
5252
apis/python/src/tiledbsoma/libtiledbsoma.*
5353

5454
/.quarto/
55+
/tags
5556

5657
/NOTES/

apis/python/src/tiledbsoma/soma_dataframe.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
import pyarrow as pa
66
import tiledb
77

8+
import tiledbsoma.libtiledbsoma as clib
9+
810
from . import util, util_arrow, util_tiledb
911
from .logging import log_io
12+
from .query_condition import QueryCondition
1013
from .soma_collection import SOMACollectionBase
1114
from .tiledb_array import TileDBArray
1215
from .types import Ids, NTuple
@@ -157,6 +160,55 @@ def is_indexed(self) -> Literal[False]:
157160
def get_index_column_names(self) -> Sequence[str]:
    """
    Return the names of the index columns.

    This implementation has no index columns to report, so the result is
    always a new, empty list.
    """
    no_index_columns: Sequence[str] = []
    return no_index_columns
159162

163+
def read_using_lib_temp(
    self,
    *,
    # TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
    # ids: Optional[Union[Sequence[int], Slice]] = None,
    ids: Optional[Any] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[str] = None,
    # TODO: batch_size
    # TODO: partition,
    # TODO: platform_config,
) -> Iterator[pa.Table]:
    """
    Read this dataframe through ``clib.SOMAReader``, yielding Arrow tables.

    The array is opened for reading, a ``clib.SOMAReader`` is built from the
    array's URI, name and schema, the query is submitted, and every table
    returned by ``read_next()`` is yielded in turn. Because this is a
    generator, the array stays open until iteration finishes or the
    generator is closed.

    :param ids: Accepted for signature parity but currently NOT applied to
        the query by this implementation.
    :param value_filter: Optional filter expression. When given, it is
        wrapped in a ``QueryCondition`` and handed to the reader.
    :param column_names: Optional column names to pass to the reader.
        ``None`` is forwarded as an empty list, which is how the pybind11
        layer spells "not specified".
    :param result_order: Accepted but currently NOT applied (see TODO below).

    :return: An iterator over the tables produced by the reader, including
        tables that contain zero rows.
    """

    with self._tiledb_open("r") as A:
        # The split result is not used below. The call itself is retained in
        # case it validates ``column_names`` against the schema -- TODO
        # confirm, and drop the call if it has no such effect.
        util_tiledb.split_column_names(A.schema, column_names)

        query_condition = None
        if value_filter is not None:
            query_condition = QueryCondition(value_filter)

        # As an arg to this method, `column_names` is optional-None. For the pybind11
        # code it's optional-[].
        lib_column_names = [] if column_names is None else column_names

        sr = clib.SOMAReader(
            self._uri,
            name=self.name,
            schema=A.schema,  # query_condition needs this
            column_names=lib_column_names,
            query_condition=query_condition,
        )

        # TODO: platform_config
        # TODO: batch_size
        # TODO: result_order

        sr.submit()

        # Compare against None explicitly rather than testing truthiness: a
        # pyarrow Table with zero rows is falsy, so a bare
        # ``while table := sr.read_next()`` would stop at the first empty
        # batch and silently drop it (and anything after it).
        # NOTE(review): assumes read_next() returns None once the results are
        # exhausted -- confirm against the libtiledbsoma bindings.
        while (arrow_table := sr.read_next()) is not None:
            # TODO: decide whether readback post-processing such as
            # util_arrow.ascii_to_unicode_pyarrow_readback is needed here.
            yield arrow_table  # XXX what other post-processing
211+
160212
def read(
161213
self,
162214
*,

apis/python/tests/test_soma_dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def _check_tbl(tbl, col_names, ids):
192192
print(r)
193193
print("INPUT SCHEMA")
194194
print(i)
195-
print("CMP", r==i)
195+
print("CMP", r == i)
196196

197197
print("")
198198

0 commit comments

Comments
 (0)