Skip to content

Commit abc6768

Browse files
committed
Port the essence of #360
1 parent f1de98d commit abc6768

File tree

3 files changed

+81
-66
lines changed

3 files changed

+81
-66
lines changed

apis/python/src/tiledbsoma/soma_dataframe.py

Lines changed: 31 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@
55
import pyarrow as pa
66
import tiledb
77

8-
from . import util_arrow, util_tiledb
8+
# This package's pybind11 code
9+
import tiledbsoma.libtiledbsoma as clib
10+
11+
from . import util, util_arrow
912
from .constants import SOMA_JOINID, SOMA_ROWID
13+
from .query_condition import QueryCondition # type: ignore
1014
from .soma_collection import SOMACollectionBase
1115
from .tiledb_array import TileDBArray
1216
from .types import Ids, SOMAResultOrder
@@ -150,40 +154,35 @@ def read(
150154
151155
**Indexing**: the ``ids`` parameter will support, per dimension: a row offset (uint), a row-offset range (slice), or a list of both.
152156
"""
153-
tiledb_result_order = util_tiledb.tiledb_result_order_from_soma_result_order(
154-
result_order, accept=["rowid-ordered", "unordered"]
155-
)
156-
157157
with self._tiledb_open("r") as A:
158-
dim_names, attr_names = util_tiledb.split_column_names(
159-
A.schema, column_names
158+
query_condition = None
159+
if value_filter is not None:
160+
query_condition = QueryCondition(value_filter)
161+
162+
# As an arg to this method, `column_names` is optional-None. For the pybind11
163+
# code it's optional-[].
164+
lib_column_names = [] if column_names is None else column_names
165+
166+
sr = clib.SOMAReader(
167+
self._uri,
168+
name=self.__class__.__name__,
169+
schema=A.schema, # query_condition needs this
170+
column_names=lib_column_names,
171+
query_condition=query_condition,
160172
)
161-
if value_filter is None:
162-
query = A.query(
163-
return_arrow=True,
164-
return_incomplete=True,
165-
order=tiledb_result_order,
166-
dims=dim_names,
167-
attrs=attr_names,
168-
)
169-
else:
170-
qc = tiledb.QueryCondition(value_filter)
171-
query = A.query(
172-
return_arrow=True,
173-
return_incomplete=True,
174-
attr_cond=qc,
175-
order=tiledb_result_order,
176-
dims=dim_names,
177-
attrs=attr_names,
178-
)
179-
180-
if ids is None:
181-
iterator = query.df[:]
182-
else:
183-
iterator = query.df[ids]
184173

185-
for table in iterator:
186-
yield table
174+
if ids is not None:
175+
# XXX TODO NEEDS TO ALWAYS BE A LIST NO MATTER WHAT
176+
if isinstance(ids, slice):
177+
ids = util.slice_to_list(ids)
178+
sr.set_dim_points(SOMA_ROWID, ids)
179+
# TODO: platform_config
180+
# TODO: batch_size
181+
# TODO: result_order
182+
sr.submit()
183+
184+
while arrow_table := sr.read_next():
185+
yield arrow_table # XXX what other post-processing
187186

188187
def read_all(
189188
self,

apis/python/src/tiledbsoma/soma_indexed_dataframe.py

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@
55
import pyarrow as pa
66
import tiledb
77

8-
from . import util_arrow, util_tiledb
8+
# This package's pybind11 code
9+
import tiledbsoma.libtiledbsoma as clib
10+
11+
from . import util, util_arrow
912
from .constants import SOMA_JOINID
13+
from .query_condition import QueryCondition # type: ignore
1014
from .soma_collection import SOMACollectionBase
1115
from .tiledb_array import TileDBArray
1216
from .types import Ids, SOMAResultOrder
@@ -182,41 +186,37 @@ def read(
182186
183187
**Indexing**: the ``ids`` parameter will support, per dimension: a list of values of the type of the indexed column.
184188
"""
185-
tiledb_result_order = util_tiledb.tiledb_result_order_from_soma_result_order(
186-
result_order, accept=["row-major", "column-major", "unordered"]
187-
)
188-
189-
# TODO: more about index_column_names
190189
with self._tiledb_open("r") as A:
191-
dim_names, attr_names = util_tiledb.split_column_names(
192-
A.schema, column_names
190+
query_condition = None
191+
if value_filter is not None:
192+
query_condition = QueryCondition(value_filter)
193+
194+
# As an arg to this method, `column_names` is optional-None. For the pybind11
195+
# code it's optional-[].
196+
lib_column_names = [] if column_names is None else column_names
197+
198+
sr = clib.SOMAReader(
199+
self._uri,
200+
name=self.__class__.__name__,
201+
schema=A.schema, # query_condition needs this
202+
column_names=lib_column_names,
203+
query_condition=query_condition,
193204
)
194-
if value_filter is None:
195-
query = A.query(
196-
return_arrow=True,
197-
return_incomplete=True,
198-
order=tiledb_result_order,
199-
dims=dim_names,
200-
attrs=attr_names,
201-
)
202-
else:
203-
qc = tiledb.QueryCondition(value_filter)
204-
query = A.query(
205-
return_arrow=True,
206-
return_incomplete=True,
207-
attr_cond=qc,
208-
order=tiledb_result_order,
209-
dims=dim_names,
210-
attrs=attr_names,
211-
)
212-
213-
if ids is None:
214-
iterator = query.df[:]
215-
else:
216-
iterator = query.df[ids]
217205

218-
for table in iterator:
219-
yield table
206+
if ids is not None:
207+
# XXX TODO NEEDS TO ALWAYS BE A LIST NO MATTER WHAT
208+
if isinstance(ids, slice):
209+
ids = util.slice_to_list(ids)
210+
sr.set_dim_points(A.schema.domain.dim(0).name, ids)
211+
212+
# TODO: platform_config
213+
# TODO: batch_size
214+
# TODO: result_order
215+
216+
sr.submit()
217+
218+
while arrow_table := sr.read_next():
219+
yield arrow_table # XXX what other post-processing
220220

221221
def read_all(
222222
self,

apis/python/src/tiledbsoma/util.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pathlib
22
import time
33
import urllib.parse
4-
from typing import TypeVar
4+
from typing import List, TypeVar
55

66
import numpy as np
77
import pandas as pd
@@ -91,3 +91,19 @@ def uri_joinpath(base: str, path: str) -> str:
9191
parts[2] = parts[2] + "/" + path
9292

9393
return urllib.parse.urlunparse(parts)
94+
95+
96+
def slice_to_list(sl: slice) -> List[int]:
    """
    Expand a slice with inclusive endpoints into the explicit list of integers it covers.

    Unlike Python's half-open slices, both ``sl.start`` and ``sl.stop`` are treated as
    inclusive bounds: ``slice(2, 5)`` expands to ``[2, 3, 4, 5]``. When ``sl.step`` is
    omitted, the direction is inferred from the bounds: ascending if ``start <= stop``,
    descending otherwise, so ``slice(5, 2)`` expands to ``[5, 4, 3, 2]``.

    With an explicit step, values never run past ``sl.stop``: ``slice(0, 9, 2)`` expands
    to ``[0, 2, 4, 6, 8]``. An explicit step pointing away from ``stop`` yields ``[]``.

    :param sl: Slice with explicit integer ``start`` and ``stop``; ``step`` is optional.
    :return: The integers from ``start`` through ``stop`` (inclusive), spaced by ``step``.
    :raises TypeError: If ``sl`` is not a slice, or its ``start`` or ``stop`` is ``None``.
    :raises ValueError: If ``sl.step`` is zero.
    """
    # NOTE: TileDB may not support stepped slices -- only 1 (and maybe -1)? Unconfirmed.

    # Raise rather than assert: asserts are stripped when Python runs with -O.
    if not isinstance(sl, slice):
        raise TypeError(f"slice_to_list expects a slice; got {type(sl).__name__}")

    start, stop, step = sl.start, sl.stop, sl.step
    if start is None or stop is None:
        # An open-ended slice has no finite expansion without knowing the domain.
        raise TypeError(f"slice_to_list requires explicit start and stop; got {sl!r}")

    if step is None:
        step = 1 if start <= stop else -1
    elif step == 0:
        raise ValueError("slice step cannot be zero")

    # range() excludes its end, so move the end exactly one unit past `stop` in the
    # direction of travel. Moving it by a whole `step` would admit a value beyond
    # `stop` whenever |step| > 1 and `stop` is not on the step grid.
    end = stop + (1 if step > 0 else -1)
    return list(range(start, end, step))

0 commit comments

Comments
 (0)