Skip to content

Commit 10d0d92

Browse files
committed
rebase prep
1 parent e70523f commit 10d0d92

File tree

2 files changed

+7
-69
lines changed

2 files changed

+7
-69
lines changed

apis/python/src/tiledbsoma/soma_dataframe.py

Lines changed: 2 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,8 @@
55
import pyarrow as pa
66
import tiledb
77

8-
import tiledbsoma.libtiledbsoma as clib
9-
108
from . import util, util_arrow, util_tiledb
119
from .logging import log_io
12-
from .query_condition import QueryCondition
1310
from .soma_collection import SOMACollectionBase
1411
from .tiledb_array import TileDBArray
1512
from .types import Ids, NTuple, SOMAResultOrder
@@ -160,55 +157,6 @@ def is_indexed(self) -> Literal[False]:
160157
def get_index_column_names(self) -> Sequence[str]:
161158
return []
162159

163-
def read_using_lib_temp(
164-
self,
165-
*,
166-
# TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
167-
# ids: Optional[Union[Sequence[int], Slice]] = None,
168-
ids: Optional[Any] = None,
169-
value_filter: Optional[str] = None,
170-
column_names: Optional[Sequence[str]] = None,
171-
result_order: Optional[str] = None,
172-
# TODO: batch_size
173-
# TODO: partition,
174-
# TODO: platform_config,
175-
) -> Iterator[pa.Table]:
176-
"""
177-
TODO: copy the text
178-
"""
179-
180-
with self._tiledb_open("r") as A:
181-
dim_names, attr_names = util_tiledb.split_column_names(
182-
A.schema, column_names
183-
)
184-
185-
query_condition = None
186-
if value_filter is not None:
187-
# query_condition = tiledb.QueryCondition(value_filter)
188-
query_condition = QueryCondition(value_filter)
189-
190-
# As an arg to this method, `column_names` is optional-None. For the pybind11
191-
# code it's optional-[].
192-
lib_column_names = [] if column_names is None else column_names
193-
194-
sr = clib.SOMAReader(
195-
self._uri,
196-
name=self.name,
197-
schema=A.schema, # query_condition needs this
198-
column_names=lib_column_names,
199-
query_condition=query_condition,
200-
)
201-
202-
# TODO: platform_config
203-
# TODO: batch_size
204-
# TODO: result_order
205-
206-
sr.submit()
207-
208-
while arrow_table := sr.read_next():
209-
# yield util_arrow.ascii_to_unicode_pyarrow_readback(batch)
210-
yield arrow_table # XXX what other post-processing
211-
212160
def read(
213161
self,
214162
*,
@@ -270,12 +218,7 @@ def read(
270218
iterator = query.df[ids]
271219

272220
for table in iterator:
273-
# XXX COMMENT MORE
274-
# This is the 'decode on read' part of our logic; in dim_select we have the
275-
# 'encode on write' part.
276-
# Context: https://github.com/single-cell-data/TileDB-SOMA/issues/99.
277-
#
278-
# Also: don't materialize these on read
221+
# Don't materialize these on read
279222
# TODO: get the arrow syntax for drop
280223
# df.drop(ROWID, axis=1)
281224
yield table
@@ -295,7 +238,7 @@ def read_all(
295238
# TODO: platform_config,
296239
) -> pa.Table:
297240
"""
298-
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
241+
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simplify unit-test cases.
299242
"""
300243
return pa.concat_tables(
301244
self.read(
@@ -412,11 +355,6 @@ def read_as_pandas(
412355

413356
for df in iterator:
414357

415-
# This is the 'decode on read' part of our logic; in dim_select we have the 'encode on
416-
# write' part.
417-
# Context: https://github.com/single-cell-data/TileDB-SOMA/issues/99.
418-
df = df
419-
420358
if id_column_name is not None:
421359
df.reset_index(inplace=True)
422360
df.set_index(id_column_name, inplace=True)

apis/python/src/tiledbsoma/soma_indexed_dataframe.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def read(
259259
iterator = query.df[ids]
260260

261261
for table in iterator:
262-
yield table
262+
yield df
263263

264264
def read_all(
265265
self,
@@ -275,17 +275,17 @@ def read_all(
275275
# TODO: platform_config,
276276
) -> pa.Table:
277277
"""
278-
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
278+
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the record batches found. Its nominal use is to simplify unit-test cases.
279279
"""
280280
return pa.concat_tables(
281281
self.read(ids=ids, value_filter=value_filter, column_names=column_names)
282282
)
283283

284-
def write(self, values: pa.Table) -> None:
284+
def write(self, values: pa.RecordBatch) -> None:
285285
"""
286-
Write an Arrow.Table to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
286+
Write an Arrow.RecordBatch to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
287287
288-
:param values: An Arrow.Table containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
288+
:param values: An Arrow.RecordBatch containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
289289
"""
290290
self._shape = None # cache-invalidate
291291

0 commit comments

Comments (0)