Skip to content

Commit 70844be

Browse files
committed
rebase prep
1 parent d1de0f2 commit 70844be

File tree

2 files changed

+14
-62
lines changed

2 files changed

+14
-62
lines changed

apis/python/src/tiledbsoma/soma_dataframe.py

Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,8 @@
55
import pyarrow as pa
66
import tiledb
77

8-
import tiledbsoma.libtiledbsoma as clib
9-
10-
from . import util, util_arrow, util_tiledb
8+
from . import util, util_arrow, util_pandas, util_tiledb
119
from .logging import log_io
12-
from .query_condition import QueryCondition
1310
from .soma_collection import SOMACollectionBase
1411
from .tiledb_array import TileDBArray
1512
from .types import Ids, NTuple, SOMAResultOrder
@@ -160,55 +157,6 @@ def is_indexed(self) -> Literal[False]:
160157
def get_index_column_names(self) -> Sequence[str]:
161158
return []
162159

163-
def read_using_lib_temp(
164-
self,
165-
*,
166-
# TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
167-
# ids: Optional[Union[Sequence[int], Slice]] = None,
168-
ids: Optional[Any] = None,
169-
value_filter: Optional[str] = None,
170-
column_names: Optional[Sequence[str]] = None,
171-
result_order: Optional[str] = None,
172-
# TODO: batch_size
173-
# TODO: partition,
174-
# TODO: platform_config,
175-
) -> Iterator[pa.Table]:
176-
"""
177-
TODO: copy the text
178-
"""
179-
180-
with self._tiledb_open("r") as A:
181-
dim_names, attr_names = util_tiledb.split_column_names(
182-
A.schema, column_names
183-
)
184-
185-
query_condition = None
186-
if value_filter is not None:
187-
# query_condition = tiledb.QueryCondition(value_filter)
188-
query_condition = QueryCondition(value_filter)
189-
190-
# As an arg to this method, `column_names` is optional-None. For the pybind11
191-
# code it's optional-[].
192-
lib_column_names = [] if column_names is None else column_names
193-
194-
sr = clib.SOMAReader(
195-
self._uri,
196-
name=self.name,
197-
schema=A.schema, # query_condition needs this
198-
column_names=lib_column_names,
199-
query_condition=query_condition,
200-
)
201-
202-
# TODO: platform_config
203-
# TODO: batch_size
204-
# TODO: result_order
205-
206-
sr.submit()
207-
208-
while arrow_table := sr.read_next():
209-
# yield util_arrow.ascii_to_unicode_pyarrow_readback(batch)
210-
yield arrow_table # XXX what other post-processing
211-
212160
def read(
213161
self,
214162
*,
@@ -278,7 +226,7 @@ def read(
278226
# Also: don't materialize these on read
279227
# TODO: get the arrow syntax for drop
280228
# df.drop(ROWID, axis=1)
281-
yield table
229+
yield util_arrow.ascii_to_unicode_pyarrow_readback(table)
282230

283231
def read_all(
284232
self,
@@ -295,7 +243,7 @@ def read_all(
295243
# TODO: platform_config,
296244
) -> pa.Table:
297245
"""
298-
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
246+
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simplify unit-test cases.
299247
"""
300248
return pa.concat_tables(
301249
self.read(
@@ -415,7 +363,7 @@ def read_as_pandas(
415363
# This is the 'decode on read' part of our logic; in dim_select we have the 'encode on
416364
# write' part.
417365
# Context: https://github.com/single-cell-data/TileDB-SOMA/issues/99.
418-
df = df
366+
df = util_pandas.ascii_to_unicode_pandas_readback(df)
419367

420368
if id_column_name is not None:
421369
df.reset_index(inplace=True)
@@ -529,7 +477,7 @@ def write_from_pandas(
529477
dfc = dataframe[column_name]
530478
if len(dfc) > 0 and type(dfc[0]) == str:
531479
# Force ASCII storage if string, in order to make obs/var columns queryable.
532-
column_types[column_name] = "ascii"
480+
column_types[column_name] = np.dtype("S")
533481

534482
tiledb.from_pandas(
535483
uri=self.uri,

apis/python/src/tiledbsoma/soma_indexed_dataframe.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,11 @@ def read(
259259
iterator = query.df[ids]
260260

261261
for table in iterator:
262-
yield table
262+
# XXX COMMENT MORE
263+
# This is the 'decode on read' part of our logic; in dim_select we have the
264+
# 'encode on write' part.
265+
# Context: https://github.com/single-cell-data/TileDB-SOMA/issues/99.
266+
yield util_arrow.ascii_to_unicode_pyarrow_readback(table)
263267

264268
def read_all(
265269
self,
@@ -275,17 +279,17 @@ def read_all(
275279
# TODO: platform_config,
276280
) -> pa.Table:
277281
"""
278-
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
282+
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the record batches found. Its nominal use is to simplify unit-test cases.
279283
"""
280284
return pa.concat_tables(
281285
self.read(ids=ids, value_filter=value_filter, column_names=column_names)
282286
)
283287

284-
def write(self, values: pa.Table) -> None:
288+
def write(self, values: pa.RecordBatch) -> None:
285289
"""
286-
Write an Arrow.Table to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
290+
Write an Arrow.RecordBatch to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
287291
288-
:param values: An Arrow.Table containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
292+
:param values: An Arrow.RecordBatch containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
289293
"""
290294
self._shape = None # cache-invalidate
291295

0 commit comments

Comments
 (0)