Skip to content

Commit 10d0d92

Browse files
committed
rebase prep
1 parent e70523f commit 10d0d92

File tree

2 files changed

+7
-69
lines changed

2 files changed

+7
-69
lines changed

apis/python/src/tiledbsoma/soma_dataframe.py

Lines changed: 2 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,8 @@
55
import pyarrow as pa
66
import tiledb
77

8-
import tiledbsoma.libtiledbsoma as clib
9-
108
from . import util, util_arrow, util_tiledb
119
from .logging import log_io
12-
from .query_condition import QueryCondition
1310
from .soma_collection import SOMACollectionBase
1411
from .tiledb_array import TileDBArray
1512
from .types import Ids, NTuple, SOMAResultOrder
@@ -160,55 +157,6 @@ def is_indexed(self) -> Literal[False]:
160157
def get_index_column_names(self) -> Sequence[str]:
161158
return []
162159

163-
def read_using_lib_temp(
164-
self,
165-
*,
166-
# TODO: find the right syntax to get the typechecker to accept args like ``ids=slice(0,10)``
167-
# ids: Optional[Union[Sequence[int], Slice]] = None,
168-
ids: Optional[Any] = None,
169-
value_filter: Optional[str] = None,
170-
column_names: Optional[Sequence[str]] = None,
171-
result_order: Optional[str] = None,
172-
# TODO: batch_size
173-
# TODO: partition,
174-
# TODO: platform_config,
175-
) -> Iterator[pa.Table]:
176-
"""
177-
TODO: copy the text
178-
"""
179-
180-
with self._tiledb_open("r") as A:
181-
dim_names, attr_names = util_tiledb.split_column_names(
182-
A.schema, column_names
183-
)
184-
185-
query_condition = None
186-
if value_filter is not None:
187-
# query_condition = tiledb.QueryCondition(value_filter)
188-
query_condition = QueryCondition(value_filter)
189-
190-
# As an arg to this method, `column_names` is optional-None. For the pybind11
191-
# code it's optional-[].
192-
lib_column_names = [] if column_names is None else column_names
193-
194-
sr = clib.SOMAReader(
195-
self._uri,
196-
name=self.name,
197-
schema=A.schema, # query_condition needs this
198-
column_names=lib_column_names,
199-
query_condition=query_condition,
200-
)
201-
202-
# TODO: platform_config
203-
# TODO: batch_size
204-
# TODO: result_order
205-
206-
sr.submit()
207-
208-
while arrow_table := sr.read_next():
209-
# yield util_arrow.ascii_to_unicode_pyarrow_readback(batch)
210-
yield arrow_table # XXX what other post-processing
211-
212160
def read(
213161
self,
214162
*,
@@ -270,12 +218,7 @@ def read(
270218
iterator = query.df[ids]
271219

272220
for table in iterator:
273-
# XXX COMMENT MORE
274-
# This is the 'decode on read' part of our logic; in dim_select we have the
275-
# 'encode on write' part.
276-
# Context: https://github.com/single-cell-data/TileDB-SOMA/issues/99.
277-
#
278-
# Also: don't materialize these on read
221+
# Don't materialize these on read
279222
# TODO: get the arrow syntax for drop
280223
# df.drop(ROWID, axis=1)
281224
yield table
@@ -295,7 +238,7 @@ def read_all(
295238
# TODO: platform_config,
296239
) -> pa.Table:
297240
"""
298-
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
241+
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simplify unit-test cases.
299242
"""
300243
return pa.concat_tables(
301244
self.read(
@@ -412,11 +355,6 @@ def read_as_pandas(
412355

413356
for df in iterator:
414357

415-
# This is the 'decode on read' part of our logic; in dim_select we have the 'encode on
416-
# write' part.
417-
# Context: https://github.com/single-cell-data/TileDB-SOMA/issues/99.
418-
df = df
419-
420358
if id_column_name is not None:
421359
df.reset_index(inplace=True)
422360
df.set_index(id_column_name, inplace=True)

apis/python/src/tiledbsoma/soma_indexed_dataframe.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def read(
259259
iterator = query.df[ids]
260260

261261
for table in iterator:
262-
yield table
262+
yield df
263263

264264
def read_all(
265265
self,
@@ -275,17 +275,17 @@ def read_all(
275275
# TODO: platform_config,
276276
) -> pa.Table:
277277
"""
278-
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the table-pieces found. Its nominal use is to simply unit-test cases.
278+
This is a convenience method around ``read``. It iterates the return value from ``read`` and returns a concatenation of all the record batches found. Its nominal use is to simplify unit-test cases.
279279
"""
280280
return pa.concat_tables(
281281
self.read(ids=ids, value_filter=value_filter, column_names=column_names)
282282
)
283283

284-
def write(self, values: pa.Table) -> None:
284+
def write(self, values: pa.RecordBatch) -> None:
285285
"""
286-
Write an Arrow.Table to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
286+
Write an Arrow.RecordBatch to the persistent object. As duplicate index values are not allowed, index values already present in the object are overwritten and new index values are added.
287287
288-
:param values: An Arrow.Table containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
288+
:param values: An Arrow.RecordBatch containing all columns, including the index columns. The schema for the values must match the schema for the ``SOMAIndexedDataFrame``.
289289
"""
290290
self._shape = None # cache-invalidate
291291

0 commit comments

Comments (0)