Skip to content

Commit eda0866

Browse files
committed
fix failing unit test
1 parent 63666d6 commit eda0866

File tree

9 files changed

+39
-377
lines changed

9 files changed

+39
-377
lines changed

apis/python/src/tiledbsoma/annotation_dataframe.py

Lines changed: 6 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@ def __init__(
3131
"""
3232
assert name in ["obs", "var"]
3333
super().__init__(uri=uri, name=name, parent=parent)
34-
s0 = self.timing_start("__init__", "total")
3534
self.dim_name = name + "_id"
36-
self.timing_end(s0)
3735

3836
# ----------------------------------------------------------------
3937
def shape(self) -> Tuple[int, int]:
@@ -42,10 +40,7 @@ def shape(self) -> Tuple[int, int]:
4240
The row-count is the number of obs_ids (for ``obs``) or the number of var_ids (for ``var``).
4341
The column-count is the number of columns/attributes in the dataframe.
4442
"""
45-
s0 = self.timing_start("shape", "total")
46-
s1 = self.timing_start("shape", "open")
4743
with self._open("r") as A:
48-
self.timing_end(s1)
4944
self.dim_name = A.domain.dim(0).name
5045
# These TileDB arrays are string-dimensioned sparse arrays so there is no '.shape'.
5146
# Instead we compute it ourselves. See also:
@@ -71,32 +66,22 @@ def shape(self) -> Tuple[int, int]:
7166
].tolist()
7267
)
7368
num_cols = A.schema.nattr
74-
self.timing_end(s0)
7569
return (num_rows, num_cols)
7670

7771
# ----------------------------------------------------------------
7872
def ids(self) -> Sequence[str]:
7973
"""
8074
Returns the ``obs_ids`` in the matrix (for ``obs``) or the ``var_ids`` (for ``var``).
8175
"""
82-
s0 = self.timing_start("ids", "total")
83-
s1 = self.timing_start("ids", "open")
8476
with self._open("r") as A:
85-
self.timing_end(s1)
8677
self.dim_name = A.domain.dim(0).name
8778

88-
s2 = self.timing_start("ids", "tiledb_query")
8979
# TileDB string dims are ASCII not UTF-8. Decode them so they read back not like
9080
# `b"AKR1C3"` but rather like `"AKR1C3"`. Update as of
9181
# https://github.com/TileDB-Inc/TileDB-Py/pull/1304 these dims will read back OK.
9282
retval = A.query(attrs=[], dims=[self.dim_name])[:][self.dim_name].tolist()
93-
self.timing_end(s2)
9483

95-
s3 = self.timing_start("ids", "decode")
9684
retval = [e.decode() for e in retval]
97-
self.timing_end(s3)
98-
99-
self.timing_end(s0)
10085

10186
if len(retval) > 0 and isinstance(retval[0], bytes):
10287
return [e.decode() for e in retval]
@@ -124,10 +109,7 @@ def keys(self) -> Sequence[str]:
124109
Returns the column names for the ``obs`` or ``var`` dataframe. For obs and var, ``.keys()`` is a
125110
keystroke-saver for the more general array-schema accessor ``attr_names``.
126111
"""
127-
s0 = self.timing_start("keys", "total")
128-
retval = self.attr_names()
129-
self.timing_end(s0)
130-
return retval
112+
return self.attr_names()
131113

132114
# ----------------------------------------------------------------
133115
def keyset(self) -> Set[str]:
@@ -149,18 +131,13 @@ def dim_select(
149131
``var``). If ``ids`` is ``None``, the entire dataframe is returned. Similarly, if ``attrs`` are
150132
provided, they're used for the query; else, all attributes are returned.
151133
"""
152-
s0 = self.timing_start("dim_select", "total")
153-
s1 = self.timing_start("dim_select", "open")
154134
with self._open("r") as A:
155-
self.timing_end(s1)
156135
self.dim_name = A.domain.dim(0).name
157-
s2 = self.timing_start("dim_select", "tiledb_query")
158136
query = A.query(return_arrow=return_arrow, attrs=attrs)
159137
if ids is None:
160138
df = query.df[:]
161139
else:
162140
df = query.df[ids]
163-
self.timing_end(s2)
164141

165142
# We do not need this:
166143
# df.set_index(self.dim_name, inplace=True)
@@ -171,24 +148,17 @@ def dim_select(
171148
# so the set_index is already done for us.
172149
#
173150
# However if the data was written somehow else (e.g. by tiledbsoma-r) then we do.
174-
s3 = self.timing_start("dim_select", "set_index")
175151
if not return_arrow:
176152
if isinstance(df.index, pd.RangeIndex) and self.dim_name in df.columns:
177153
df.set_index(self.dim_name, inplace=True)
178-
self.timing_end(s3)
179154

180155
# TODO: when UTF-8 attributes are queryable using TileDB-Py's QueryCondition API we can remove this.
181156
# This is the 'decode on read' part of our logic; in from_dataframe we have the 'encode on write' part.
182157
# Context: https://github.com/single-cell-data/TileDB-SingleCell/issues/99.
183-
s4 = self.timing_start("dim_select", "ascii_to_unicode")
184158
if return_arrow:
185-
retval = self._ascii_to_unicode_arrow_readback(df)
159+
return self._ascii_to_unicode_arrow_readback(df)
186160
else:
187-
retval = self._ascii_to_unicode_pandas_readback(df)
188-
self.timing_end(s4)
189-
190-
self.timing_end(s0)
191-
return retval
161+
return self._ascii_to_unicode_pandas_readback(df)
192162

193163
# ----------------------------------------------------------------
194164
def df(
@@ -223,12 +193,9 @@ def query(
223193
if query_string is None:
224194
return self.dim_select(ids, attrs=attrs, return_arrow=return_arrow)
225195

226-
s0 = self.timing_start("query", "total")
227-
retval = self._query_aux(
196+
return self._query_aux(
228197
query_string=query_string, ids=ids, attrs=attrs, return_arrow=return_arrow
229198
)
230-
self.timing_end(s0)
231-
return retval
232199

233200
def _query_aux(
234201
self,
@@ -243,11 +210,8 @@ def _query_aux(
243210
elapsed-time stats in a call to this helper.
244211
"""
245212

246-
s1 = self.timing_start("query", "open")
247213
with self._open() as A:
248-
self.timing_end(s1)
249214
self.dim_name = A.domain.dim(0).name
250-
s2 = self.timing_start("query", "tiledb_query")
251215
qc = tiledb.QueryCondition(query_string)
252216
if attrs is None:
253217
slice_query = A.query(attr_cond=qc, return_arrow=return_arrow)
@@ -263,7 +227,6 @@ def _query_aux(
263227
df = slice_query.df[:]
264228
else:
265229
df = slice_query.df[ids]
266-
self.timing_end(s2)
267230

268231
# We do not need this:
269232
# df.set_index(self.dim_name, inplace=True)
@@ -274,22 +237,16 @@ def _query_aux(
274237
# so the set_index is already done for us.
275238
#
276239
# However if the data was written somehow else (e.g. by tiledbsoma-r) then we do.
277-
s3 = self.timing_start("query", "set_index")
278240
if not return_arrow:
279241
if isinstance(df.index, pd.RangeIndex) and self.dim_name in df.columns:
280242
df.set_index(self.dim_name, inplace=True)
281243
# This is the 'decode on read' part of our logic; in from_dataframe we have the 'encode on write' part.
282244
# Context: https://github.com/single-cell-data/TileDB-SingleCell/issues/99.
283-
self.timing_end(s3)
284245

285-
s4 = self.timing_start("query", "ascii_to_unicode")
286246
if return_arrow:
287-
retval = self._ascii_to_unicode_arrow_readback(df)
247+
return self._ascii_to_unicode_arrow_readback(df)
288248
else:
289-
retval = self._ascii_to_unicode_pandas_readback(df)
290-
self.timing_end(s4)
291-
292-
return retval
249+
return self._ascii_to_unicode_pandas_readback(df)
293250

294251
# ----------------------------------------------------------------
295252
def _ascii_to_unicode_pandas_series_readback(
@@ -378,7 +335,6 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
378335
:param dataframe: ``anndata.obs``, ``anndata.var``, ``anndata.raw.var``.
379336
:param extent: TileDB ``extent`` parameter for the array schema.
380337
"""
381-
s0 = self.timing_start("from_dataframe", "total")
382338

383339
offsets_filters = tiledb.FilterList(
384340
[tiledb.PositiveDeltaFilter(), tiledb.ZstdFilter(level=-1)]
@@ -474,5 +430,3 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int = 2048) -> None:
474430
f"Wrote {self.nested_name}",
475431
util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"),
476432
)
477-
478-
self.timing_end(s0)

apis/python/src/tiledbsoma/annotation_matrix.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ def shape(self) -> Tuple[int, int]:
4242
4343
Note: currently implemented via data scan --- will be optimized in an upcoming TileDB Core release.
4444
"""
45-
s0 = self.timing_start("shape", "total")
4645
with self._open() as A:
4746
# These TileDB arrays are string-dimensioned sparse arrays so there is no '.shape'.
4847
# Instead we compute it ourselves. See also:
@@ -65,7 +64,6 @@ def shape(self) -> Tuple[int, int]:
6564
].tolist()
6665
)
6766
num_cols = A.schema.nattr
68-
self.timing_end(s0)
6967
return (num_rows, num_cols)
7068

7169
# ----------------------------------------------------------------
@@ -79,7 +77,6 @@ def dim_select(
7977
Selects a slice out of the array with specified ``obs_ids`` (for ``obsm`` elements) or
8078
``var_ids`` (for ``varm`` elements). If ``ids`` is ``None``, the entire array is returned.
8179
"""
82-
s0 = self.timing_start("dim_select", "total")
8380
if ids is None:
8481
with self._open() as A:
8582
query = A.query(return_arrow=return_arrow)
@@ -90,7 +87,6 @@ def dim_select(
9087
df = query.df[ids]
9188
if not return_arrow:
9289
df.set_index(self.dim_name, inplace=True)
93-
self.timing_end(s0)
9490
return df
9591

9692
# ----------------------------------------------------------------
@@ -116,8 +112,6 @@ def from_matrix_and_dim_values(
116112
:param matrix: ``anndata.obsm['foo']``, ``anndata.varm['foo']``, or ``anndata.raw.varm['foo']``.
117113
:param dim_values: ``anndata.obs_names``, ``anndata.var_names``, or ``anndata.raw.var_names``.
118114
"""
119-
s0 = self.timing_start("from_matrix_and_dim_values", "total")
120-
121115
s = util.get_start_stamp()
122116
log_io(None, f"{self._indent}START WRITING {self.uri}")
123117

@@ -132,13 +126,11 @@ def from_matrix_and_dim_values(
132126
f"Wrote {self.nested_name}",
133127
util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"),
134128
)
135-
self.timing_end(s0)
136129

137130
# ----------------------------------------------------------------
138131
def _numpy_ndarray_or_scipy_sparse_csr_matrix(
139132
self, matrix: Matrix, dim_values: Labels
140133
) -> None:
141-
s0 = self.timing_start("_numpy_ndarray_or_scipy_sparse_csr_matrix", "total")
142134
# We do not have column names for anndata-provenance annotation matrices.
143135
# So, if say we're looking at anndata.obsm['X_pca'], we create column names
144136
# 'X_pca_1', 'X_pca_2', etc.
@@ -154,11 +146,9 @@ def _numpy_ndarray_or_scipy_sparse_csr_matrix(
154146
df = pd.DataFrame(matrix, columns=attr_names)
155147
with tiledb.open(self.uri, mode="w", ctx=self._ctx) as A:
156148
A[dim_values] = df.to_dict(orient="list")
157-
self.timing_end(s0)
158149

159150
# ----------------------------------------------------------------
160151
def _from_pandas_dataframe(self, df: pd.DataFrame, dim_values: Labels) -> None:
161-
s0 = self.timing_start("_from_pandas_dataframe", "total")
162152
attr_names = df.columns.values.tolist()
163153

164154
# Ingest annotation matrices as 1D/multi-attribute sparse arrays
@@ -169,7 +159,6 @@ def _from_pandas_dataframe(self, df: pd.DataFrame, dim_values: Labels) -> None:
169159

170160
with tiledb.open(self.uri, mode="w", ctx=self._ctx) as A:
171161
A[dim_values] = df.to_dict(orient="list")
172-
self.timing_end(s0)
173162

174163
# ----------------------------------------------------------------
175164
def _create_empty_array(
@@ -182,7 +171,6 @@ def _create_empty_array(
182171
repeated once per column. For pandas.DataFrame, there is a dtype per column.
183172
:param attr_names: column names for the dataframe
184173
"""
185-
s0 = self.timing_start("_create_empty_array", "total")
186174

187175
# Nominally 'obs_id' or 'var_id'
188176
level = self._soma_options.string_dim_zstd_level
@@ -225,5 +213,3 @@ def _create_empty_array(
225213
)
226214

227215
tiledb.Array.create(self.uri, sch, ctx=self._ctx)
228-
229-
self.timing_end(s0)

0 commit comments

Comments
 (0)