@@ -53,12 +53,14 @@ def __init__(
5353 * For reading from an already-populated SOMA, we wish to avoid cache-coherency issues.
5454 """
5555 super ().__init__ (uri = uri , name = name , parent = parent )
56+ s0 = self .timing_start ("__init__" , "total" )
5657
5758 self .row_dim_name = row_dim_name
5859 self .col_dim_name = col_dim_name
5960 self .attr_name = "value"
6061 self .row_dataframe = row_dataframe
6162 self .col_dataframe = col_dataframe
63+ self .timing_end (s0 )
6264
6365 # ----------------------------------------------------------------
6466 def shape (self ) -> Tuple [int , int ]:
@@ -69,12 +71,15 @@ def shape(self) -> Tuple[int, int]:
6971
7072 Note: currently implemented via data scan --- will be optimized for TileDB core 2.10.
7173 """
74+ s1 = self .timing_start ("shape" , "total" )
7275 with self ._open ():
7376 # These TileDB arrays are string-dimensioned sparse arrays so there is no '.shape'.
7477 # Instead we compute it ourselves. See also:
7578 num_rows = self .row_dataframe .shape ()[0 ]
7679 num_cols = self .col_dataframe .shape ()[0 ]
77- return (num_rows , num_cols )
80+ retval = (num_rows , num_cols )
81+ self .timing_end (s1 )
82+ return retval
7883
7984 # ----------------------------------------------------------------
8085 def dim_select (
@@ -89,7 +94,13 @@ def dim_select(
8994 Either or both of the ID lists may be ``None``, meaning, do not subselect along
9095 that dimension. If both ID lists are ``None``, the entire matrix is returned.
9196 """
97+ s0 = self .timing_start ("dim_select" , "open" )
98+
99+ s1 = self .timing_start ("dim_select" , "open" )
92100 with tiledb .open (self .uri , ctx = self ._ctx ) as A :
101+ self .timing_end (s1 )
102+
103+ s2 = self .timing_start ("dim_select" , "tiledb_query" )
93104 query = A .query (return_arrow = return_arrow )
94105 if obs_ids is None :
95106 if var_ids is None :
@@ -101,8 +112,14 @@ def dim_select(
101112 df = query .df [obs_ids , :]
102113 else :
103114 df = query .df [obs_ids , var_ids ]
115+ self .timing_end (s2 )
116+
117+ s3 = self .timing_start ("dim_select" , "set_index" )
104118 if not return_arrow :
105119 df .set_index ([self .row_dim_name , self .col_dim_name ], inplace = True )
120+ self .timing_end (s3 )
121+
122+ self .timing_end (s0 )
106123 return df
107124
108125 # ----------------------------------------------------------------
@@ -126,15 +143,21 @@ def csr(
126143 """
127144 Like ``.df()`` but returns results in ``scipy.sparse.csr_matrix`` format.
128145 """
129- return self ._csr_or_csc ("csr" , obs_ids , var_ids )
146+ s0 = self .timing_start ("csr" , "total" )
147+ retval = self ._csr_or_csc ("csr" , obs_ids , var_ids )
148+ self .timing_end (s0 )
149+ return retval
130150
131151 def csc (
132152 self , obs_ids : Optional [Ids ] = None , var_ids : Optional [Ids ] = None
133153 ) -> sp .csc_matrix :
134154 """
135155 Like ``.df()`` but returns results in ``scipy.sparse.csc_matrix`` format.
136156 """
137- return self ._csr_or_csc ("csc" , obs_ids , var_ids )
157+ s0 = self .timing_start ("csc" , "total" )
158+ retval = self ._csr_or_csc ("csc" , obs_ids , var_ids )
159+ self .timing_end (s0 )
160+ return retval
138161
139162 def _csr_or_csc (
140163 self ,
@@ -168,6 +191,7 @@ def from_matrix_and_dim_values(
168191 ``scipy.sparse.csr_matrix``, ``scipy.sparse.csc_matrix``, ``numpy.ndarray``, etc.
169192 For ingest from ``AnnData``, these should be ``ann.obs_names`` and ``ann.var_names``.
170193 """
194+ s0 = self .timing_start ("from_matrix_and_dim_values" , "total" )
171195
172196 s = util .get_start_stamp ()
173197 log_io (
@@ -206,11 +230,14 @@ def from_matrix_and_dim_values(
206230 util .format_elapsed (s , f"{ self ._indent } FINISH WRITING { self .uri } " ),
207231 )
208232
233+ self .timing_end (s0 )
234+
209235 # ----------------------------------------------------------------
210236 def _create_empty_array (self , matrix_dtype : np .dtype ) -> None :
211237 """
212238 Create a TileDB 2D sparse array with string dimensions and a single attribute.
213239 """
240+ s0 = self .timing_start ("_create_empty_array" , "total" )
214241
215242 dom = tiledb .Domain (
216243 tiledb .Dim (
@@ -248,6 +275,7 @@ def _create_empty_array(self, matrix_dtype: np.dtype) -> None:
248275 )
249276
250277 tiledb .Array .create (self .uri , sch , ctx = self ._ctx )
278+ self .timing_end (s0 )
251279
252280 # ----------------------------------------------------------------
253281 def ingest_data_whole (
@@ -264,6 +292,7 @@ def ingest_data_whole(
264292 :param row_names: List of row names.
265293 :param col_names: List of column names.
266294 """
295+ s0 = self .timing_start ("ingest_data_whole" , "total" )
267296
268297 assert len (row_names ) == matrix .shape [0 ]
269298 assert len (col_names ) == matrix .shape [1 ]
@@ -274,6 +303,7 @@ def ingest_data_whole(
274303
275304 with tiledb .open (self .uri , mode = "w" , ctx = self ._ctx ) as A :
276305 A [d0 , d1 ] = mat_coo .data
306+ self .timing_end (s0 )
277307
278308 # ----------------------------------------------------------------
279309 # Example: suppose this 4x3 is to be written in two chunks of two rows each
@@ -321,7 +351,9 @@ def ingest_data_rows_chunked(
321351 :param row_names: List of row names.
322352 :param col_names: List of column names.
323353 """
354+ s0 = self .timing_start ("ingest_data_rows_chunked" , "total" )
324355
356+ s1 = self .timing_start ("ingest_data_rows_chunked" , "sortprep" )
325357 assert len (row_names ) == matrix .shape [0 ]
326358 assert len (col_names ) == matrix .shape [1 ]
327359
@@ -341,13 +373,20 @@ def ingest_data_rows_chunked(
341373 f"{ self ._indent } START ingest_data_rows_chunked" ,
342374 )
343375
376+ self .timing_end (s1 )
377+
344378 eta_tracker = util .ETATracker ()
379+ s2 = self .timing_start ("ingest_data_rows_chunked" , "open" )
345380 with tiledb .open (self .uri , mode = "w" , ctx = self ._ctx ) as A :
381+ self .timing_end (s2 )
382+
346383 nrow = len (sorted_row_names )
347384
348385 i = 0
349386 while i < nrow :
350387 t1 = time .time ()
388+
389+ s3 = self .timing_start ("ingest_data_rows_chunked" , "chunkprep" )
351390 # Find a number of CSR rows which will result in a desired nnz for the chunk.
352391 chunk_size = util ._find_csr_chunk_size (
353392 matrix , permutation , i , self ._soma_options .goal_chunk_nnz
@@ -360,6 +399,7 @@ def ingest_data_rows_chunked(
360399 # Write the chunk-COO to TileDB.
361400 d0 = sorted_row_names [chunk_coo .row + i ]
362401 d1 = col_names [chunk_coo .col ]
402+ self .timing_end (s3 )
363403
364404 if len (d0 ) == 0 :
365405 i = i2
@@ -385,7 +425,9 @@ def ingest_data_rows_chunked(
385425 )
386426
387427 # Write a TileDB fragment
428+ s4 = self .timing_start ("ingest_data_rows_chunked" , "tiledb-write" )
388429 A [d0 , d1 ] = chunk_coo .data
430+ self .timing_end (s4 )
389431
390432 t2 = time .time ()
391433 chunk_seconds = t2 - t1
@@ -408,6 +450,7 @@ def ingest_data_rows_chunked(
408450 f"{ self ._indent } FINISH __ingest_coo_data_string_dims_rows_chunked" ,
409451 ),
410452 )
453+ self .timing_end (s0 )
411454
412455 # This method is very similar to ingest_data_rows_chunked. The code is largely repeated,
413456 # and this is intentional. The algorithm here is non-trivial (among the most non-trivial
@@ -427,6 +470,7 @@ def ingest_data_cols_chunked(
427470 :param row_names: List of row names.
428471 :param col_names: List of column names.
429472 """
473+ s0 = self .timing_start ("ingest_data_cols_chunked" , "total" )
430474
431475 assert len (row_names ) == matrix .shape [0 ]
432476 assert len (col_names ) == matrix .shape [1 ]
@@ -514,6 +558,7 @@ def ingest_data_cols_chunked(
514558 f"{ self ._indent } FINISH __ingest_coo_data_string_dims_rows_chunked" ,
515559 ),
516560 )
561+ self .timing_end (s0 )
517562
518563 # This method is very similar to ingest_data_rows_chunked. The code is largely repeated,
519564 # and this is intentional. The algorithm here is non-trivial (among the most non-trivial
@@ -533,6 +578,7 @@ def ingest_data_dense_rows_chunked(
533578 :param row_names: List of row names.
534579 :param col_names: List of column names.
535580 """
581+ s0 = self .timing_start ("ingest_data_dense_rows_chunked" , "total" )
536582
537583 assert len (row_names ) == matrix .shape [0 ]
538584 assert len (col_names ) == matrix .shape [1 ]
@@ -622,6 +668,7 @@ def ingest_data_dense_rows_chunked(
622668 f"{ self ._indent } FINISH __ingest_coo_data_string_dims_dense_rows_chunked" ,
623669 ),
624670 )
671+ self .timing_end (s0 )
625672
626673 # ----------------------------------------------------------------
627674 def to_csr_matrix (self , row_labels : Labels , col_labels : Labels ) -> sp .csr_matrix :
@@ -633,6 +680,7 @@ def to_csr_matrix(self, row_labels: Labels, col_labels: Labels) -> sp.csr_matrix
633680 be in the same order as they were in any anndata object which was used to create the
634681 TileDB storage.
635682 """
683+ s0 = self .timing_start ("to_csr_matrix" , "total" )
636684
637685 s = util .get_start_stamp ()
638686 log_io (None , f"{ self ._indent } START read { self .uri } " )
@@ -644,4 +692,5 @@ def to_csr_matrix(self, row_labels: Labels, col_labels: Labels) -> sp.csr_matrix
644692 util .format_elapsed (s , f"{ self ._indent } FINISH read { self .uri } " ),
645693 )
646694
695+ self .timing_end (s0 )
647696 return csr
0 commit comments