@@ -53,12 +53,14 @@ def __init__(
5353 * For reading from an already-populated SOMA, we wish to avoid cache-coherency issues.
5454 """
5555 super ().__init__ (uri = uri , name = name , parent = parent )
56+ s0 = self .timing_start ("__init__" , "total" )
5657
5758 self .row_dim_name = row_dim_name
5859 self .col_dim_name = col_dim_name
5960 self .attr_name = "value"
6061 self .row_dataframe = row_dataframe
6162 self .col_dataframe = col_dataframe
63+ self .timing_end (s0 )
6264
6365 # ----------------------------------------------------------------
6466 def shape (self ) -> Tuple [int , int ]:
@@ -69,12 +71,15 @@ def shape(self) -> Tuple[int, int]:
6971
7072 Note: currently implemented via data scan -- will be optimized for TileDB core 2.10.
7173 """
74+ s1 = self .timing_start ("shape" , "total" )
7275 with self ._open ():
7376 # These TileDB arrays are string-dimensioned sparse arrays so there is no '.shape'.
7477 # Instead we compute it ourselves. See also:
7578 num_rows = self .row_dataframe .shape ()[0 ]
7679 num_cols = self .col_dataframe .shape ()[0 ]
77- return (num_rows , num_cols )
80+ retval = (num_rows , num_cols )
81+ self .timing_end (s1 )
82+ return retval
7883
7984 # ----------------------------------------------------------------
8085 def dim_select (
@@ -89,7 +94,13 @@ def dim_select(
8994 Either or both of the ID lists may be `None`, meaning, do not subselect along
9095 that dimension. If both ID lists are `None`, the entire matrix is returned.
9196 """
97+ s0 = self .timing_start ("dim_select" , "open" )
98+
99+ s1 = self .timing_start ("dim_select" , "open" )
92100 with tiledb .open (self .uri , ctx = self ._ctx ) as A :
101+ self .timing_end (s1 )
102+
103+ s2 = self .timing_start ("dim_select" , "tiledb_query" )
93104 query = A .query (return_arrow = return_arrow )
94105 if obs_ids is None :
95106 if var_ids is None :
@@ -101,8 +112,14 @@ def dim_select(
101112 df = query .df [obs_ids , :]
102113 else :
103114 df = query .df [obs_ids , var_ids ]
115+ self .timing_end (s2 )
116+
117+ s3 = self .timing_start ("dim_select" , "set_index" )
104118 if not return_arrow :
105119 df .set_index ([self .row_dim_name , self .col_dim_name ], inplace = True )
120+ self .timing_end (s3 )
121+
122+ self .timing_end (s0 )
106123 return df
107124
108125 # ----------------------------------------------------------------
@@ -126,15 +143,21 @@ def csr(
126143 """
127144 Like `.df()` but returns results in `scipy.sparse.csr_matrix` format.
128145 """
129- return self ._csr_or_csc ("csr" , obs_ids , var_ids )
146+ s0 = self .timing_start ("csr" , "total" )
147+ retval = self ._csr_or_csc ("csr" , obs_ids , var_ids )
148+ self .timing_end (s0 )
149+ return retval
130150
def csc(
    self, obs_ids: Optional[Ids] = None, var_ids: Optional[Ids] = None
) -> sp.csc_matrix:
    """
    Like `.df()` but returns results in `scipy.sparse.csc_matrix` format.

    :param obs_ids: Forwarded unchanged to `_csr_or_csc`. Presumably `None` means
        "do not subselect along this dimension", as in `dim_select` -- confirm.
    :param var_ids: Forwarded unchanged to `_csr_or_csc`; same caveat as `obs_ids`.
    :return: Whatever `_csr_or_csc("csc", obs_ids, var_ids)` returns.
    """
    # Time the whole call under the ("csc", "total") label.
    s0 = self.timing_start("csc", "total")
    try:
        return self._csr_or_csc("csc", obs_ids, var_ids)
    finally:
        # Close the timer even when the read raises, so a failed call does not
        # leave an unmatched timing_start behind.
        self.timing_end(s0)
138161
139162 def _csr_or_csc (
140163 self ,
@@ -168,6 +191,7 @@ def from_matrix_and_dim_values(
168191 `scipy.sparse.csr_matrix`, `scipy.sparse.csc_matrix`, `numpy.ndarray`, etc.
169192 For ingest from `AnnData`, these should be `ann.obs_names` and `ann.var_names`.
170193 """
194+ s0 = self .timing_start ("from_matrix_and_dim_values" , "total" )
171195
172196 s = util .get_start_stamp ()
173197 log_io (
@@ -206,11 +230,14 @@ def from_matrix_and_dim_values(
206230 util .format_elapsed (s , f"{ self ._indent } FINISH WRITING { self .uri } " ),
207231 )
208232
233+ self .timing_end (s0 )
234+
209235 # ----------------------------------------------------------------
210236 def _create_empty_array (self , matrix_dtype : np .dtype ) -> None :
211237 """
212238 Create a TileDB 2D sparse array with string dimensions and a single attribute.
213239 """
240+ s0 = self .timing_start ("_create_empty_array" , "total" )
214241
215242 level = self ._soma_options .string_dim_zstd_level
216243 dom = tiledb .Domain (
@@ -253,6 +280,7 @@ def _create_empty_array(self, matrix_dtype: np.dtype) -> None:
253280 )
254281
255282 tiledb .Array .create (self .uri , sch , ctx = self ._ctx )
283+ self .timing_end (s0 )
256284
257285 # ----------------------------------------------------------------
258286 def ingest_data_whole (
@@ -269,6 +297,7 @@ def ingest_data_whole(
269297 :param row_names: List of row names.
270298 :param col_names: List of column names.
271299 """
300+ s0 = self .timing_start ("ingest_data_whole" , "total" )
272301
273302 assert len (row_names ) == matrix .shape [0 ]
274303 assert len (col_names ) == matrix .shape [1 ]
@@ -279,6 +308,7 @@ def ingest_data_whole(
279308
280309 with tiledb .open (self .uri , mode = "w" , ctx = self ._ctx ) as A :
281310 A [d0 , d1 ] = mat_coo .data
311+ self .timing_end (s0 )
282312
283313 # ----------------------------------------------------------------
284314 # Example: suppose this 4x3 is to be written in two chunks of two rows each
@@ -326,7 +356,9 @@ def ingest_data_rows_chunked(
326356 :param row_names: List of row names.
327357 :param col_names: List of column names.
328358 """
359+ s0 = self .timing_start ("ingest_data_rows_chunked" , "total" )
329360
361+ s1 = self .timing_start ("ingest_data_rows_chunked" , "sortprep" )
330362 assert len (row_names ) == matrix .shape [0 ]
331363 assert len (col_names ) == matrix .shape [1 ]
332364
@@ -346,13 +378,20 @@ def ingest_data_rows_chunked(
346378 f"{ self ._indent } START ingest_data_rows_chunked" ,
347379 )
348380
381+ self .timing_end (s1 )
382+
349383 eta_tracker = util .ETATracker ()
384+ s2 = self .timing_start ("ingest_data_rows_chunked" , "open" )
350385 with tiledb .open (self .uri , mode = "w" , ctx = self ._ctx ) as A :
386+ self .timing_end (s2 )
387+
351388 nrow = len (sorted_row_names )
352389
353390 i = 0
354391 while i < nrow :
355392 t1 = time .time ()
393+
394+ s3 = self .timing_start ("ingest_data_rows_chunked" , "chunkprep" )
356395 # Find a number of CSR rows which will result in a desired nnz for the chunk.
357396 chunk_size = util ._find_csr_chunk_size (
358397 matrix , permutation , i , self ._soma_options .goal_chunk_nnz
@@ -365,6 +404,7 @@ def ingest_data_rows_chunked(
365404 # Write the chunk-COO to TileDB.
366405 d0 = sorted_row_names [chunk_coo .row + i ]
367406 d1 = col_names [chunk_coo .col ]
407+ self .timing_end (s3 )
368408
369409 if len (d0 ) == 0 :
370410 i = i2
@@ -390,7 +430,9 @@ def ingest_data_rows_chunked(
390430 )
391431
392432 # Write a TileDB fragment
433+ s4 = self .timing_start ("ingest_data_rows_chunked" , "tiledb-write" )
393434 A [d0 , d1 ] = chunk_coo .data
435+ self .timing_end (s4 )
394436
395437 t2 = time .time ()
396438 chunk_seconds = t2 - t1
@@ -413,6 +455,7 @@ def ingest_data_rows_chunked(
413455 f"{ self ._indent } FINISH __ingest_coo_data_string_dims_rows_chunked" ,
414456 ),
415457 )
458+ self .timing_end (s0 )
416459
417460 # This method is very similar to ingest_data_rows_chunked. The code is largely repeated,
418461 # and this is intentional. The algorithm here is non-trivial (among the most non-trivial
@@ -432,6 +475,7 @@ def ingest_data_cols_chunked(
432475 :param row_names: List of row names.
433476 :param col_names: List of column names.
434477 """
478+ s0 = self .timing_start ("ingest_data_cols_chunked" , "total" )
435479
436480 assert len (row_names ) == matrix .shape [0 ]
437481 assert len (col_names ) == matrix .shape [1 ]
@@ -519,6 +563,7 @@ def ingest_data_cols_chunked(
519563 f"{ self ._indent } FINISH __ingest_coo_data_string_dims_rows_chunked" ,
520564 ),
521565 )
566+ self .timing_end (s0 )
522567
523568 # This method is very similar to ingest_data_rows_chunked. The code is largely repeated,
524569 # and this is intentional. The algorithm here is non-trivial (among the most non-trivial
@@ -538,6 +583,7 @@ def ingest_data_dense_rows_chunked(
538583 :param row_names: List of row names.
539584 :param col_names: List of column names.
540585 """
586+ s0 = self .timing_start ("ingest_data_dense_rows_chunked" , "total" )
541587
542588 assert len (row_names ) == matrix .shape [0 ]
543589 assert len (col_names ) == matrix .shape [1 ]
@@ -627,6 +673,7 @@ def ingest_data_dense_rows_chunked(
627673 f"{ self ._indent } FINISH __ingest_coo_data_string_dims_dense_rows_chunked" ,
628674 ),
629675 )
676+ self .timing_end (s0 )
630677
631678 # ----------------------------------------------------------------
632679 def to_csr_matrix (self , row_labels : Labels , col_labels : Labels ) -> sp .csr_matrix :
@@ -638,6 +685,7 @@ def to_csr_matrix(self, row_labels: Labels, col_labels: Labels) -> sp.csr_matrix
638685 be in the same order as they were in any anndata object which was used to create the
639686 TileDB storage.
640687 """
688+ s0 = self .timing_start ("to_csr_matrix" , "total" )
641689
642690 s = util .get_start_stamp ()
643691 log_io (None , f"{ self ._indent } START read { self .uri } " )
@@ -649,4 +697,5 @@ def to_csr_matrix(self, row_labels: Labels, col_labels: Labels) -> sp.csr_matrix
649697 util .format_elapsed (s , f"{ self ._indent } FINISH read { self .uri } " ),
650698 )
651699
700+ self .timing_end (s0 )
652701 return csr
0 commit comments