Update unit tests to reflect feature change

johnkerl · johnkerl · commit f3a0d6255764 · 2022-09-15T21:00:41.000-04:00
diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -25,7 +25,7 @@ build:
   tools:
     python: "3.8"
   commands:
-    # `pip install -e .` or `python setup.py develop` will _not_ let python find the tiledbsc package
+    # `pip install -e .` or `python setup.py develop` will _not_ let python find the tiledbsoma package
     # within sphinx build
     #- apt-get install python3-sphinx
     - python -m pip install -r doc/requirements_doc.txt
diff --git a/apis/python/tests/test_ascii_and_unicode.py b/apis/python/tests/test_ascii_and_unicode.py
@@ -1,11 +1,14 @@
 import anndata as ad
 import numpy as np
 import pandas as pd
+import pytest
 
 import tiledbsoma.io as io
 from tiledbsoma import SOMA
 
 
+# TODO: restore once https://github.com/single-cell-data/TileDB-SingleCell/issues/274 is in place.
+@pytest.mark.skip(reason="Unicode attributes temporarily unsupported")
 def test_readback(tmp_path):
     """
     Validate correct encode/decode of non-ASCII attribute text.
diff --git a/apis/python/tests/test_dim_select.py b/apis/python/tests/test_dim_select.py
@@ -140,7 +140,7 @@ def test_dim_select(adata):
         "VDAC3",
     ]
 
-    df = soma.obs.dim_select([b"AAGCGACTTTGACG", b"AATGCGTGGACGGA"])
+    df = soma.obs.dim_select(["AAGCGACTTTGACG", "AATGCGTGGACGGA"])
     assert df.shape == (2, 7)
     assert df.at["AAGCGACTTTGACG", "groups"] == "g1"
     assert df.at["AATGCGTGGACGGA", "nFeature_RNA"] == 73
@@ -150,23 +150,23 @@ def test_dim_select(adata):
     # AATGCGTGGACGGA           0       389.0            73                1              1     g1              1
     assert soma.obs.dim_select(None).shape == (80, 7)
 
-    df = soma.var.dim_select([b"AKR1C3", b"MYL9"])
+    df = soma.var.dim_select(["AKR1C3", "MYL9"])
     assert df.shape == (2, 5)
     assert df.at["AKR1C3", "vst.variable"] == 1
     assert df.at["MYL9", "vst.variable"] == 1
     assert soma.var.dim_select(None).shape == (20, 5)
 
     assert sorted(soma.obsm.keys()) == sorted(["X_tsne", "X_pca"])
 
-    df = soma.obsm["X_tsne"].dim_select([b"AAGCGACTTTGACG", b"AATGCGTGGACGGA"])
+    df = soma.obsm["X_tsne"].dim_select(["AAGCGACTTTGACG", "AATGCGTGGACGGA"])
     assert df.shape == (2, 2)
 
-    df = soma.obsm["X_pca"].dim_select([b"AAGCGACTTTGACG", b"AATGCGTGGACGGA"])
+    df = soma.obsm["X_pca"].dim_select(["AAGCGACTTTGACG", "AATGCGTGGACGGA"])
     assert df.shape == (2, 19)
 
-    assert soma.X["data"].dim_select([b"AAGCGACTTTGACG"], [b"AKR1C3"]).shape == (1, 1)
-    assert soma.X["data"].dim_select(None, [b"AKR1C3"]).shape == (80, 1)
-    assert soma.X["data"].dim_select([b"AAGCGACTTTGACG"], None).shape == (20, 1)
+    assert soma.X["data"].dim_select(["AAGCGACTTTGACG"], ["AKR1C3"]).shape == (1, 1)
+    assert soma.X["data"].dim_select(None, ["AKR1C3"]).shape == (80, 1)
+    assert soma.X["data"].dim_select(["AAGCGACTTTGACG"], None).shape == (20, 1)
     assert soma.X["data"].dim_select(None, None).shape == (1600, 1)
 
     tempdir.cleanup()
@@ -211,7 +211,8 @@ def test_zeroes_handling():
     n_obs = len(obs_ids)
     n_var = len(var_ids)
 
-    cell_types = ["blööd" if obs_id[1] == "A" else "lung" for obs_id in obs_ids]
+    # TODO: restore the non-ASCII "blööd" value once https://github.com/single-cell-data/TileDB-SingleCell/issues/274 is in place.
+    cell_types = ["blood" if obs_id[1] == "A" else "lung" for obs_id in obs_ids]
     feature_names = [
         "ENSG00000999999" if var_id[1] < "M" else "ENSG00000123456"
         for var_id in var_ids
diff --git a/apis/python/tests/test_type_diversity.py b/apis/python/tests/test_type_diversity.py
@@ -108,8 +108,10 @@ def test_from_anndata_DataFrame_type(tmp_path):
     df_col_type_sweep = [
         ("bool", lambda a: a.astype(bool)),
         ("str", lambda a: a.astype(str)),
-        ("bytes", lambda a: a.astype(str).astype(bytes)),
-        # ("float16", lambda a: a.astype(np.dtype("float16"))),         TODO: Enable when #39 is fixed
+        # TODO: restore the "bytes" case once https://github.com/single-cell-data/TileDB-SingleCell/issues/274 is in place.
+        # ("bytes", lambda a: a.astype(str).astype(bytes)),
+        # TODO: Enable when #39 is fixed
+        # ("float16", lambda a: a.astype(np.dtype("float16"))),
         ("float32", lambda a: a.astype("float32")),
         ("float64", lambda a: a.astype("float64")),
         ("int8", lambda a: a.astype("int8")),
@@ -147,9 +149,7 @@ def test_from_anndata_DataFrame_type(tmp_path):
             ),
         ),
     ]
-    index = (
-        np.arange(1, n + 1).astype(str).astype(bytes)
-    )  # AnnData requires string indices, TileDB wants bytes. Use LCD
+    index = np.arange(1, n + 1).astype(str).astype(str)
     df = pd.DataFrame(
         data={
             f"col_{name}": cast(pd.Series(index=index, data=np.arange(n)))
@@ -176,7 +176,7 @@ def cmp_dtype(series, tdb: tiledb.Attr) -> bool:
             # TODO: see annotation_dataframe.py. Once Unicode attributes are queryable, we'll need
             # to remove this check which is verifying the current force-to-ASCII workaround.
             if ad_dtype.name == "str":
-                ad_dtype = np.dtype("S")
+                ad_dtype = np.dtype("U")
 
         return ad_dtype == tdb.dtype
 
@@ -212,9 +212,8 @@ def test_from_anndata_annotations_empty(tmp_path):
     n_obs = 100
     n_var = 10
 
-    # AnnData requires a string index. TileDB does not support UTF8, so use ASCII.
-    obs = pd.DataFrame(index=np.arange(n_obs).astype(bytes))
-    var = pd.DataFrame(index=np.arange(n_var).astype(bytes))
+    obs = pd.DataFrame(index=np.arange(n_obs).astype(str))
+    var = pd.DataFrame(index=np.arange(n_var).astype(str))
 
     X = np.ones((n_obs, n_var))
     adata = ad.AnnData(X=X, obs=obs, var=var, dtype=X.dtype)