Skip to content

Commit 324654b

Browse files
committed
iterate on string vs large_string
1 parent 124e275 commit 324654b

File tree

7 files changed

+21
-9
lines changed

7 files changed

+21
-9
lines changed

apis/python/src/tiledbsoma/util_arrow.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
# IMPORTANT: ALL non-primitive types supported by TileDB must be in this table.
2525
#
2626
pa.string(): "ascii", # XXX TODO: temporary work-around until UTF8 support is native. GH #338.
27+
pa.large_string(): "ascii", # XXX TODO: temporary work-around until UTF8 support is native. GH #338.
2728
pa.binary(): np.dtype("S"),
29+
pa.large_binary(): np.dtype("S"),
2830
pa.timestamp("s"): "datetime64[s]",
2931
pa.timestamp("ms"): "datetime64[ms]",
3032
pa.timestamp("us"): "datetime64[us]",
@@ -90,7 +92,7 @@ def get_arrow_type_from_tiledb_dtype(tiledb_dtype: Union[str, np.dtype]) -> pa.D
9092
"""
9193
if tiledb_dtype == "ascii" or tiledb_dtype.name == "bytes":
9294
# XXX TODO: temporary work-around until UTF8 support is native. GH #338.
93-
return pa.string()
95+
return pa.large_string()
9496
else:
9597
return pa.from_numpy_dtype(tiledb_dtype)
9698

apis/python/tests/test_soma_collection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def create_and_populate_dataframe(dataframe: soma.SOMADataFrame) -> None:
1515
[
1616
("foo", pa.int32()),
1717
("bar", pa.float64()),
18-
("baz", pa.string()),
18+
("baz", pa.large_string()),
1919
]
2020
)
2121

@@ -108,7 +108,7 @@ def soma_object(request, tmp_path):
108108

109109
elif class_name == "SOMADataFrame":
110110
so = soma.SOMADataFrame(uri=uri)
111-
so.create(pa.schema([("A", pa.int32()), ("B", pa.string())]))
111+
so.create(pa.schema([("A", pa.int32()), ("B", pa.large_string())]))
112112

113113
elif class_name == "SOMAIndexedDataFrame":
114114
so = soma.SOMAIndexedDataFrame(uri=uri)

apis/python/tests/test_soma_dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def test_soma_dataframe_non_indexed(tmp_path):
1313
[
1414
("foo", pa.int32()),
1515
("bar", pa.float64()),
16-
("baz", pa.string()),
16+
("baz", pa.large_string()),
1717
]
1818
)
1919
sdf.create(schema=asch)
@@ -120,7 +120,7 @@ def simple_soma_data_frame(tmp_path):
120120
("soma_rowid", pa.uint64()),
121121
("A", pa.int64()),
122122
("B", pa.float64()),
123-
("C", pa.string()),
123+
("C", pa.large_string()),
124124
]
125125
)
126126
sdf = t.SOMADataFrame(uri=tmp_path.as_posix())

apis/python/tests/test_soma_experiment_basic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def create_and_populate_obs(obs: soma.SOMADataFrame) -> soma.SOMADataFrame:
1414
[
1515
("foo", pa.int32()),
1616
("bar", pa.float64()),
17-
("baz", pa.string()),
17+
("baz", pa.large_string()),
1818
]
1919
)
2020

@@ -37,7 +37,7 @@ def create_and_populate_var(var: soma.SOMADataFrame) -> soma.SOMADataFrame:
3737

3838
var_arrow_schema = pa.schema(
3939
[
40-
("quux", pa.string()),
40+
("quux", pa.large_string()),
4141
("xyzzy", pa.float64()),
4242
]
4343
)

apis/python/tests/test_soma_indexed_dataframe.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@ def _schema():
2121
def test_soma_indexed_dataframe(tmp_path, arrow_schema):
2222
sdf = t.SOMAIndexedDataFrame(uri=tmp_path.as_posix())
2323

24+
asch = pa.schema(
25+
[
26+
("foo", pa.int32()),
27+
("bar", pa.float64()),
28+
("baz", pa.large_string()),
29+
]
30+
)
31+
2432
# Create
2533
asch = arrow_schema()
2634
sdf.create(schema=asch, index_column_names=["foo"])
@@ -72,7 +80,7 @@ def simple_soma_indexed_data_frame(tmp_path):
7280
("index", pa.uint64()),
7381
("A", pa.int64()),
7482
("B", pa.float64()),
75-
("C", pa.string()),
83+
("C", pa.large_string()),
7684
]
7785
)
7886
index_column_names = ["index"]

apis/python/tests/test_soma_metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def soma_object(request, tmp_path):
3333

3434
elif class_name == "SOMADataFrame":
3535
so = soma.SOMADataFrame(uri=uri)
36-
so.create(pa.schema([("A", pa.int32()), ("B", pa.string())]))
36+
so.create(pa.schema([("A", pa.int32()), ("B", pa.large_string())]))
3737

3838
elif class_name == "SOMAIndexedDataFrame":
3939
so = soma.SOMAIndexedDataFrame(uri=uri)

apis/python/tests/test_type_system.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
pa.timestamp("us"),
2626
pa.timestamp("ns"),
2727
pa.string(),
28+
pa.large_string(),
2829
pa.binary(),
30+
pa.large_binary(),
2931
]
3032

3133

0 commit comments

Comments
 (0)