Skip to content

Commit 64ae175

Browse files
authored
[r] Enhance support for (partial) table extraction (#389)
* work towards direct import of columns in c++, general edits * generalize to get list of columns in single call * extract recordbatch by selected columns * one char typo fix in comment
1 parent 43bc041 commit 64ae175

File tree

8 files changed

+283
-75
lines changed

8 files changed

+283
-75
lines changed

apis/r/DESCRIPTION

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ Authors@R: c(
2323
person(given = "TileDB, Inc.",
2424
role = c("cph", "fnd"))
2525
)
26-
URL: https://github.com/single-cell-data/TileDB-SingleCell
27-
BugReports: https://github.com/single-cell-data/TileDB-SingleCell/issues
26+
URL: https://github.com/single-cell-data/TileDB-SOMA
27+
BugReports: https://github.com/single-cell-data/TileDB-SOMA/issues
2828
License: MIT + file LICENSE
2929
Encoding: UTF-8
3030
Imports:
@@ -42,7 +42,8 @@ Imports:
4242
arch
4343
LinkingTo:
4444
Rcpp,
45-
RcppSpdlog
45+
RcppSpdlog,
46+
arch
4647
Additional_repositories: https://ghrr.github.io/drat
4748
Roxygen: list(markdown = TRUE)
4849
RoxygenNote: 7.2.1

apis/r/NAMESPACE

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,18 @@ export(SOMADataFrame)
44
export(SOMAIndexedDataFrame)
55
export(TileDBArray)
66
export(TileDBObject)
7-
export(getColumn)
8-
export(getTable)
7+
export(export_column_direct)
8+
export(export_recordbatch)
9+
export(get_column)
10+
export(get_column_names)
11+
export(get_table)
912
importFrom(Rcpp,evalCpp)
13+
importFrom(arch,arch_allocate_array_data)
14+
importFrom(arch,arch_allocate_schema)
15+
importFrom(arch,arch_array)
16+
importFrom(arch,arch_schema_info)
17+
importFrom(arch,as_arch_array_stream)
18+
importFrom(arch,from_arch_array)
1019
importFrom(bit64,as.integer64)
1120
importFrom(bit64,lim.integer64)
1221
importFrom(glue,glue_collapse)

apis/r/R/RcppExports.R

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,25 @@
11
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4-
test_sdf <- function(uri) {
5-
invisible(.Call(`_tiledbsoma_test_sdf`, uri))
6-
}
7-
4+
#' @rdname get_table
5+
#' @export
86
get_column_names <- function(uri) {
97
.Call(`_tiledbsoma_get_column_names`, uri)
108
}
119

12-
export_column <- function(uri, name, schemaxp, arrayxp) {
13-
.Call(`_tiledbsoma_export_column`, uri, name, schemaxp, arrayxp)
10+
export_column <- function(uri, colname, schemaxp, arrayxp) {
11+
.Call(`_tiledbsoma_export_column`, uri, colname, schemaxp, arrayxp)
12+
}
13+
14+
#' @rdname get_table
15+
#' @export
16+
export_column_direct <- function(uri, colnames) {
17+
.Call(`_tiledbsoma_export_column_direct`, uri, colnames)
1418
}
19+
20+
#' @rdname get_table
21+
#' @export
22+
export_recordbatch <- function(uri, colnames) {
23+
.Call(`_tiledbsoma_export_recordbatch`, uri, colnames)
24+
}
25+

apis/r/R/getData.R

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,32 @@
55
##'
66
##' @param uri Character value with URI path to a SOMA data set
77
##' @param column Character value with the name of the column to retrieve
8+
##' @param colnames Character vector with the names of the columns to retrieve
89
##' @return The selected data frame (or column) from the given data set
910
##' @examples
1011
##' \dontrun{
1112
##' uri <- "test/soco/pbmc3k_processed/obs"
1213
##' column <- "n_counts"
13-
##' summary(getColumn(uri, column)
14-
##' summary(getTable(uri))
14+
##' summary(get_table(uri))
15+
##' summary(get_column(uri, column))
16+
##' columns <- c("n_genes", "louvain")
17+
##' z <- export_recordbatch(uri, columns)
18+
##' rb <- arch::from_arch_array(z, arrow::RecordBatch)
19+
##' z <- export_recordbatch(uri, columns)
20+
##' tb <- arrow::as_arrow_table(arch::from_arch_array(z, arrow::RecordBatch))
1521
##' }
22+
##' @importFrom arch arch_allocate_schema arch_allocate_array_data arch_array as_arch_array_stream from_arch_array arch_schema_info
1623
##' @export
17-
getColumn <- function(uri, column) {
24+
get_table <- function(uri) {
25+
colnames <- get_column_names(uri)
26+
ll <- lapply(colnames, function(n) get_column(uri, n))
27+
names(ll) <- colnames
28+
as.data.frame(ll)
29+
}
30+
31+
##' @rdname get_table
32+
##' @export
33+
get_column <- function(uri, column) {
1834
schema <- arch::arch_allocate_schema()
1935
array <- arch::arch_allocate_array_data()
2036
## modeled after libtiledb_query_export_buffer_arch_pointers
@@ -24,15 +40,6 @@ getColumn <- function(uri, column) {
2440
res
2541
}
2642

27-
##' @rdname getColumn
28-
##' @export
29-
getTable <- function(uri) {
30-
colnames <- get_column_names(uri)
31-
ll <- lapply(colnames, \(n) getColumn(uri, n))
32-
names(ll) <- colnames
33-
as.data.frame(ll)
34-
}
35-
3643
##' @importFrom Rcpp evalCpp
3744
##' @useDynLib tiledbsoma, .registration=TRUE
3845
NULL

apis/r/man/getColumn.Rd

Lines changed: 0 additions & 31 deletions
This file was deleted.

apis/r/man/get_table.Rd

Lines changed: 47 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/r/src/RcppExports.cpp

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,6 @@ Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
1010
Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
1111
#endif
1212

13-
// test_sdf
14-
void test_sdf(const std::string& uri);
15-
RcppExport SEXP _tiledbsoma_test_sdf(SEXP uriSEXP) {
16-
BEGIN_RCPP
17-
Rcpp::RNGScope rcpp_rngScope_gen;
18-
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
19-
test_sdf(uri);
20-
return R_NilValue;
21-
END_RCPP
22-
}
2313
// get_column_names
2414
std::vector<std::string> get_column_names(const std::string& uri);
2515
RcppExport SEXP _tiledbsoma_get_column_names(SEXP uriSEXP) {
@@ -32,24 +22,49 @@ BEGIN_RCPP
3222
END_RCPP
3323
}
3424
// export_column
35-
bool export_column(const std::string& uri, const std::string& name, SEXP schemaxp, SEXP arrayxp);
36-
RcppExport SEXP _tiledbsoma_export_column(SEXP uriSEXP, SEXP nameSEXP, SEXP schemaxpSEXP, SEXP arrayxpSEXP) {
25+
bool export_column(const std::string& uri, const std::string& colname, SEXP schemaxp, SEXP arrayxp);
26+
RcppExport SEXP _tiledbsoma_export_column(SEXP uriSEXP, SEXP colnameSEXP, SEXP schemaxpSEXP, SEXP arrayxpSEXP) {
3727
BEGIN_RCPP
3828
Rcpp::RObject rcpp_result_gen;
3929
Rcpp::RNGScope rcpp_rngScope_gen;
4030
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
41-
Rcpp::traits::input_parameter< const std::string& >::type name(nameSEXP);
31+
Rcpp::traits::input_parameter< const std::string& >::type colname(colnameSEXP);
4232
Rcpp::traits::input_parameter< SEXP >::type schemaxp(schemaxpSEXP);
4333
Rcpp::traits::input_parameter< SEXP >::type arrayxp(arrayxpSEXP);
44-
rcpp_result_gen = Rcpp::wrap(export_column(uri, name, schemaxp, arrayxp));
34+
rcpp_result_gen = Rcpp::wrap(export_column(uri, colname, schemaxp, arrayxp));
35+
return rcpp_result_gen;
36+
END_RCPP
37+
}
38+
// export_column_direct
39+
SEXP export_column_direct(const std::string& uri, const std::vector<std::string>& colnames);
40+
RcppExport SEXP _tiledbsoma_export_column_direct(SEXP uriSEXP, SEXP colnamesSEXP) {
41+
BEGIN_RCPP
42+
Rcpp::RObject rcpp_result_gen;
43+
Rcpp::RNGScope rcpp_rngScope_gen;
44+
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
45+
Rcpp::traits::input_parameter< const std::vector<std::string>& >::type colnames(colnamesSEXP);
46+
rcpp_result_gen = Rcpp::wrap(export_column_direct(uri, colnames));
47+
return rcpp_result_gen;
48+
END_RCPP
49+
}
50+
// export_recordbatch
51+
SEXP export_recordbatch(const std::string& uri, const std::vector<std::string>& colnames);
52+
RcppExport SEXP _tiledbsoma_export_recordbatch(SEXP uriSEXP, SEXP colnamesSEXP) {
53+
BEGIN_RCPP
54+
Rcpp::RObject rcpp_result_gen;
55+
Rcpp::RNGScope rcpp_rngScope_gen;
56+
Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP);
57+
Rcpp::traits::input_parameter< const std::vector<std::string>& >::type colnames(colnamesSEXP);
58+
rcpp_result_gen = Rcpp::wrap(export_recordbatch(uri, colnames));
4559
return rcpp_result_gen;
4660
END_RCPP
4761
}
4862

4963
static const R_CallMethodDef CallEntries[] = {
50-
{"_tiledbsoma_test_sdf", (DL_FUNC) &_tiledbsoma_test_sdf, 1},
5164
{"_tiledbsoma_get_column_names", (DL_FUNC) &_tiledbsoma_get_column_names, 1},
5265
{"_tiledbsoma_export_column", (DL_FUNC) &_tiledbsoma_export_column, 4},
66+
{"_tiledbsoma_export_column_direct", (DL_FUNC) &_tiledbsoma_export_column_direct, 2},
67+
{"_tiledbsoma_export_recordbatch", (DL_FUNC) &_tiledbsoma_export_recordbatch, 2},
5368
{NULL, NULL, 0}
5469
};
5570

0 commit comments

Comments
 (0)