Skip to content

Commit 518fd6f

Browse files
committed
Ready for CRAN
1 parent aca5a1e commit 518fd6f

8 files changed

Lines changed: 189 additions & 22 deletions

File tree

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
Package: filearray
22
Type: Package
33
Title: File-Backed Array for Out-of-Memory Computation
4-
Version: 0.1.3.9001
4+
Version: 0.1.4
55
Language: en-US
66
Encoding: UTF-8
77
License: LGPL-3
8-
URL: http://dipterix.org/filearray/, https://github.com/dipterix/filearray
8+
URL: https://dipterix.org/filearray/, https://github.com/dipterix/filearray
99
BugReports: https://github.com/dipterix/filearray/issues
1010
Authors@R: c(
1111
person(

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export(filearray_bind)
2020
export(filearray_checkload)
2121
export(filearray_create)
2222
export(filearray_load)
23+
export(filearray_load_or_create)
2324
export(filearray_threads)
2425
export(fmap)
2526
export(fmap2)

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# filearray (development version)
22

3+
# filearray 0.1.4
4+
5+
* Fixed a bug where the allocated memory was one byte shorter than requested. The bug could crash R when triggered in certain cases.
6+
* Removed the limit on the maximum number of partitions when writing. The previous implementation created and opened all related file descriptors at once before writing. That setup raised errors when the number of connections reached a certain limit, often defined by the operating system. This update only opens a connection on demand. The performance might be impacted when writing to disk, but in return, the program will be more robust.
7+
* Fixed `subset` function environment not resolved correctly when using formula
8+
* Added `filearray_load_or_create` as an alternative to `filearray_checkload`; it automatically replaces existing obsolete array files if the headers, dimensions, or data types don't match. An `on_missing` argument is also provided to allow array initialization when a new array is created.
9+
310
# filearray 0.1.3
411

512
* Automatically detect whether symbolic-link works and show warnings

R/class_filearray.R

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,13 +208,16 @@ setRefClass(
208208
}
209209
return(default)
210210
},
211-
set_header = function(key, value){
211+
set_header = function(key, value, save = TRUE){
212212
force(value)
213213
if(key %in% RESERVED_HEADERS){
214214
stop("Key `", key, "` is preserved and should be read-only or altered via other methods.")
215215
}
216216
.self$.header[[key]] <- value
217-
.self$.save_header()
217+
if( save ) {
218+
.self$.save_header()
219+
}
220+
invisible(value)
218221
},
219222
header_signature = function(include_path = TRUE){
220223
header_sig <- digest::digest(.self$.header, algo = "sha256")

R/load.R

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ guess_partition <- function(dim, elem_size){
4747
#' @param symlink_ok whether arrays with symbolic-link partitions can pass
4848
#' the test; this is usually used on bound arrays with symbolic-links; see
4949
#' \code{\link{filearray_bind}};
50+
#' @param verbose whether to print out some debug messages
51+
#' @param on_missing function to handle file array (such as initialization)
52+
#' when a new array is created; must take only one argument, the array object
5053
#' @return A \code{\link{FileArray-class}} instance.
5154
#'
5255
#' @details The file arrays partition out-of-memory array objects and store them
@@ -107,6 +110,28 @@ guess_partition <- function(dim, elem_size){
107110
#' }
108111
#'
109112
#'
113+
#' # check-load, and create a new array if fail
114+
#' x <- filearray_load_or_create(
115+
#' filebase = filebase, dimension = c(200, 30, 8),
116+
#' verbose = TRUE, signature = "henry"
117+
#' )
118+
#' x$get_header("signature")
119+
#'
120+
#' # check-load with initialization
121+
#' x <- filearray_load_or_create(
122+
#' filebase = filebase,
123+
#' dimension = c(3, 4, 5),
124+
#' verbose = TRUE, mode = "readonly",
125+
#' on_missing = function(array) {
126+
#' array[] <- seq_len(60)
127+
#' }
128+
#' )
129+
#'
130+
#' x[1:3,1,1]
131+
#'
132+
#' # Clean up
133+
#' unlink(filebase, recursive = TRUE)
134+
#'
110135
NULL
111136

112137
#' @rdname filearray
@@ -190,3 +215,107 @@ filearray_checkload <- function(
190215
}
191216
return(arr)
192217
}
218+
219+
220+
#' @rdname filearray
221+
#' @export
222+
filearray_load_or_create <- function(
    filebase, dimension, on_missing = NULL, type = NA,
    ..., mode = c("readonly", "readwrite"), symlink_ok = TRUE,
    initialize = FALSE, partition_size = NA, verbose = FALSE
) {
    # Load the file array stored at `filebase` through `filearray_checkload()`.
    # If loading fails, or the loaded array's type/dimension differ from the
    # requested ones, whatever exists at `filebase` is removed and a new array
    # is created with `filearray_create()`.
    #
    # `...` holds named header key-value pairs: they are forwarded to
    # `filearray_checkload()` and, when a new array is created, written into
    # its header. `on_missing`, if supplied, is called with the new array only
    # when a new array is created. The returned array has `.mode` set to `mode`.

    mode <- match.arg(mode)
    filebase <- normalizePath(filebase, mustWork = FALSE, winslash = "/")
    # Reject vectors of paths and file-system roots ("/" or e.g. "C:/")
    if(length(filebase) != 1 || grepl("(^|^[A-Za-z]:)/$", filebase)) {
        stop("Invalid filebase to store a file array.")
    }

    dimension <- as.integer(dimension)
    # Only NA and negative margins are rejected here
    if(length(dimension) < 2 || any(is.na(dimension) | dimension < 0)) {
        stop("Incorrect dimension for a file array: `dimension` must a valid positive integer vector with length of two or above.")
    }

    if(!is.null(on_missing)) {
        if(!is.function(on_missing) || !length(formals(on_missing))) {
            stop("`filearray_load_or_create`: `on_missing` must be a function with one argument (i.e. the file array)")
        }
    }

    additional_headers <- list(...)
    add_header_names <- names(additional_headers)
    if(length(additional_headers)) {
        # Every extra argument needs a non-blank *name*; the check must look
        # at the names, not at the header values (a value may legitimately be
        # an empty string, and an unnamed value is usually non-empty).
        if(!length(add_header_names) || "" %in% trimws(add_header_names)) {
            stop("`filearray_load_or_create`: additional parameters must be named.")
        }
    }

    arr <- tryCatch(
        expr = {
            # try to load existing array
            arr <- filearray_checkload(
                filebase = filebase, mode = mode,
                symlink_ok = symlink_ok, ...
            )

            # If no error raised, the array has been loaded

            if(!is.na(type)) {
                if(!identical(arr$type(), type)) {
                    stop("`filearray_load_or_create`: Requested array type does not match with existing array.")
                }
            } else {
                # in case the array needs to be reconstructed, assuming the
                # type remain the same; this assignment happens in the
                # function frame, so the error handler below sees it
                type <- arr$type()
            }

            # Now check the dimension
            arr_dim <- as.integer(arr$dimension())
            if(!identical(arr_dim, dimension)) {
                stop("`filearray_load_or_create`: Requested dimension does not match with existing array.")
            }

            arr
        },
        error = function(e) {
            if(verbose) {
                message("`filearray_load_or_create`: cannot load the existing file array: ", conditionMessage(e), "\nTrying creating a new one. If the array already exists, its file path will be removed.")
            }
            # Remove the stale/invalid array before re-creating it
            if(file.exists(filebase)) {
                unlink(filebase, recursive = TRUE, force = TRUE)
            }
            pdir <- dirname(filebase)
            if(!dir.exists(pdir)) {
                dir.create(pdir, showWarnings = FALSE, recursive = TRUE)
            }
            # create the array; fall back to double when no type is known
            if(is.na(type)) { type <- 'double' }
            arr <- filearray_create(
                filebase = filebase,
                dimension = dimension,
                type = type,
                partition_size = partition_size,
                initialize = initialize
            )
            # run on_missing if the function exists; the array is made
            # writable first so the callback can fill in data
            if(is.function(on_missing)) {
                arr$.mode <- "readwrite"
                on_missing(arr)
            }
            # seal the header: set all keys in memory, then save once
            for(nm in add_header_names) {
                arr$set_header(key = nm, value = additional_headers[[nm]], save = FALSE)
            }
            arr$.save_header()
            arr
        }
    )

    # set mode
    arr$.mode <- mode
    arr
}
321+

adhoc/rchk.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
rm "$HOME/Dropbox/projects/filearray_0.1.0.9000.tar.gz"
2-
docker run -v "$HOME/Dropbox/projects":/projects rchk "/projects/filearray_0.1.0.9000.tar.gz"
1+
# rm "$HOME/Dropbox/projects/filearray_0.1.3.9001.tar.gz"
2+
docker run -v "$HOME/Dropbox/projects":/projects kalibera/rchk:latest "/projects/filearray_0.1.3.9001.tar.gz"

cran-comments.md

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
## Dev environment
2-
* osx (ARM), R 4.1.2
2+
* osx (ARM), R 4.2.1
33

44
## Test environments
55
* osx (x64, github-action), R-release
@@ -9,19 +9,5 @@
99

1010
## R CMD check results
1111

12-
On `release` and `devel`
12+
On `oldrelease`, `release` and `devel`
1313
0 errors | 0 warnings | 0 notes
14-
15-
On R-4.0
16-
0 errors | 1 warning | 0 notes
17-
18-
```
19-
Codoc mismatches from documentation object 'apply':
20-
apply
21-
Code: function(X, MARGIN, FUN, ...)
22-
Docs: function(X, MARGIN, FUN, ..., simplify = TRUE)
23-
Argument names in docs not in code:
24-
simplify
25-
```
26-
27-
This is because `simplify` was added to `apply` function since R-4.1.

man/filearray.Rd

Lines changed: 41 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)