Skip to content

Commit aa81270

Browse files
authored
Merge pull request #2 from dipterix/mmap
Replaced fopen with boost memory mapping
2 parents 7530ed4 + c33a41e commit aa81270

35 files changed

Lines changed: 3855 additions & 1050 deletions

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,9 @@ tests2/
2020
docs
2121
CRAN-RELEASE
2222
inst/doc
23+
# C++ related
24+
*.a
25+
*.dll
26+
*.o
27+
*.so
28+
*.dll

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Suggests:
2525
testthat (>= 3.0.0)
2626
RoxygenNote: 7.1.1
2727
LinkingTo:
28+
BH,
2829
Rcpp
2930
Config/testthat/edition: 3
3031
VignetteBuilder: knitr

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ export(filearray_create)
2121
export(filearray_load)
2222
export(filearray_threads)
2323
export(fmap)
24+
export(fmap2)
2425
export(fmap_element_wise)
2526
export(fwhich)
2627
export(mapreduce)

R/RcppExports.R

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ filearray_subset <- function(filebase, position_indices, drop = TRUE, use_dimnam
6969
.Call(`_filearray_filearray_subset`, filebase, position_indices, drop, use_dimnames, reshape)
7070
}
7171

72-
FARR_subset_sequential <- function(filebase, unit_partlen, cum_partsizes, array_type, file_buffer, ret, from = 0L, len = 1L) {
73-
.Call(`_filearray_FARR_subset_sequential`, filebase, unit_partlen, cum_partsizes, array_type, file_buffer, ret, from, len)
72+
# Thin R-level wrapper: forwards its arguments unchanged, via `.Call`, to the
# compiled routine `_filearray_FARR_subset_sequential` and returns that
# routine's result. `from` defaults to 0L and `len` to 1L.
FARR_subset_sequential <- function(filebase, unit_partlen, cum_partsizes, array_type, ret, from = 0L, len = 1L) {
73+
.Call(`_filearray_FARR_subset_sequential`, filebase, unit_partlen, cum_partsizes, array_type, ret, from, len)
7474
}
7575

7676
FARR_subset2 <- function(filebase, listOrEnv, reshape = NULL, drop = FALSE, use_dimnames = TRUE, thread_buffer = 2097152L, split_dim = 0L, strict = 1L) {
@@ -81,6 +81,10 @@ FARR_buffer_map <- function(input_filebases, output_filebase, map, buffer_nelems
8181
.Call(`_filearray_FARR_buffer_map`, input_filebases, output_filebase, map, buffer_nelems, result_nelems)
8282
}
8383

84+
# Thin R-level wrapper: forwards `input_filebases`, `map` and `buffer_nelems`
# unchanged, via `.Call`, to the compiled routine
# `_filearray_FARR_buffer_map2` and returns that routine's result.
FARR_buffer_map2 <- function(input_filebases, map, buffer_nelems) {
85+
.Call(`_filearray_FARR_buffer_map2`, input_filebases, map, buffer_nelems)
86+
}
87+
8488
# Thin R-level wrapper: forwards `filebase`, `map`, `reduce` and
# `buffer_nelems` unchanged, via `.Call`, to the compiled routine
# `_filearray_FARR_buffer_mapreduce` and returns that routine's result.
FARR_buffer_mapreduce <- function(filebase, map, reduce, buffer_nelems) {
8589
.Call(`_filearray_FARR_buffer_mapreduce`, filebase, map, reduce, buffer_nelems)
8690
}
@@ -97,6 +101,10 @@ hasOpenMP <- function() {
97101
.Call(`_filearray_hasOpenMP`)
98102
}
99103

104+
# Thin R-level wrapper: forwards its arguments unchanged, via `.Call`, to the
# compiled routine `_filearray_FARR_subset_assign_sequential` and returns that
# routine's result.
FARR_subset_assign_sequential <- function(filebase, unit_partlen, cum_partsizes, array_type, value, from) {
105+
.Call(`_filearray_FARR_subset_assign_sequential`, filebase, unit_partlen, cum_partsizes, array_type, value, from)
106+
}
107+
100108
# Thin R-level wrapper: forwards its arguments unchanged, via `.Call`, to the
# compiled routine `_filearray_FARR_subset_assign2` and returns that routine's
# result. `thread_buffer` defaults to 2097152L and `split_dim` to 0L.
FARR_subset_assign2 <- function(filebase, value, listOrEnv, thread_buffer = 2097152L, split_dim = 0L) {
101109
.Call(`_filearray_FARR_subset_assign2`, filebase, value, listOrEnv, thread_buffer, split_dim)
102110
}

R/header.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ read_header <- function(fid){
4242
endian <- 'big'
4343
stop("The file endianess is not little?")
4444
} else {
45-
endian <- 'little'
45+
endian <- "little"
4646
}
4747
}
4848

R/map.R

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#' @title Map multiple file arrays and save results
2-
#' @description Advanced mapping function for multiple file arrays. This
2+
#' @description Advanced mapping function for multiple file arrays. \code{fmap}
3+
#' runs the mapping functions and stores the results in file arrays.
4+
#' \code{fmap2} stores results in memory. This
35
#' feature is experimental. There are several constraints to the input.
46
#' Failure to meet these constraints may result in undefined results, or
57
#' even crashes. Please read Section 'Details' carefully before using
@@ -10,6 +12,8 @@
1012
#' @param .y a file array object, used to save results
1113
#' @param .input_size number of elements to read from each array of \code{x}
1214
#' @param .output_size \code{fun} output vector length
15+
#' @param .simplify whether to apply \code{\link[base]{simplify2array}} to
16+
#' the result
1317
#' @param ... other arguments passing to \code{fun}
1418
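#' @return \code{fmap} returns the file array instance \code{.y};
#' \code{fmap2} returns the mapping results in memory, passed through
#' \code{\link[base]{simplify2array}} when \code{.simplify} is true.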
1519
#' @details
@@ -190,6 +194,56 @@ fmap <- function(x, fun, .y, .input_size = NA, .output_size = NA, ...){
190194
.y
191195
}
192196

197+
#' @rdname fmap
#' @export
fmap2 <- function(x, fun, .input_size = NA, .simplify = TRUE, ...){
  # Map `fun` over buffered input from one or more file arrays and return the
  # results in memory instead of writing them to another file array.
  #
  #   x            a file array, or a non-empty list of file arrays that all
  #                share the same dimensions
  #   fun          function called once per buffer; its first argument is the
  #                buffered input handed over by `FARR_buffer_map2`, remaining
  #                arguments are taken from `...`
  #   .input_size  number of elements per buffer; NA derives a default from
  #                `get_buffer_size()`
  #   .simplify    whether to pass the result through `simplify2array`
  #
  # Returns whatever `FARR_buffer_map2` produces, optionally simplified.
  # Signals an error when `x` is empty, contains a non-file-array, holds
  # arrays with differing dimensions, or `.input_size` is not usable.
  if(!length(x)){
    stop("`x` must be a list of file arrays")
  }

  # Allow a single file array to be passed without wrapping it in a list
  if(inherits(x, "FileArray")){
    x <- list(x)
  }

  # Check element types before touching `dim()` so that a wrong element is
  # reported as such rather than as an obscure indexing failure
  for(el in x){
    if( !is_filearray(el) ){
      stop("Input `x` must only contain file arrays")
    }
  }

  # Compare every dimension vector with the first one. Working on a list
  # (not a `sapply` matrix) keeps this valid when the arrays differ in rank.
  dims <- lapply(x, dim)
  first_dim <- dims[[1L]]
  dims_match <- vapply(dims, function(d){
    length(d) == length(first_dim) && all(d == first_dim)
  }, logical(1L))
  if(!all(dims_match)){
    stop("Input `x` array dimensions must match")
  }

  # Prepare each array's partitions and collect the file bases.
  # NOTE(review): assumes `.filebase` is a single string per array - confirm.
  fbases <- vapply(x, function(el){
    el$initialize_partition()
    el$.filebase
  }, character(1L))

  if(length(.input_size) != 1L){
    stop("`.input_size` must be a single number or NA")
  }
  if(is.na(.input_size)){
    .input_size <- get_buffer_size() / 8L
  }
  # Convert first, then validate: a fractional value such as 0.5 would pass a
  # `> 0` test but truncate to a zero-length buffer
  .input_size <- as.integer(.input_size)
  if(is.na(.input_size) || .input_size <= 0L){
    stop("`.input_size` must be positive")
  }

  # `...` is evaluated once here; `input` stays a symbol and is resolved
  # inside `map` each time the callback runs
  args <- list(quote(input), ...)
  map <- function(input){
    do.call(fun, args)
  }

  res <- FARR_buffer_map2(
    input_filebases = fbases,
    map = map,
    buffer_nelems = .input_size
  )
  if(.simplify){
    res <- simplify2array(res)
  }
  res
}
246+
193247
is_filearray <- function(object){
194248
if(!isS4(object)){ return(FALSE) }
195249
cls <- class(object)

0 commit comments

Comments
 (0)