Skip to content

Commit d0fcebe

Browse files
committed
Supporting #9
1 parent e82a751 commit d0fcebe

5 files changed

Lines changed: 78 additions & 47 deletions

File tree

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: filearray
22
Type: Package
33
Title: File-Backed Array for Out-of-Memory Computation
4-
Version: 0.1.6.9000
4+
Version: 0.1.6.9001
55
Language: en-US
66
Encoding: UTF-8
77
License: LGPL-3

R/methods-subsetAssign.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ fa_subsetAssign1 <- function(x, ..., value){
2424
stop("SubsetAssign FileArray only allows x[] <- v or x[i,j,...] <- v (single index not allowed)")
2525
}
2626
}
27-
if(length(value) != prod(dim)){
28-
stop("SubsetAssign FileArray `value` length mismatch.")
27+
if(!length(value) %in% c(1, prod(dim))){
28+
stop("SubsetAssign FileArray `value` length mismatch: `value` length must be either 1 or the same length of the subset.")
2929
}
3030
target_dim <- dim
3131
x$initialize_partition(x$.partition_info[, 1])
@@ -62,8 +62,8 @@ fa_subsetAssign1 <- function(x, ..., value){
6262
}
6363

6464
target_dim <- sapply(locs, length)
65-
if(prod(target_dim) != length(value)){
66-
stop("SubsetAssign FileArray `value` length mismatch.")
65+
if(!length(value) %in% c(1, prod(target_dim))){
66+
stop("SubsetAssign FileArray `value` length mismatch: `value` length must be either 1 or the same length of the subset.")
6767
}
6868

6969
# make sure partitions exist

src/save.cpp

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,8 @@ void subset_assign_partition(
226226
char* conn0, T* value, const R_xlen_t block_size,
227227
int64_t* idx1ptr0, R_xlen_t idx1len,
228228
int64_t idx1_start, int64_t idx2_start,
229-
int64_t* idx2ptr0, R_xlen_t idx2len ) {
229+
int64_t* idx2ptr0, R_xlen_t idx2len,
230+
const int &value_inc = 1) {
230231
// TODO: swap_endian
231232
int elem_size = sizeof(T);
232233

@@ -261,7 +262,7 @@ void subset_assign_partition(
261262
// block_size: 861584, idx1len: 9001, idx1_start: 216024, idx2_start: 0, idx2_len: 1
262263
// ### idx2ii:0, start_loc: 0, buf pos: 0, idx1_start: 216024
263264

264-
for(idx1ii = 0; idx1ii < idx1len; idx1ii++, idx1ptr++, valptr2++){
265+
for(idx1ii = 0; idx1ii < idx1len; idx1ii++, idx1ptr++, valptr2+=value_inc){
265266
// calculate pointer location in the file
266267
// no check here, but tmp_loc should be >=0
267268

@@ -287,6 +288,10 @@ struct FARRAssigner : public TinyParallel::Worker {
287288
const List& sch;
288289
T* value_ptr;
289290

291+
// value_ptr increment size, can either be 0 (length(value) == 1)
292+
// or 1 (length(value) is the same as subset size)
293+
int value_ptr_inc;
294+
290295
SEXP idx1;
291296
SEXP idx1range;
292297
List idx2s;
@@ -306,9 +311,16 @@ struct FARRAssigner : public TinyParallel::Worker {
306311

307312
FARRAssigner(
308313
const std::string& filebase,
309-
const List& sch, T* value_ptr
314+
const List& sch,
315+
const int64_t& value_len,
316+
T* value_ptr
310317
): filebase(filebase), sch(sch) {
311318
this->value_ptr = value_ptr;
319+
if( value_len - 1 == 0 ) {
320+
this->value_ptr_inc = 0;
321+
} else {
322+
this->value_ptr_inc = 1;
323+
}
312324
this->idx1 = sch["idx1"];
313325
this->idx1range = sch["idx1range"];
314326
this->idx2s = sch["idx2s"];
@@ -433,7 +445,7 @@ struct FARRAssigner : public TinyParallel::Worker {
433445

434446
int64_t* idx2_ptr = INTEGER64(idx2);
435447
R_xlen_t idx2_len = Rf_xlength(idx2);
436-
T* value_ptr2 = value_ptr + (idx1len * skips);
448+
T* value_ptr2 = value_ptr + (idx1len * skips) * this->value_ptr_inc;
437449
int64_t* idx1ptr = idx1ptr0;
438450

439451
// Rcout << "block_size: " << block_size << ", idx1len: " << idx1len << ", idx1_start: " << idx1_start <<
@@ -443,7 +455,8 @@ struct FARRAssigner : public TinyParallel::Worker {
443455
begin, value_ptr2,
444456
block_size, idx1ptr, idx1len,
445457
idx1_start, idx2_start,
446-
idx2_ptr, idx2_len );
458+
idx2_ptr, idx2_len,
459+
this->value_ptr_inc );
447460

448461

449462
// region.flush();
@@ -472,8 +485,11 @@ struct FARRAssigner : public TinyParallel::Worker {
472485
template <typename T>
473486
SEXP FARR_subset_assign_template(
474487
const std::string& filebase,
475-
const List& sch, T* value_ptr){
476-
FARRAssigner<T> assigner(filebase, sch, value_ptr);
488+
const List& sch,
489+
const R_xlen_t &value_len,
490+
T* value_ptr
491+
){
492+
FARRAssigner<T> assigner(filebase, sch, value_len, value_ptr);
477493
assigner.save();
478494
return( R_NilValue );
479495
}
@@ -520,43 +536,44 @@ SEXP FARR_subset_assign2(
520536

521537
// coerce vector to desired SEXP type
522538
SEXP value_ = PROTECT(convert_as(value, sexp_type));
523-
SEXPTYPE valtype = TYPEOF(value_);
524-
525-
// allocate buffers
526-
int ncores = getThreads();
527-
std::vector<SEXP> buff_pool(ncores);
528-
for(int i = 0; i < ncores; i++){
529-
buff_pool[i] = PROTECT(Rf_allocVector(
530-
valtype, idx1_end - idx1_start + 1));
531-
}
539+
// SEXPTYPE valtype = TYPEOF(value_);
540+
541+
// // allocate buffers
542+
// int ncores = getThreads();
543+
// std::vector<SEXP> buff_pool(ncores);
544+
// for(int i = 0; i < ncores; i++){
545+
// buff_pool[i] = PROTECT(Rf_allocVector(
546+
// valtype, idx1_end - idx1_start + 1));
547+
// }
532548

533549

534550
switch(sexp_type) {
535551
case INTSXP: {
536-
FARR_subset_assign_template(fbase, sch, INTEGER(value_));
552+
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), INTEGER(value_));
537553
break;
538554
}
539555
case CPLXSXP:
540556
case REALSXP: {
541-
FARR_subset_assign_template(fbase, sch, REAL(value_));
557+
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), REAL(value_));
542558
break;
543559
}
544560
case FLTSXP: {
545-
FARR_subset_assign_template(fbase, sch, FLOAT(value_));
561+
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), FLOAT(value_));
546562
break;
547563
}
548564
case LGLSXP:
549565
case RAWSXP: {
550-
FARR_subset_assign_template(fbase, sch, RAW(value_));
566+
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), RAW(value_));
551567
break;
552568
}
553569
default: {
554-
UNPROTECT( 1 + ncores );
570+
UNPROTECT( 1 );
555571
stop("SEXP type not supported.");
572+
return(R_NilValue); // wall
556573
}
557574
}
558575

559-
UNPROTECT( 1 + ncores );
576+
UNPROTECT( 1 );
560577
return(R_NilValue);
561578

562579
}

tests/testthat/test-cpp.R

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,13 @@ test_that("C++: IO - subset/assign", {
106106
lapply(dim, function(d) {
107107
sample(c(1:d), size = d, replace = FALSE)
108108
})
109-
expect_error({
109+
expect_no_error({
110110
x[locs[[1]], locs[[2]], locs[[3]]] <- 1
111111
})
112+
expect_equal(
113+
unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
114+
1
115+
)
112116

113117
expect_true({
114118
x[locs[[1]], locs[[2]], locs[[3]]] <- 1:prod(sapply(locs, length))
@@ -202,10 +206,15 @@ test_that("C++: IO - subset/assign - complex", {
202206
lapply(dim, function(d) {
203207
sample(c(1:d), size = d, replace = FALSE)
204208
})
205-
expect_error({
209+
expect_no_error({
206210
x[locs[[1]], locs[[2]], locs[[3]]] <- 1
207211
})
208212

213+
expect_equal(
214+
unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
215+
1 + 0i
216+
)
217+
209218
expect_true({
210219
x[locs[[1]], locs[[2]], locs[[3]]] <- tmp[1:prod(sapply(locs, length))]
211220
TRUE
@@ -303,9 +312,14 @@ test_that("C++: IO - subset/assign - float", {
303312
lapply(dim, function(d) {
304313
sample(c(1:d), size = d, replace = FALSE)
305314
})
306-
expect_error({
315+
expect_no_error({
307316
x[locs[[1]], locs[[2]], locs[[3]]] <- 1
308317
})
318+
expect_equal(
319+
unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
320+
1
321+
)
322+
309323

310324
expect_true({
311325
x[locs[[1]], locs[[2]], locs[[3]]] <- 1:prod(sapply(locs, length))

vignettes/performance.Rmd

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ The simulation was performed on `MacBook Air 2020 (M1 Chip, ARM, 8GB RAM)`, with
2727

2828
We mainly test the performance of the `double` and `float` data types. The dimensions for both arrays are `100x100x100x100`. Both arrays are around `800MB` in native R, because R has no native single-precision (float) type. However, while the `double` array occupies `800MB` of space on the hard disk, the `float` array uses only half that size (`400MB`).
2929

30-
```{r setup}
30+
```{r setup, eval = FALSE}
3131
library(filearray)
3232
3333
options(digits = 3)
@@ -69,7 +69,7 @@ Writing along margins refer to something like `x[,,,i] <- v` (along the last mar
6969

7070
1. partition margin
7171

72-
```{r}
72+
```{r, eval = FALSE}
7373
microbenchmark::microbenchmark(
7474
double = {
7575
for(i in 1:100){
@@ -91,7 +91,7 @@ microbenchmark::microbenchmark(
9191

9292
2. Write along fast margin
9393

94-
```{r}
94+
```{r, eval = FALSE}
9595
microbenchmark::microbenchmark(
9696
double = {
9797
for(i in 1:100){
@@ -113,7 +113,7 @@ microbenchmark::microbenchmark(
113113

114114
3. Writing along slow margin
115115

116-
```{r}
116+
```{r, eval = FALSE}
117117
microbenchmark::microbenchmark(
118118
double = {
119119
for(i in 1:100){
@@ -140,7 +140,7 @@ Instead of writing one slice at a time along each margin, we write `100x100x100x
140140

141141
1. Write blocks of data along the partition margin
142142

143-
```{r}
143+
```{r, eval = FALSE}
144144
microbenchmark::microbenchmark(
145145
double = {
146146
for(i in 1:10){
@@ -164,7 +164,7 @@ microbenchmark::microbenchmark(
164164

165165
2. Write blocks of data along the fast margin
166166

167-
```{r}
167+
```{r, eval = FALSE}
168168
microbenchmark::microbenchmark(
169169
double = {
170170
for(i in 1:10){
@@ -188,7 +188,7 @@ microbenchmark::microbenchmark(
188188

189189
3. Write blocks of data along slow margin
190190

191-
```{r}
191+
```{r, eval = FALSE}
192192
microbenchmark::microbenchmark(
193193
double = {
194194
for(i in 1:10){
@@ -214,7 +214,7 @@ microbenchmark::microbenchmark(
214214

215215
### 1. Read the whole array
216216

217-
```{r}
217+
```{r, eval = FALSE}
218218
microbenchmark::microbenchmark(
219219
double = { x_dbl[] },
220220
float = { x_flt[] },
@@ -229,7 +229,7 @@ microbenchmark::microbenchmark(
229229

230230
### 2. Read along margins
231231

232-
```{r}
232+
```{r, eval = FALSE}
233233
microbenchmark::microbenchmark(
234234
farr_double_partition_margin = { x_dbl[,,,1] },
235235
farr_double_fast_margin = { x_dbl[,,1,] },
@@ -260,7 +260,7 @@ The file array indexing is close to handling in-memory arrays in R!
260260

261261
### 3. Random access
262262

263-
```{r}
263+
```{r, eval = FALSE}
264264
# access 50 x 50 x 50 x 50 sub-array, with random indices
265265
idx1 <- sample(1:100, 50)
266266
idx2 <- sample(1:100, 50)
@@ -287,26 +287,26 @@ Random access could be faster than base R (also much less memory!)
287287

288288
Collapse calculates the margin sum/mean. The collapse function in `filearray` uses a single thread, because the bottleneck often comes from hard-disk access speed. However, it is still faster than native R and is more memory-efficient.
289289

290-
```{r}
290+
```{r, eval = FALSE}
291291
keep <- c(2, 4)
292292
output <- filearray_create(tempfile(), dim(x_dbl)[keep])
293293
output$initialize_partition()
294294
microbenchmark::microbenchmark(
295295
farr_double = { x_dbl$collapse(keep = keep, method = "sum") },
296296
farr_float = { x_flt$collapse(keep = keep, method = "sum") },
297297
native = { apply(y, keep, sum) },
298-
dipsaus = { dipsaus::collapse(y, keep, average = FALSE) },
298+
ravetools = { ravetools::collapse(y, keep, average = FALSE) },
299299
unit = "s", times = 5
300300
)
301301
302302
#> Unit: seconds
303303
#> expr min lq mean median uq max neval
304-
#> farr_double 0.782 0.790 1.009 0.799 0.832 1.840 5
305-
#> farr_float 0.765 0.779 0.929 0.930 1.043 1.127 5
306-
#> native 0.964 1.174 1.222 1.213 1.370 1.390 5
307-
#> dipsaus 0.185 0.190 0.202 0.199 0.203 0.233 5
304+
#> farr_double 0.651 0.666 0.867 0.716 0.718 1.583 5
305+
#> farr_float 0.628 0.637 0.737 0.642 0.652 1.124 5
306+
#> native 1.011 1.029 1.128 1.078 1.207 1.316 5
307+
#> ravetools 0.109 0.110 0.126 0.131 0.138 0.139 5
308308
```
309309

310-
The `dipsaus` package uses multiple threads to collapse arrays in-memory. It is `7~8x` as fast as base R. File array is `1~2x` as fast as base R. Both `dipsaus` and `filearray` have little memory over-heads.
310+
The `ravetools` package uses multiple threads to collapse arrays in memory. It is `7~8x` as fast as base R. File array is `1.5~2x` as fast as base R. Both `ravetools` and `filearray` have little memory overhead.
311311

312312

0 commit comments

Comments
 (0)