Skip to content

Commit b86e3fd

Browse files
committed
Moving away from R internal
1 parent 09c2669 commit b86e3fd

10 files changed

Lines changed: 372 additions & 9 deletions

File tree

CRAN-SUBMISSION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
Version: 0.1.9
2-
Date: 2024-11-08 16:38:30 UTC
3-
SHA: 5f59eb137a26b7e2915a36884d8742407ce9fa72
1+
Version: 0.2.0
2+
Date: 2025-04-01 15:57:13 UTC
3+
SHA: 09c26695b527513404da83abf3769461626dd54d

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: filearray
22
Type: Package
33
Title: File-Backed Array for Out-of-Memory Computation
4-
Version: 0.2.0
4+
Version: 0.2.0.9000
55
Language: en-US
66
Encoding: UTF-8
77
License: LGPL-3
@@ -28,7 +28,7 @@ Suggests:
2828
knitr,
2929
rmarkdown,
3030
testthat (>= 3.0.0)
31-
RoxygenNote: 7.3.2
31+
RoxygenNote: 7.3.3
3232
LinkingTo:
3333
BH,
3434
Rcpp

R/RcppExports.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ getThreads <- function(max) {
105105
.Call(`_filearray_getThreads`, max)
106106
}
107107

108+
test_farr_findVarInFrame_ <- function(env, sym_name) {
109+
.Call(`_filearray_test_farr_findVarInFrame_`, env, sym_name)
110+
}
111+
108112
kinda_sorted <- function(idx, min_, buffer_count) {
109113
.Call(`_filearray_kinda_sorted`, idx, min_, buffer_count)
110114
}

adhoc/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
Dockerfile
2+
bench_*

src/RcppExports.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,18 @@ BEGIN_RCPP
353353
return rcpp_result_gen;
354354
END_RCPP
355355
}
356+
// test_farr_findVarInFrame_
357+
List test_farr_findVarInFrame_(SEXP env, std::string sym_name);
358+
RcppExport SEXP _filearray_test_farr_findVarInFrame_(SEXP envSEXP, SEXP sym_nameSEXP) {
359+
BEGIN_RCPP
360+
Rcpp::RObject rcpp_result_gen;
361+
Rcpp::RNGScope rcpp_rngScope_gen;
362+
Rcpp::traits::input_parameter< SEXP >::type env(envSEXP);
363+
Rcpp::traits::input_parameter< std::string >::type sym_name(sym_nameSEXP);
364+
rcpp_result_gen = Rcpp::wrap(test_farr_findVarInFrame_(env, sym_name));
365+
return rcpp_result_gen;
366+
END_RCPP
367+
}
356368
// kinda_sorted
357369
int kinda_sorted(SEXP idx, int64_t min_, int64_t buffer_count);
358370
RcppExport SEXP _filearray_kinda_sorted(SEXP idxSEXP, SEXP min_SEXP, SEXP buffer_countSEXP) {
@@ -418,6 +430,7 @@ static const R_CallMethodDef CallEntries[] = {
418430
{"_filearray_FARR_subset_assign2", (DL_FUNC) &_filearray_FARR_subset_assign2, 5},
419431
{"_filearray_getDefaultNumThreads", (DL_FUNC) &_filearray_getDefaultNumThreads, 0},
420432
{"_filearray_getThreads", (DL_FUNC) &_filearray_getThreads, 1},
433+
{"_filearray_test_farr_findVarInFrame_", (DL_FUNC) &_filearray_test_farr_findVarInFrame_, 2},
421434
{"_filearray_kinda_sorted", (DL_FUNC) &_filearray_kinda_sorted, 3},
422435
{"_filearray_check_missing_dots", (DL_FUNC) &_filearray_check_missing_dots, 1},
423436
{"_filearray_reshape_or_drop", (DL_FUNC) &_filearray_reshape_or_drop, 3},

src/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22
#define FARR_COMMON_H
33

44
#include <Rcpp.h>
5+
#include "compat.h"
56

67
#endif // FARR_COMMON_H

src/compat.h

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#ifndef FARR_COMPAT_H
2+
#define FARR_COMPAT_H
3+
4+
/**
5+
* farr_findVarInFrame — compatibility wrapper for Rf_findVarInFrame
6+
* ===================================================================
7+
*
8+
* BACKGROUND
9+
* ----------
10+
* R 4.5.0 added R_getVar / R_getVarEx as the public C API replacements for
11+
* the non-API functions Rf_findVarInFrame and Rf_findVar. Their sibling
12+
* Rf_findVarInFrame3 was simultaneously flagged as non-API. R CMD check
13+
* already reports uses of Rf_findVarInFrame3; the remaining two may follow
14+
* in a future release. Writing R Extensions 4.5.0, §'Moving into C API
15+
* compliance' maps:
16+
*
17+
* Rf_findVarInFrame → R_getVar / R_getVarEx (added in R 4.5.0)
18+
* Rf_findVar → R_getVar / R_getVarEx (added in R 4.5.0)
19+
*
20+
* This header must be included after <Rcpp.h> (achieved via common.h, which
21+
* includes <Rcpp.h> first). Do NOT include <Rinternals.h> directly; CRAN
22+
* requires packages to access R internals only through the public headers.
23+
*
24+
* BEHAVIORAL DIFFERENCES: Rf_findVarInFrame vs R_getVarEx
25+
* --------------------------------------------------------
26+
*
27+
* Property Rf_findVarInFrame(rho,sym) R_getVarEx(sym,rho,FALSE,dflt)
28+
* ------------------------- --------------------------- -----------------------------
29+
* Argument order (env, symbol) (symbol, env, inherits, dflt)
30+
* Parent-frame search No No (inherits = FALSE)
31+
* Promise forcing Yes (doGet = TRUE) Yes (forces PROMSXP bindings)
32+
* Symbol not in frame Returns R_UnboundValue Returns dflt
33+
* Error on not-found No No (when dflt is provided)
34+
* R_MissingArg binding (*) Returns R_MissingArg Signals getMissingError
35+
* API status (R >= 4.5.0) Non-API (may be removed) Public/stable C API
36+
* Availability All R versions R >= 4.5.0 only
37+
*
38+
* KEY SUBTLETIES
39+
* --------------
40+
* 1. Argument reversal.
41+
* Rf_findVarInFrame(rho, sym) ≡ R_getVarEx(sym, rho, FALSE, dflt)
42+
* The env and symbol positions swap, and two extra arguments are required.
43+
*
44+
* 2. Not-found sentinel.
45+
* Passing R_UnboundValue as the default to R_getVarEx makes it return
46+
* R_UnboundValue when the symbol is absent — matching Rf_findVarInFrame.
47+
* Using R_getVar (no default) would throw an error, like base::get().
48+
*
49+
* 3. Promise forcing.
50+
* Both functions force PROMSXP bindings before returning. For the
51+
* '...' / R_DotsSymbol use case this is harmless: the DOTSXP itself is
52+
* not a PROMSXP; the individual dot elements inside are promises, but
53+
* they are only touched later via explicit CAR() calls by the caller.
54+
*
55+
* 4. (*) R_MissingArg — the critical difference for '...' lookup.
56+
* When a function with '...' is called with no extra arguments (e.g.
57+
* f() where f <- function(...) ...), R binds R_DotsSymbol to R_MissingArg
58+
* in the call frame. The symbol IS bound, but:
59+
* - Rf_findVarInFrame returns R_MissingArg directly, no error.
60+
* - R_getVarEx signals a getMissingError ("argument '...' is missing,
61+
* with no default"), matching base::get() semantics.
62+
* This getMissingError is a longjmp-based R condition, NOT a C++ exception,
63+
* so it CANNOT be caught with C++ try/catch — the longjmp bypasses all
64+
* catch blocks entirely.
65+
*
66+
* CHOSEN STRATEGY: We use R_existsVarInFrame (public, stable API) as a
67+
* fast pre-check. If the symbol is unbound, we return R_UnboundValue
68+
* immediately. When the symbol IS bound and IS R_DotsSymbol, we evaluate
69+
* ...length() in rho to detect empty dots (where R_getVarEx would longjmp).
70+
* ...length() is a SPECIALSXP, available since R 3.5.0, that returns 0 for
71+
* both R_NilValue and R_MissingArg bindings without forcing dot promises.
72+
* If ...length() == 0, we return R_MissingArg directly (matching the old
73+
* Rf_findVarInFrame behavior); otherwise, R_getVarEx is safe to call.
74+
*
75+
* For non-dots symbols, R_MissingArg bindings are theoretically possible
76+
* (e.g. a formal parameter with no default called without an argument)
77+
* but this wrapper is only used for R_DotsSymbol lookups in filearray.
78+
* Non-dots lookups fall through to R_getVarEx directly.
79+
*
80+
* Performance (all times per call on Apple M4, N = 1,000,000):
81+
*
82+
* Scenario Rf_findVarInFrame This wrapper
83+
* --------------------- ------------------- ----------------------
84+
* Unbound symbol 5.8 ns 6.7 ns (1.2x)
85+
* Normal binding 6.2 ns 16.2 ns (2.6x)
86+
* Populated dots 6.0 ns 96.8 ns (16.2x)
87+
* Empty dots (MissingArg) 5.3 ns 79.2 ns (14.9x)
88+
*
89+
* vs R_tryCatchError: ~14,000 ns (1,700-2,700x) — unacceptable.
90+
*
91+
* The dots overhead (~90ns) is entirely from evaluating ...length() via
92+
* Rf_eval. This runs once per subset/assign call (not per element), so
93+
* the absolute cost is negligible in practice.
94+
*
95+
* 5. R_UnboundValue vs R_NilValue vs R_MissingArg.
96+
* Rf_findVarInFrame returns R_UnboundValue for unbound symbols, but every
97+
* caller in filearray immediately maps R_UnboundValue → R_NilValue (since
98+
* both mean "nothing to iterate"). This wrapper absorbs that mapping so
99+
* callers need no extra boilerplate — just swap Rf_findVarInFrame with
100+
* farr_findVarInFrame.
101+
*
102+
* USAGE
103+
* -----
104+
* farr_findVarInFrame(rho, symbol) reproduces Rf_findVarInFrame behavior
105+
* except that unbound symbols yield R_NilValue. Its full contract:
106+
* - Searches only frame rho, no parent-frame walk-up.
107+
* - Forces PROMSXP bindings.
108+
* - Returns R_NilValue when symbol is not bound in rho (NOT R_UnboundValue).
109+
* - Returns R_MissingArg when the binding is a missing-argument marker
110+
* (including empty '...' on R >= 4.5.0).
111+
* On R < 4.5.0 it calls Rf_findVarInFrame + maps R_UnboundValue → R_NilValue.
112+
* On R >= 4.5.0 it uses R_existsVarInFrame + ...length() + R_getVarEx.
113+
*/
114+
115+
/* Portable wrapper. ------------------------------------------------------- */
116+
static inline SEXP farr_findVarInFrame(SEXP rho, SEXP symbol) {
117+
#if R_VERSION >= R_Version(4, 5, 0)
118+
/* Fast-path: symbol not bound in frame rho → R_NilValue (R_UnboundValue is never returned; callers treat unbound as "nothing to iterate"). */
119+
if (!R_existsVarInFrame(rho, symbol)) {
120+
return R_NilValue;
121+
}
122+
/*
123+
* Symbol IS bound. For R_DotsSymbol, the binding may be R_MissingArg
124+
* (empty dots). R_getVarEx would signal an R error (longjmp) in that case,
125+
* so we pre-check with ...length() — a SPECIALSXP that safely returns 0 for
126+
* both R_NilValue and R_MissingArg dot bindings without forcing promises.
127+
*/
128+
if (symbol == R_DotsSymbol) {
129+
SEXP call = PROTECT(Rf_lang1(Rf_install("...length")));
130+
int n = Rf_asInteger(Rf_eval(call, rho));
131+
UNPROTECT(1);
132+
if (n == 0) {
133+
return R_MissingArg;
134+
}
135+
}
136+
/* Symbol is bound and any dots are non-empty; look it up in rho only (inherits = FALSE). NOTE(review): a non-dots symbol bound to R_MissingArg is not pre-checked and would still make R_getVarEx signal an error. */
137+
return R_getVarEx(symbol, rho,
138+
static_cast<Rboolean>(FALSE), R_NilValue);
139+
#else /* R < 4.5.0: R_getVarEx is unavailable, fall back to Rf_findVarInFrame */
140+
SEXP res = Rf_findVarInFrame(rho, symbol);
141+
return (res == R_UnboundValue) ? R_NilValue : res;
142+
#endif /* R_VERSION >= R_Version(4, 5, 0) */
143+
}
144+
145+
#endif /* FARR_COMPAT_H */

src/core.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ SEXP locationList(const SEXP listOrEnv, const NumericVector& dim, const int stri
265265
switch(TYPEOF(listOrEnv)) {
266266
case ENVSXP: {
267267
sliceIdx = PROTECT(Rf_allocVector(VECSXP, ndims));
268-
SEXP dots = Rf_findVarInFrame(listOrEnv, R_DotsSymbol);
268+
SEXP dots = farr_findVarInFrame(listOrEnv, R_DotsSymbol);
269269
n_protected++;
270270
for(; (dots != R_NilValue) && (dots != R_MissingArg); dots = CDR(dots), idx_size++ ){
271271
if(idx_size >= ndims){

src/utils.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,42 @@
11
#include "utils.h"
22
using namespace Rcpp;
33

4+
// ---------------------------------------------------------------------------
5+
// Test helper for farr_findVarInFrame (not for end-user use).
6+
// Looks up `sym_name` in the frame of `env`; stops with an error if `env` is not an environment.
7+
// Returns a named list with:
8+
// $result - the looked-up value; NULL when the symbol is unbound or its binding is R_MissingArg
9+
// $is_unbound - TRUE when symbol was not found in env's frame
10+
// (farr_findVarInFrame returns R_NilValue for unbound)
11+
// $is_missing - TRUE when the binding existed but was R_MissingArg
12+
//
13+
// Note: farr_findVarInFrame returns R_NilValue for both "unbound" and
14+
// "bound to NULL". We distinguish them using R_existsVarInFrame on
15+
// R >= 4.5.0, or Rf_findVarInFrame on older R.
16+
// ---------------------------------------------------------------------------
17+
// [[Rcpp::export]]
18+
List test_farr_findVarInFrame_(SEXP env, std::string sym_name) {
19+
if( TYPEOF(env) != ENVSXP ){
20+
Rcpp::stop("env must be an environment");
21+
}
22+
SEXP sym = Rf_install(sym_name.c_str());
23+
SEXP result = farr_findVarInFrame(env, sym);
24+
bool is_missing = (result == R_MissingArg);
25+
// farr_findVarInFrame maps R_UnboundValue → R_NilValue, so a NULL result
26+
// is ambiguous; query the frame independently to report is_unbound.
27+
#if R_VERSION >= R_Version(4, 5, 0)
28+
bool is_unbound = !R_existsVarInFrame(env, sym);
29+
#else
30+
bool is_unbound = (Rf_findVarInFrame(env, sym) == R_UnboundValue);
31+
#endif
32+
SEXP rval = (is_unbound || is_missing) ? R_NilValue : result;
33+
return List::create(
34+
_["result"] = rval,
35+
_["is_unbound"] = is_unbound,
36+
_["is_missing"] = is_missing
37+
);
38+
}
39+
440
int guess_splitdim(SEXP dim, int elem_size, size_t buffer_bytes){
541
R_len_t ndims = Rf_length(dim);
642

@@ -142,10 +178,10 @@ SEXP check_missing_dots(const SEXP env){
142178
if( TYPEOF(env) != ENVSXP ){
143179
Rcpp::stop("`check_missing_dots` is asking for an environment");
144180
}
145-
SEXP dots = Rf_findVarInFrame(env, R_DotsSymbol);
146-
181+
SEXP dots = farr_findVarInFrame(env, R_DotsSymbol);
182+
147183
std::vector<bool> is_missing(0);
148-
184+
149185
if( dots != R_NilValue ){
150186
SEXP el = R_NilValue;
151187

0 commit comments

Comments
 (0)