Skip to content

Commit 3a39b43

Browse files
authored
chore: Reduce allocation of Lazy (#650)
Motivation: Use specialized Lazy subclasses to reduce Lazy allocations (was #645). ``` ⏺ Final JMH Benchmark Comparison: master vs jit ┌───────────────────────┬────────────────┬─────────────┬────────┐ │ Benchmark │ master (ms/op) │ jit (ms/op) │ Change │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ assertions │ 0.305 │ 0.302 │ -1.0% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.01 │ 0.073 │ 0.073 │ 0.0% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.02 │ 51.252 │ 47.703 │ -6.9% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.03 │ 14.250 │ 14.089 │ -1.1% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.04 │ 31.549 │ 31.787 │ +0.8% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.06 │ 0.445 │ 0.425 │ -4.5% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.07 │ 1188.786 │ 1110.849 │ -6.6% │ NOT valid because of a stack overflow, fixed now ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.08 │ 0.058 │ 0.058 │ 0.0% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ bench.09 │ 0.066 │ 0.065 │ -1.5% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ gen_big_object │ 1.073 │ 1.056 │ -1.6% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ large_string_join │ 2.480 │ 2.174 │ -12.3% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ large_string_template │ 2.315 │ 2.291 │ -1.0% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ realistic1 │ 3.104 │ 2.749 │ -11.4% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ realistic2 │ 72.683 │ 67.321 │ -7.4% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ base64 │ 0.793 │ 0.809 │ +2.0% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ base64Decode │ 0.596 │ 0.605 │ +1.5% │ 
├───────────────────────┼────────────────┼─────────────┼────────┤ │ base64DecodeBytes │ 9.276 │ 9.207 │ -0.7% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ base64_byte_array │ 1.434 │ 1.458 │ +1.7% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ comparison │ 22.584 │ 23.068 │ +2.1% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ comparison2 │ 78.813 │ 73.607 │ -6.6% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ escapeStringJson │ 0.049 │ 0.049 │ 0.0% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ foldl │ 9.781 │ 9.475 │ -3.1% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ lstripChars │ 0.613 │ 0.612 │ -0.2% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ manifestJsonEx │ 0.074 │ 0.070 │ -5.4% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ manifestTomlEx │ 0.090 │ 0.086 │ -4.4% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ manifestYamlDoc │ 0.075 │ 0.073 │ -2.7% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ member │ 0.685 │ 0.731 │ +6.7% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ parseInt │ 0.052 │ 0.051 │ -1.9% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ reverse │ 10.933 │ 11.030 │ +0.9% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ rstripChars │ 0.637 │ 0.620 │ -2.7% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ stripChars │ 0.585 │ 0.595 │ +1.7% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ substr │ 0.163 │ 0.163 │ 0.0% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ setDiff │ 0.461 │ 0.449 │ -2.6% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ setInter │ 0.411 │ 0.428 │ +4.1% │ ├───────────────────────┼────────────────┼─────────────┼────────┤ │ setUnion │ 0.734 │ 0.756 
│ +3.0% │ └───────────────────────┴────────────────┴─────────────┴────────┘ ```
1 parent 7a28385 commit 3a39b43

7 files changed

Lines changed: 162 additions & 30 deletions

File tree

bench/resources/refresh_golden_outputs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ for suite in bench/resources/*_suite; do
1212
echo "Refreshing golden outputs for suite: $suite_name"
1313
for f in "$suite"/*.jsonnet; do
1414
echo " Processing file: $f"
15-
java -Xss100m -Xmx2g -jar "$SJSONNET" -J "$suite" "$f" > "$f.golden"
15+
java -Xss100m -Xmx2g -jar "$SJSONNET" --max-stack 100000 -J "$suite" "$f" > "$f.golden"
1616
done
1717
done
1818

bench/src/sjsonnet/bench/RegressionBenchmark.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ object RegressionBenchmark {
1111
private val testSuiteRoot: os.Path =
1212
sys.env.get("MILL_WORKSPACE_ROOT").map(os.Path(_)).getOrElse(os.pwd)
1313

14+
/** Shared CLI args passed to every benchmark invocation (e.g. bench.07 needs deep recursion). */
15+
private val defaultArgs: Array[String] = Array("--max-stack", "100000")
16+
1417
private def createDummyOut = new PrintStream(new OutputStream {
1518
def write(b: Int): Unit = ()
1619
override def write(b: Array[Byte]): Unit = ()
@@ -36,7 +39,7 @@ class RegressionBenchmark {
3639
val baos = new ByteArrayOutputStream()
3740
val ps = new PrintStream(baos)
3841
SjsonnetMainBase.main0(
39-
Array(path),
42+
RegressionBenchmark.defaultArgs :+ path,
4043
new DefaultParseCache,
4144
System.in,
4245
ps,
@@ -61,7 +64,7 @@ class RegressionBenchmark {
6164
def main(bh: Blackhole): Unit = {
6265
bh.consume(
6366
SjsonnetMainBase.main0(
64-
Array(path),
67+
RegressionBenchmark.defaultArgs :+ path,
6568
new DefaultParseCache,
6669
System.in,
6770
dummyOut,

sjsonnet/src/sjsonnet/Evaluator.scala

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,37 @@ class Evaluator(
121121
Error.fail("Should not have happened.", e.pos)
122122
}
123123

124+
/**
125+
* Convert an expression to an [[Eval]] for deferred evaluation.
126+
*
127+
* Three fast paths eliminate or reduce allocation vs the naive
128+
* `new LazyFunc(() => visitExpr(e))`:
129+
*
130+
* 1. [[Val]] literals — already evaluated, return as-is (zero cost).
131+
* 2. [[ValidId]] (variable reference) where the binding slot is non-null — reuse the existing
132+
* [[Eval]] from scope directly (zero allocation). Covers ~18% of calls. When the slot IS
133+
* null (self-recursive local, e.g. `local a = [a[1], 0]`), the binding hasn't been written
134+
* yet, so we must create a thunk that defers the lookup until the slot is populated.
135+
* 3. All other expressions — [[LazyExpr]] stores (Expr, ValScope, Evaluator) as fields instead
136+
* of capturing them in a closure: 1 JVM object vs 2. Covers ~76% of calls (dominated by
137+
* BinaryOp).
138+
*
139+
* PERF: Do not revert to `new LazyFunc(() => visitExpr(e))` — profiling across all benchmark
140+
* suites shows this method produces ~93% of deferred evaluations. The fast paths eliminate 242K
141+
* allocations (bench.02) and improve wall-clock time ~5% (comparison2).
142+
*/
124143
def visitAsLazy(e: Expr)(implicit scope: ValScope): Eval = e match {
125-
case v: Val => v
126-
case e =>
144+
case v: Val => v
145+
case e: ValidId =>
146+
val binding = scope.bindings(e.nameIdx)
147+
if (binding != null) binding
148+
else {
149+
if (debugStats != null) debugStats.lazyCreated += 1
150+
new LazyExpr(e, scope, this)
151+
}
152+
case e =>
127153
if (debugStats != null) debugStats.lazyCreated += 1
128-
new Lazy(() => visitExpr(e))
154+
new LazyExpr(e, scope, this)
129155
}
130156

131157
def visitValidId(e: ValidId)(implicit scope: ValScope): Val = {
@@ -151,7 +177,8 @@ class Evaluator(
151177
newScope.bindings(base + i) = b.args match {
152178
case null => visitAsLazy(b.rhs)(newScope)
153179
case argSpec =>
154-
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(newScope))
180+
if (debugStats != null) debugStats.lazyCreated += 1
181+
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(newScope))
155182
}
156183
i += 1
157184
}
@@ -789,7 +816,8 @@ class Evaluator(
789816
newScope.bindings(base + i) = b.args match {
790817
case null => visitAsLazy(b.rhs)(newScope)
791818
case argSpec =>
792-
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
819+
if (debugStats != null) debugStats.lazyCreated += 1
820+
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
793821
}
794822
i += 1
795823
}
@@ -853,6 +881,7 @@ class Evaluator(
853881
visitExpr(e)
854882
}
855883

884+
// Note: can't use LazyExpr here — `scope` is by-name (=> ValScope), must remain lazy.
856885
def visitBindings(bindings: Array[Bind], scope: => ValScope): Array[Eval] = {
857886
if (debugStats != null) debugStats.lazyCreated += bindings.length
858887
val arrF = new Array[Eval](bindings.length)
@@ -861,9 +890,9 @@ class Evaluator(
861890
val b = bindings(i)
862891
arrF(i) = b.args match {
863892
case null =>
864-
new Lazy(() => visitExpr(b.rhs)(scope))
893+
new LazyFunc(() => visitExpr(b.rhs)(scope))
865894
case argSpec =>
866-
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(scope))
895+
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(scope))
867896
}
868897
i += 1
869898
}
@@ -927,7 +956,8 @@ class Evaluator(
927956
case null =>
928957
visitAsLazy(b.rhs)(newScope)
929958
case argSpec =>
930-
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
959+
if (debugStats != null) debugStats.lazyCreated += 1
960+
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
931961
}
932962
i += 1
933963
j += 1

sjsonnet/src/sjsonnet/Materializer.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ abstract class Materializer {
336336
var i = 0
337337
while (i < len) {
338338
val x = xs(i)
339-
res(i) = new Lazy(() => reverse(pos, x))
339+
res(i) = new LazyFunc(() => reverse(pos, x))
340340
i += 1
341341
}
342342
Val.Arr(pos, res)

sjsonnet/src/sjsonnet/Val.scala

Lines changed: 106 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,119 @@ trait Eval {
1818
}
1919

2020
/**
21-
* Lazily evaluated dictionary values, array contents, or function parameters are all wrapped in
22-
* [[Lazy]] and only truly evaluated on-demand.
21+
* Abstract marker base for deferred (lazy) evaluation. Contains no fields — subclasses manage their
22+
* own caching to minimize per-instance memory.
23+
*
24+
* Hierarchy (allocation percentages measured across 591 test and benchmark files; actual
25+
* distribution varies by workload):
26+
* - [[LazyFunc]] — wraps a `() => Val` closure with a separate `cached` field (~0.1%)
27+
* - [[LazyExpr]] — closure-free `visitExpr` thunk, repurposes fields for caching (~91%)
28+
* - [[LazyApply1]] — closure-free `func.apply1` thunk (~9%)
29+
* - [[LazyApply2]] — closure-free `func.apply2` thunk (<1%)
30+
*
31+
* @see
32+
* [[Eval]] the parent trait shared with [[Val]] (eager values).
33+
*/
34+
abstract class Lazy extends Eval
35+
36+
/**
37+
* Closure-based [[Lazy]]: wraps an arbitrary `() => Val` thunk.
38+
*
39+
* Used for deferred evaluations that don't fit the specialized [[LazyExpr]]/[[LazyApply1]]/
40+
* [[LazyApply2]] patterns, e.g. `visitMethod` (local function defs), `visitBindings` (object field
41+
* bindings), and default parameter evaluation. These account for <1% of all deferred evaluations
42+
* (profiled across 591 benchmark and test files).
2343
*/
24-
final class Lazy(private var computeFunc: () => Val) extends Eval {
44+
final class LazyFunc(private var f: () => Val) extends Lazy {
2545
private var cached: Val = _
2646
def value: Val = {
2747
if (cached != null) return cached
28-
cached = computeFunc()
29-
computeFunc = null // allow closure to be GC'd
48+
cached = f()
49+
f = null // allow GC of captured references
3050
cached
3151
}
3252
}
3353

54+
/**
55+
* Closure-free [[Lazy]] that defers `evaluator.visitExpr(expr)(scope)`.
56+
*
57+
* Used in [[Evaluator.visitAsLazy]] instead of `new LazyFunc(() => visitExpr(e)(scope))`. By
58+
* storing (expr, scope, evaluator) as fields rather than capturing them in a closure, this cuts
59+
* per-thunk allocation from 2 JVM objects (LazyFunc + closure) to 1 (LazyExpr), and from 56B to 24B
60+
* (compressed oops).
61+
*
62+
* Profiling across all benchmark and test suites (591 files) shows [[Evaluator.visitAsLazy]]
63+
* produces ~91% of all deferred evaluations.
64+
*
65+
* After computation, the cached [[Val]] is stored in the `exprOrVal` field (which originally held
66+
* the [[Expr]]), and `ev` is nulled as a sentinel. `scope` is also cleared to allow GC.
67+
*/
68+
final class LazyExpr(
69+
private var exprOrVal: AnyRef, // Expr before compute, Val after
70+
private var scope: ValScope,
71+
private var ev: Evaluator)
72+
extends Lazy {
73+
def value: Val = {
74+
if (ev == null) exprOrVal.asInstanceOf[Val]
75+
else {
76+
val r = ev.visitExpr(exprOrVal.asInstanceOf[Expr])(scope)
77+
exprOrVal = r // cache result
78+
scope = null.asInstanceOf[sjsonnet.ValScope] // allow GC
79+
ev = null // sentinel: marks as computed
80+
r
81+
}
82+
}
83+
}
84+
85+
/**
86+
* Closure-free [[Lazy]] that defers `func.apply1(arg, pos)(ev, TailstrictModeDisabled)`.
87+
*
88+
* Used in stdlib builtins (`std.map`, `std.filterMap`, `std.makeArray`, etc.) to eliminate the
89+
* 2-object allocation (LazyFunc + Function0 closure), cutting from 56B to 32B per instance. After
90+
* computation, `funcOrVal` caches the result, `ev == null` serves as the computed sentinel, and
91+
* remaining fields are cleared for GC.
92+
*/
93+
final class LazyApply1(
94+
private var funcOrVal: AnyRef, // Val.Func before compute, Val after
95+
private var arg: Eval,
96+
private var pos: Position,
97+
private var ev: EvalScope)
98+
extends Lazy {
99+
def value: Val = {
100+
if (ev == null) funcOrVal.asInstanceOf[Val]
101+
else {
102+
val r = funcOrVal.asInstanceOf[Val.Func].apply1(arg, pos)(ev, TailstrictModeDisabled)
103+
funcOrVal = r
104+
arg = null; pos = null; ev = null
105+
r
106+
}
107+
}
108+
}
109+
110+
/**
111+
* Closure-free [[Lazy]] that defers `func.apply2(arg1, arg2, pos)(ev, TailstrictModeDisabled)`.
112+
*
113+
* Used in stdlib builtins (`std.mapWithIndex`, etc.). Same field-repurposing strategy as
114+
* [[LazyApply1]], cutting from 56B to 32B per instance.
115+
*/
116+
final class LazyApply2(
117+
private var funcOrVal: AnyRef, // Val.Func before compute, Val after
118+
private var arg1: Eval,
119+
private var arg2: Eval,
120+
private var pos: Position,
121+
private var ev: EvalScope)
122+
extends Lazy {
123+
def value: Val = {
124+
if (ev == null) funcOrVal.asInstanceOf[Val]
125+
else {
126+
val r = funcOrVal.asInstanceOf[Val.Func].apply2(arg1, arg2, pos)(ev, TailstrictModeDisabled)
127+
funcOrVal = r
128+
arg1 = null; arg2 = null; pos = null; ev = null
129+
r
130+
}
131+
}
132+
}
133+
34134
/**
35135
* [[Val]]s represented Jsonnet values that are the result of evaluating a Jsonnet program. The
36136
* [[Val]] data structure is essentially a JSON tree, except evaluation of object attributes and
@@ -750,7 +850,7 @@ object Val {
750850
if (argVals(j) == null) {
751851
val default = params.defaultExprs(i)
752852
if (default != null) {
753-
argVals(j) = new Lazy(() => evalDefault(default, newScope, ev))
853+
argVals(j) = new LazyFunc(() => evalDefault(default, newScope, ev))
754854
} else {
755855
if (missing == null) missing = new ArrayBuffer
756856
missing.+=(params.names(i))

sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,10 @@ object ArrayModule extends AbstractFunctionModule {
164164
arg: Array[Eval],
165165
ev: EvalScope,
166166
pos: Position): Val.Arr = {
167+
val noOff = pos.noOffset
167168
Val.Arr(
168169
pos,
169-
arg.map(v => new Lazy(() => _func.apply1(v, pos.noOffset)(ev, TailstrictModeDisabled)))
170+
arg.map(v => new LazyApply1(_func, v, noOff, ev))
170171
)
171172
}
172173

@@ -180,11 +181,12 @@ object ArrayModule extends AbstractFunctionModule {
180181
val func = _func.value.asFunc
181182
val arr = _arr.value.asArr.asLazyArray
182183
val a = new Array[Eval](arr.length)
184+
val noOff = pos.noOffset
183185
var i = 0
184186
while (i < a.length) {
185187
val x = arr(i)
186188
val idx = Val.Num(pos, i)
187-
a(i) = new Lazy(() => func.apply2(idx, x, pos.noOffset)(ev, TailstrictModeDisabled))
189+
a(i) = new LazyApply2(func, idx, x, noOff, ev)
188190
i += 1
189191
}
190192
Val.Arr(pos, a)
@@ -425,16 +427,15 @@ object ArrayModule extends AbstractFunctionModule {
425427
},
426428
builtin("filterMap", "filter_func", "map_func", "arr") {
427429
(pos, ev, filter_func: Val.Func, map_func: Val.Func, arr: Val.Arr) =>
430+
val noOff = pos.noOffset
428431
Val.Arr(
429432
pos,
430433
arr.asLazyArray.flatMap { i =>
431434
i.value
432-
if (!filter_func.apply1(i, pos.noOffset)(ev, TailstrictModeDisabled).asBoolean) {
435+
if (!filter_func.apply1(i, noOff)(ev, TailstrictModeDisabled).asBoolean) {
433436
None
434437
} else {
435-
Some[Eval](
436-
new Lazy(() => map_func.apply1(i, pos.noOffset)(ev, TailstrictModeDisabled))
437-
)
438+
Some[Eval](new LazyApply1(map_func, i, noOff, ev))
438439
}
439440
}
440441
)
@@ -468,12 +469,10 @@ object ArrayModule extends AbstractFunctionModule {
468469
pos, {
469470
val sz = size.cast[Val.Num].asPositiveInt
470471
val a = new Array[Eval](sz)
472+
val noOff = pos.noOffset
471473
var i = 0
472474
while (i < sz) {
473-
val forcedI = i
474-
a(i) = new Lazy(() =>
475-
func.apply1(Val.Num(pos, forcedI), pos.noOffset)(ev, TailstrictModeDisabled)
476-
)
475+
a(i) = new LazyApply1(func, Val.Num(pos, i), noOff, ev)
477476
i += 1
478477
}
479478
a

sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ object ObjectModule extends AbstractFunctionModule {
107107
def invoke(self: Val.Obj, sup: Val.Obj, fs: FileScope, ev: EvalScope): Val =
108108
func.apply2(
109109
Val.Str(pos, k),
110-
new Lazy(() => obj.value(k, pos.noOffset)(ev)),
110+
new LazyFunc(() => obj.value(k, pos.noOffset)(ev)),
111111
pos.noOffset
112112
)(
113113
ev,
@@ -139,7 +139,7 @@ object ObjectModule extends AbstractFunctionModule {
139139
Val.Arr(
140140
pos,
141141
keys.map { k =>
142-
new Lazy(() => v1.value(k, pos.noOffset)(ev))
142+
new LazyFunc(() => v1.value(k, pos.noOffset)(ev))
143143
}
144144
)
145145

0 commit comments

Comments
 (0)