Skip to content

Commit 7a2638b

Browse files
committed
wip
1 parent b7f58bd commit 7a2638b

7 files changed

Lines changed: 937 additions & 6 deletions

File tree

core/sail/lmdb/pom.xml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -216,6 +216,24 @@
216216
<plugin>
217217
<artifactId>maven-assembly-plugin</artifactId>
218218
</plugin>
219+
<plugin>
220+
<groupId>org.apache.maven.plugins</groupId>
221+
<artifactId>maven-compiler-plugin</artifactId>
222+
<executions>
223+
<execution>
224+
<id>default-testCompile</id>
225+
<configuration>
226+
<annotationProcessorPaths>
227+
<path>
228+
<groupId>org.openjdk.jmh</groupId>
229+
<artifactId>jmh-generator-annprocess</artifactId>
230+
<version>${jmhVersion}</version>
231+
</path>
232+
</annotationProcessorPaths>
233+
</configuration>
234+
</execution>
235+
</executions>
236+
</plugin>
219237
</plugins>
220238
</build>
221239
</project>

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbEvaluationStatistics.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,6 @@ private double cardinality(Resource subj, IRI pred, Value obj, Resource context)
118118

119119
return tripleStore.cardinality(subjID, predID, objID, contextID);
120120

121-
122121
// CardinalityKey key = new CardinalityKey(subjID, predID, objID, contextID);
123122
// Double cached = cardinalityCache.get(key);
124123
// if (cached != null) {
Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.sail.lmdb.benchmark;
13+
14+
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
16+
import java.io.File;
17+
import java.io.IOException;
18+
import java.nio.file.Files;
19+
20+
import org.apache.commons.io.FileUtils;
21+
import org.eclipse.rdf4j.benchmark.common.ThemeQueryCatalog;
22+
import org.eclipse.rdf4j.benchmark.rio.util.ThemeDataSetGenerator;
23+
import org.eclipse.rdf4j.benchmark.rio.util.ThemeDataSetGenerator.Theme;
24+
import org.eclipse.rdf4j.common.transaction.IsolationLevels;
25+
import org.eclipse.rdf4j.repository.sail.SailRepository;
26+
import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
27+
import org.eclipse.rdf4j.repository.util.RDFInserter;
28+
import org.eclipse.rdf4j.sail.lmdb.LmdbStore;
29+
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
30+
import org.junit.jupiter.api.Test;
31+
32+
class PharmaQuery10CountRegressionTest {
33+
34+
@Test
35+
void pharmaQuery10KeepsExpectedCountWithDefaultEstimatorConfig() throws IOException {
36+
Theme theme = Theme.PHARMA;
37+
int queryIndex = 10;
38+
String query = ThemeQueryCatalog.queryFor(theme, queryIndex);
39+
long expected = ThemeQueryCatalog.expectedCountFor(theme, queryIndex);
40+
41+
File dataDir = Files.createTempDirectory("pharma-query10-regression").toFile();
42+
LmdbStoreConfig config = ConfigUtil.createConfig();
43+
config.setPageCardinalityEstimator(false);
44+
SailRepository repository = new SailRepository(new LmdbStore(dataDir, config));
45+
try {
46+
loadData(theme, repository);
47+
try (SailRepositoryConnection connection = repository.getConnection()) {
48+
long actual = connection.prepareTupleQuery(query)
49+
.evaluate()
50+
.stream()
51+
.count();
52+
assertEquals(expected, actual, "Unexpected count for PHARMA query index 10");
53+
}
54+
} finally {
55+
repository.shutDown();
56+
FileUtils.deleteDirectory(dataDir);
57+
}
58+
}
59+
60+
private static void loadData(Theme theme, SailRepository repository) throws IOException {
61+
try (SailRepositoryConnection connection = repository.getConnection()) {
62+
connection.begin(IsolationLevels.NONE);
63+
ThemeDataSetGenerator.generate(theme, new RDFInserter(connection));
64+
connection.commit();
65+
}
66+
}
67+
}

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/ThemeQueryBenchmark.java

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,12 @@ public class ThemeQueryBenchmark {
8585
})
8686
public String themeName;
8787

88+
@Param({
89+
"false",
90+
"true"
91+
})
92+
public boolean pageCardinalityEstimator;
93+
8894
private File dataDir;
8995
private SailRepository repository;
9096
private Theme theme;
@@ -106,7 +112,7 @@ public void setup() throws IOException {
106112
expected = ThemeQueryCatalog.expectedCountFor(theme, z_queryIndex);
107113
dataDir = Files.newTemporaryFolder();
108114
LmdbStoreConfig config = ConfigUtil.createConfig();
109-
config.setPageCardinalityEstimator(false );
115+
config.setPageCardinalityEstimator(pageCardinalityEstimator);
110116
repository = new SailRepository(new LmdbStore(dataDir, config));
111117
loadData();
112118
}

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/ThemeQueryBenchmarkExplanationTest.java

Lines changed: 157 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -120,11 +120,23 @@ void measureSocialMediaQuery10PlanningAndExecutionWithAndWithoutPageEstimator()
120120
for (boolean pageEstimatorEnabled : new boolean[] { false, true }) {
121121
TimingStats stats = runTimingScenario(pageEstimatorEnabled, query, expectedCount);
122122
OptimizerCardinalityStats optimizerStats = runJoinOptimizerCardinalityScenario(pageEstimatorEnabled, query);
123+
OptimizerCardinalityStats optimizerColdStats = runJoinOptimizerCardinalityColdScenario(
124+
pageEstimatorEnabled, query);
123125
StatementPatternCardinalityStats patternStats = runStatementPatternCardinalityScenario(pageEstimatorEnabled,
124126
query);
125127
timings.append("pageCardinalityEstimator=").append(pageEstimatorEnabled).append('\n');
126128
timings.append(String.format(Locale.ROOT, "avgPrepareMillis=%.3f%n", stats.avgPrepareMillis()));
127129
timings.append(String.format(Locale.ROOT, "avgEvaluateMillis=%.3f%n", stats.avgEvaluateMillis()));
130+
timings.append(String.format(Locale.ROOT, "avgFreshConnectionPrepareEvaluateMillis=%.3f%n",
131+
stats.avgFreshConnectionPrepareEvaluateMillis()));
132+
timings.append(String.format(Locale.ROOT, "avgFreshConnectionPrepareEvaluateNoTimeoutMillis=%.3f%n",
133+
stats.avgFreshConnectionPrepareEvaluateNoTimeoutMillis()));
134+
timings.append(String.format(Locale.ROOT, "avgFreshConnectionOpenCloseMillis=%.3f%n",
135+
stats.avgFreshConnectionOpenCloseMillis()));
136+
timings.append(String.format(Locale.ROOT, "avgFreshConnectionQueryOnlyMillis=%.3f%n",
137+
stats.avgFreshConnectionQueryOnlyMillis()));
138+
timings.append(String.format(Locale.ROOT, "avgFreshConnectionQueryOnlyNoTimeoutMillis=%.3f%n",
139+
stats.avgFreshConnectionQueryOnlyNoTimeoutMillis()));
128140
timings.append(String.format(Locale.ROOT, "avgPreparedReuseEvaluateMillis=%.3f%n",
129141
stats.avgPreparedReuseEvaluateMillis()));
130142
timings.append(String.format(Locale.ROOT, "avgOptimizedExplainMillis=%.3f%n",
@@ -143,6 +155,20 @@ void measureSocialMediaQuery10PlanningAndExecutionWithAndWithoutPageEstimator()
143155
optimizerStats.avgOptimizerJoinCardinalityCalls()));
144156
timings.append(String.format(Locale.ROOT, "optimizerCardinalityShare=%.3f%n",
145157
optimizerStats.optimizerCardinalityShare()));
158+
timings.append(String.format(Locale.ROOT, "avgJoinOptimizerColdMillis=%.3f%n",
159+
optimizerColdStats.avgJoinOptimizerMillis()));
160+
timings.append(String.format(Locale.ROOT, "avgOptimizerColdCardinalityMillis=%.3f%n",
161+
optimizerColdStats.avgOptimizerCardinalityMillis()));
162+
timings.append(String.format(Locale.ROOT, "avgOptimizerColdNonCardinalityMillis=%.3f%n",
163+
optimizerColdStats.avgOptimizerNonCardinalityMillis()));
164+
timings.append(String.format(Locale.ROOT, "avgOptimizerColdCardinalityCalls=%.1f%n",
165+
optimizerColdStats.avgOptimizerCardinalityCalls()));
166+
timings.append(String.format(Locale.ROOT, "avgOptimizerColdStatementPatternCardinalityCalls=%.1f%n",
167+
optimizerColdStats.avgOptimizerStatementPatternCardinalityCalls()));
168+
timings.append(String.format(Locale.ROOT, "avgOptimizerColdJoinCardinalityCalls=%.1f%n",
169+
optimizerColdStats.avgOptimizerJoinCardinalityCalls()));
170+
timings.append(String.format(Locale.ROOT, "optimizerColdCardinalityShare=%.3f%n",
171+
optimizerColdStats.optimizerCardinalityShare()));
146172
timings.append(patternStats.renderBlock());
147173
timings.append("measuredRuns=").append(stats.measuredRuns).append("\n\n");
148174
}
@@ -171,6 +197,9 @@ private static TimingStats runTimingScenario(boolean pageEstimatorEnabled, Strin
171197

172198
long prepareNanos = 0;
173199
long evaluateNanos = 0;
200+
long freshConnectionPrepareEvaluateNanos = 0;
201+
long freshConnectionPrepareEvaluateNoTimeoutNanos = 0;
202+
long freshConnectionOpenCloseNanos = 0;
174203
long preparedReuseEvaluateNanos = 0;
175204
long optimizedExplainNanos = 0;
176205

@@ -193,6 +222,56 @@ private static TimingStats runTimingScenario(boolean pageEstimatorEnabled, Strin
193222
}
194223
}
195224

225+
for (int i = 0; i < WARMUP_RUNS; i++) {
226+
try (SailRepositoryConnection connection = repository.getConnection()) {
227+
var warmupQuery = connection.prepareTupleQuery(query);
228+
warmupQuery.setMaxExecutionTime(180);
229+
long warmupCount = warmupQuery.evaluate().stream().count();
230+
assertEquals(expectedCount, warmupCount, "Unexpected fresh-connection warmup count");
231+
}
232+
}
233+
for (int i = 0; i < MEASURED_RUNS; i++) {
234+
long start = System.nanoTime();
235+
try (SailRepositoryConnection connection = repository.getConnection()) {
236+
var measuredQuery = connection.prepareTupleQuery(query);
237+
measuredQuery.setMaxExecutionTime(180);
238+
long count = measuredQuery.evaluate().stream().count();
239+
assertEquals(expectedCount, count, "Unexpected fresh-connection measured count");
240+
}
241+
long end = System.nanoTime();
242+
freshConnectionPrepareEvaluateNanos += (end - start);
243+
}
244+
245+
for (int i = 0; i < WARMUP_RUNS; i++) {
246+
try (SailRepositoryConnection connection = repository.getConnection()) {
247+
long warmupCount = connection.prepareTupleQuery(query).evaluate().stream().count();
248+
assertEquals(expectedCount, warmupCount, "Unexpected fresh-connection no-timeout warmup count");
249+
}
250+
}
251+
for (int i = 0; i < MEASURED_RUNS; i++) {
252+
long start = System.nanoTime();
253+
try (SailRepositoryConnection connection = repository.getConnection()) {
254+
long count = connection.prepareTupleQuery(query).evaluate().stream().count();
255+
assertEquals(expectedCount, count, "Unexpected fresh-connection no-timeout measured count");
256+
}
257+
long end = System.nanoTime();
258+
freshConnectionPrepareEvaluateNoTimeoutNanos += (end - start);
259+
}
260+
261+
for (int i = 0; i < WARMUP_RUNS; i++) {
262+
try (SailRepositoryConnection ignoredConnection = repository.getConnection()) {
263+
// warmup
264+
}
265+
}
266+
for (int i = 0; i < MEASURED_RUNS; i++) {
267+
long start = System.nanoTime();
268+
try (SailRepositoryConnection ignoredConnection = repository.getConnection()) {
269+
// measure pure connection overhead
270+
}
271+
long end = System.nanoTime();
272+
freshConnectionOpenCloseNanos += (end - start);
273+
}
274+
196275
try (SailRepositoryConnection connection = repository.getConnection()) {
197276
var preparedQuery = connection.prepareTupleQuery(query);
198277
for (int i = 0; i < WARMUP_RUNS; i++) {
@@ -226,8 +305,9 @@ private static TimingStats runTimingScenario(boolean pageEstimatorEnabled, Strin
226305
}
227306
}
228307

229-
return new TimingStats(prepareNanos, evaluateNanos, preparedReuseEvaluateNanos, optimizedExplainNanos,
230-
MEASURED_RUNS);
308+
return new TimingStats(prepareNanos, evaluateNanos, freshConnectionPrepareEvaluateNanos,
309+
freshConnectionPrepareEvaluateNoTimeoutNanos, freshConnectionOpenCloseNanos,
310+
preparedReuseEvaluateNanos, optimizedExplainNanos, MEASURED_RUNS);
231311
} finally {
232312
repository.shutDown();
233313
FileUtils.deleteDirectory(dataDir);
@@ -276,6 +356,49 @@ private static OptimizerCardinalityStats runJoinOptimizerCardinalityScenario(boo
276356
}
277357
}
278358

359+
private static OptimizerCardinalityStats runJoinOptimizerCardinalityColdScenario(boolean pageEstimatorEnabled,
360+
String query)
361+
throws Exception {
362+
File dataDir = Files.createTempDirectory("theme-query-optimizer-cold").toFile();
363+
LmdbStoreConfig config = ConfigUtil.createConfig().setPageCardinalityEstimator(pageEstimatorEnabled);
364+
SailRepository repository = new SailRepository(new LmdbStore(dataDir, config));
365+
try {
366+
loadData(SOCIAL_MEDIA_THEME, repository);
367+
ParsedTupleQuery parsed = QueryParserUtil.parseTupleQuery(QueryLanguage.SPARQL, query, null);
368+
369+
for (int i = 0; i < WARMUP_RUNS; i++) {
370+
EvaluationStatistics warmupDelegate = extractEvaluationStatistics(repository);
371+
MeasuringEvaluationStatistics warmupStats = new MeasuringEvaluationStatistics(warmupDelegate);
372+
runJoinOptimizerOnce(parsed, warmupStats);
373+
}
374+
375+
long optimizerNanos = 0;
376+
long cardinalityNanos = 0;
377+
long cardinalityCalls = 0;
378+
long statementPatternCardinalityCalls = 0;
379+
long joinCardinalityCalls = 0;
380+
381+
for (int i = 0; i < MEASURED_RUNS; i++) {
382+
EvaluationStatistics delegate = extractEvaluationStatistics(repository);
383+
MeasuringEvaluationStatistics measuringStatistics = new MeasuringEvaluationStatistics(delegate);
384+
long runStart = System.nanoTime();
385+
runJoinOptimizerOnce(parsed, measuringStatistics);
386+
long runEnd = System.nanoTime();
387+
optimizerNanos += (runEnd - runStart);
388+
cardinalityNanos += measuringStatistics.cardinalityNanos;
389+
cardinalityCalls += measuringStatistics.cardinalityCalls;
390+
statementPatternCardinalityCalls += measuringStatistics.statementPatternCardinalityCalls;
391+
joinCardinalityCalls += measuringStatistics.joinCardinalityCalls;
392+
}
393+
394+
return new OptimizerCardinalityStats(optimizerNanos, cardinalityNanos, cardinalityCalls,
395+
statementPatternCardinalityCalls, joinCardinalityCalls, MEASURED_RUNS);
396+
} finally {
397+
repository.shutDown();
398+
FileUtils.deleteDirectory(dataDir);
399+
}
400+
}
401+
279402
private static void runJoinOptimizerOnce(ParsedTupleQuery parsed, MeasuringEvaluationStatistics statistics) {
280403
TupleExpr tupleExpr = parsed.getTupleExpr().clone();
281404
new QueryJoinOptimizer(statistics).optimize(tupleExpr, null, EmptyBindingSet.getInstance());
@@ -310,14 +433,21 @@ private static StatementPatternCardinalityStats runStatementPatternCardinalitySc
310433
private static final class TimingStats {
311434
private final long prepareNanos;
312435
private final long evaluateNanos;
436+
private final long freshConnectionPrepareEvaluateNanos;
437+
private final long freshConnectionPrepareEvaluateNoTimeoutNanos;
438+
private final long freshConnectionOpenCloseNanos;
313439
private final long preparedReuseEvaluateNanos;
314440
private final long optimizedExplainNanos;
315441
private final int measuredRuns;
316442

317-
private TimingStats(long prepareNanos, long evaluateNanos, long preparedReuseEvaluateNanos,
318-
long optimizedExplainNanos, int measuredRuns) {
443+
private TimingStats(long prepareNanos, long evaluateNanos, long freshConnectionPrepareEvaluateNanos,
444+
long freshConnectionPrepareEvaluateNoTimeoutNanos, long freshConnectionOpenCloseNanos,
445+
long preparedReuseEvaluateNanos, long optimizedExplainNanos, int measuredRuns) {
319446
this.prepareNanos = prepareNanos;
320447
this.evaluateNanos = evaluateNanos;
448+
this.freshConnectionPrepareEvaluateNanos = freshConnectionPrepareEvaluateNanos;
449+
this.freshConnectionPrepareEvaluateNoTimeoutNanos = freshConnectionPrepareEvaluateNoTimeoutNanos;
450+
this.freshConnectionOpenCloseNanos = freshConnectionOpenCloseNanos;
321451
this.preparedReuseEvaluateNanos = preparedReuseEvaluateNanos;
322452
this.optimizedExplainNanos = optimizedExplainNanos;
323453
this.measuredRuns = measuredRuns;
@@ -331,6 +461,29 @@ private double avgEvaluateMillis() {
331461
return nanosToMillis(evaluateNanos) / measuredRuns;
332462
}
333463

464+
private double avgFreshConnectionPrepareEvaluateMillis() {
465+
return nanosToMillis(freshConnectionPrepareEvaluateNanos) / measuredRuns;
466+
}
467+
468+
private double avgFreshConnectionPrepareEvaluateNoTimeoutMillis() {
469+
return nanosToMillis(freshConnectionPrepareEvaluateNoTimeoutNanos) / measuredRuns;
470+
}
471+
472+
private double avgFreshConnectionOpenCloseMillis() {
473+
return nanosToMillis(freshConnectionOpenCloseNanos) / measuredRuns;
474+
}
475+
476+
private double avgFreshConnectionQueryOnlyMillis() {
477+
long queryOnlyNanos = Math.max(0L, freshConnectionPrepareEvaluateNanos - freshConnectionOpenCloseNanos);
478+
return nanosToMillis(queryOnlyNanos) / measuredRuns;
479+
}
480+
481+
private double avgFreshConnectionQueryOnlyNoTimeoutMillis() {
482+
long queryOnlyNanos = Math.max(0L,
483+
freshConnectionPrepareEvaluateNoTimeoutNanos - freshConnectionOpenCloseNanos);
484+
return nanosToMillis(queryOnlyNanos) / measuredRuns;
485+
}
486+
334487
private double avgPreparedReuseEvaluateMillis() {
335488
return nanosToMillis(preparedReuseEvaluateNanos) / measuredRuns;
336489
}

0 commit comments

Comments
 (0)