From 3381e6ba1492c6c2962629e2cceb7e4bfdfc861a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 11:16:54 +0200 Subject: [PATCH 01/32] implement benchmark --- .../benchmark/FoafCliqueDataGenerator.java | 176 ++++++++++++++++++ .../benchmark/FoafCliqueQueryBenchmark.java | 163 ++++++++++++++++ 2 files changed, 339 insertions(+) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java new file mode 100644 index 0000000000..42be1165c1 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java @@ -0,0 +1,176 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb.benchmark;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.eclipse.rdf4j.common.transaction.IsolationLevels;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.vocabulary.FOAF;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
+
+/**
+ * Writes a seeded, pseudo-random FOAF graph into a repository connection: one {@code foaf:Person} per person, fully
+ * connected cliques over a shuffled subset of the people, and additional random {@code foaf:knows} edges.
+ * <p>
+ * Statements are committed in batches of {@link #BATCH_SIZE}. The generator keeps mutable state and is not intended
+ * for concurrent use.
+ */
+final class FoafCliqueDataGenerator {
+
+	private static final String PERSON_NAMESPACE = "http://example.org/foaf/person/";
+	/** Number of added statements after which the running transaction is committed and a new one is begun. */
+	private static final int BATCH_SIZE = 10_000;
+
+	private final int peopleCount;
+	private final int cliquePercentage;
+	private final int minCliqueSize;
+	private final int maxCliqueSize;
+	private final int randomKnowsEdges;
+	private final Random random;
+
+	/** Person IRIs indexed by person number; rebuilt on every {@link #populate} call. */
+	private final List<IRI> people = new ArrayList<>();
+	/** Directed edges already written, packed as {@code (source << 32) | target}. */
+	private final Set<Long> knowsEdges = new HashSet<>();
+
+	private int pendingStatements;
+
+	/**
+	 * @param peopleCount      number of persons to create, at least 1
+	 * @param cliquePercentage percentage (0-100) of the persons that should be placed in cliques
+	 * @param minCliqueSize    smallest clique size, at least 3
+	 * @param maxCliqueSize    largest clique size, not smaller than {@code minCliqueSize}
+	 * @param randomKnowsEdges number of additional random directed edges to aim for, not negative
+	 * @param seed             seed of the random generator that drives shuffling, clique sizes and random edges
+	 * @throws IllegalArgumentException if any argument is outside its documented range
+	 */
+	FoafCliqueDataGenerator(int peopleCount, int cliquePercentage, int minCliqueSize, int maxCliqueSize,
+			int randomKnowsEdges, long seed) {
+		if (peopleCount < 1) {
+			throw new IllegalArgumentException("peopleCount must be at least 1");
+		}
+		if (cliquePercentage < 0 || cliquePercentage > 100) {
+			throw new IllegalArgumentException("cliquePercentage must be between 0 and 100");
+		}
+		if (minCliqueSize < 3) {
+			throw new IllegalArgumentException("minCliqueSize must be at least 3");
+		}
+		if (maxCliqueSize < minCliqueSize) {
+			throw new IllegalArgumentException("maxCliqueSize must be >= minCliqueSize");
+		}
+		if (randomKnowsEdges < 0) {
+			throw new IllegalArgumentException("randomKnowsEdges must be >= 0");
+		}
+		this.peopleCount = peopleCount;
+		this.cliquePercentage = cliquePercentage;
+		this.minCliqueSize = minCliqueSize;
+		this.maxCliqueSize = maxCliqueSize;
+		this.randomKnowsEdges = randomKnowsEdges;
+		this.random = new Random(seed);
+	}
+
+	/**
+	 * Generates the graph through {@code connection}. Begins a transaction with {@link IsolationLevels#NONE}, commits
+	 * whenever a batch is full and once more at the end. If a {@link RuntimeException} occurs, the transaction that is
+	 * active at that moment is rolled back and the exception is rethrown; batches committed earlier stay committed.
+	 */
+	void populate(SailRepositoryConnection connection) {
+		ValueFactory valueFactory = connection.getValueFactory();
+		knowsEdges.clear();
+		pendingStatements = 0;
+		connection.begin(IsolationLevels.NONE);
+		try {
+			createPeople(connection, valueFactory);
+			createCliques(connection);
+			createRandomKnowsEdges(connection);
+			flush(connection);
+		} catch (RuntimeException e) {
+			if (connection.isActive()) {
+				connection.rollback();
+			}
+			throw e;
+		}
+	}
+
+	/** Creates the person IRIs {@code PERSON_NAMESPACE + i} and types each one as {@code foaf:Person}. */
+	private void createPeople(SailRepositoryConnection connection, ValueFactory valueFactory) {
+		people.clear();
+		for (int i = 0; i < peopleCount; i++) {
+			IRI person = valueFactory.createIRI(PERSON_NAMESPACE + i);
+			people.add(person);
+			addStatement(connection, person, RDF.TYPE, FOAF.PERSON);
+		}
+	}
+
+	/**
+	 * Cuts consecutive groups of random size out of a shuffled list of person numbers and links every ordered pair
+	 * inside a group with {@code foaf:knows}, until the target share of people has been assigned or no group of at
+	 * least {@code minCliqueSize} fits any more.
+	 */
+	private void createCliques(SailRepositoryConnection connection) {
+		List<Integer> available = new ArrayList<>(peopleCount);
+		for (int i = 0; i < peopleCount; i++) {
+			available.add(i);
+		}
+		Collections.shuffle(available, random);
+
+		// multiply as long: peopleCount * cliquePercentage overflows int above roughly 21 million people
+		int targetCliquePeople = Math.min(peopleCount,
+				(int) Math.round(((long) peopleCount * cliquePercentage) / 100.0));
+		int assignedPeople = 0;
+		int cursor = 0;
+
+		while (assignedPeople + minCliqueSize <= targetCliquePeople && cursor < available.size()) {
+			int remainingTarget = targetCliquePeople - assignedPeople;
+			int remainingAvailable = available.size() - cursor;
+			int effectiveMax = Math.min(maxCliqueSize, Math.min(remainingTarget, remainingAvailable));
+			if (effectiveMax < minCliqueSize) {
+				break;
+			}
+
+			int cliqueSize = minCliqueSize + random.nextInt(effectiveMax - minCliqueSize + 1);
+			List<Integer> members = new ArrayList<>(cliqueSize);
+			for (int i = 0; i < cliqueSize; i++) {
+				members.add(available.get(cursor++));
+			}
+
+			for (int source : members) {
+				for (int target : members) {
+					if (source != target) {
+						addKnowsEdge(connection, source, target);
+					}
+				}
+			}
+
+			assignedPeople += cliqueSize;
+		}
+	}
+
+	/**
+	 * Adds random directed edges between distinct people until {@code randomKnowsEdges} new edges were written, the
+	 * graph is complete, or the attempt budget is used up.
+	 */
+	private void createRandomKnowsEdges(SailRepositoryConnection connection) {
+		long maxEdges = (long) peopleCount * (peopleCount - 1);
+		// long arithmetic: the int products overflowed for large parameters and could end the loop immediately
+		long maxAttempts = Math.max(20L * randomKnowsEdges, 10L * peopleCount);
+		long attempts = 0;
+		int added = 0;
+
+		while (added < randomKnowsEdges && knowsEdges.size() < maxEdges && attempts++ < maxAttempts) {
+			int source = random.nextInt(peopleCount);
+			int target = random.nextInt(peopleCount);
+			if (source == target) {
+				continue;
+			}
+			if (addKnowsEdge(connection, source, target)) {
+				added++;
+			}
+		}
+	}
+
+	/**
+	 * Writes {@code source foaf:knows target} unless that directed edge was written before.
+	 *
+	 * @return {@code true} if a statement was added, {@code false} if the edge already existed
+	 */
+	private boolean addKnowsEdge(SailRepositoryConnection connection, int source, int target) {
+		long edgeKey = (((long) source) << 32) | (target & 0xffffffffL);
+		if (!knowsEdges.add(edgeKey)) {
+			return false;
+		}
+		addStatement(connection, people.get(source), FOAF.KNOWS, people.get(target));
+		return true;
+	}
+
+	/** Adds one statement; once the batch is full it commits and immediately begins the next transaction. */
+	private void addStatement(SailRepositoryConnection connection, IRI subject, IRI predicate, IRI object) {
+		connection.add(subject, predicate, object);
+		pendingStatements++;
+		if (pendingStatements >= BATCH_SIZE) {
+			flush(connection);
+			connection.begin(IsolationLevels.NONE);
+		}
+	}
+
+	/** Commits the active transaction, if there is one, and resets the batch counter. */
+	private void flush(SailRepositoryConnection connection) {
+		if (connection.isActive()) {
+			connection.commit();
+		}
+		pendingStatements = 0;
+	}
+}
diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java new file mode 100644 index 0000000000..52db260637 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb.benchmark;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.eclipse.rdf4j.repository.sail.SailRepository;
+import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
+import org.eclipse.rdf4j.sail.lmdb.LmdbStore;
+import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/**
+ * JMH benchmark that evaluates {@code foaf:knows} cycle queries of length 3, 4 and 5 against an {@link LmdbStore}
+ * filled by {@link FoafCliqueDataGenerator}. The store is created once per trial in a temporary directory and
+ * deleted afterwards.
+ */
+@State(Scope.Benchmark)
+@Warmup(iterations = 2)
+@BenchmarkMode(Mode.AverageTime)
+@Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" })
+@Measurement(iterations = 3)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+public class FoafCliqueQueryBenchmark {
+
+	private static final String QUERY_CYCLE_3 = cycleQuery(3);
+	private static final String QUERY_CYCLE_4 = cycleQuery(4);
+	private static final String QUERY_CYCLE_5 = cycleQuery(5);
+
+	@Param({ "5000" })
+	public int peopleCount;
+
+	@Param({ "15" })
+	public int cliquePercentage;
+
+	@Param({ "3" })
+	public int minCliqueSize;
+
+	@Param({ "8" })
+	public int maxCliqueSize;
+
+	@Param({ "15000" })
+	public int randomKnowsEdges;
+
+	@Param({ "12345" })
+	public long seed;
+
+	private File dataDir;
+	private SailRepository repository;
+
+	/** Runs this benchmark class through the JMH {@link Runner} with a single fork. */
+	public static void main(String[] args) throws RunnerException {
+		new Runner(new OptionsBuilder()
+				.include("FoafCliqueQueryBenchmark")
+				.forks(1)
+				.build()).run();
+	}
+
+	/** Creates the temporary store, initialises the repository and loads the generated graph. */
+	@Setup(Level.Trial)
+	public void setup() throws IOException {
+		dataDir = Files.createTempDirectory("rdf4j-lmdb-foaf-cliques").toFile();
+		repository = new SailRepository(new LmdbStore(dataDir, createLftjBenchmarkConfig()));
+		repository.init();
+
+		try (SailRepositoryConnection connection = repository.getConnection()) {
+			new FoafCliqueDataGenerator(peopleCount, cliquePercentage, minCliqueSize, maxCliqueSize, randomKnowsEdges,
+					seed).populate(connection);
+		}
+	}
+
+	/** Shuts the repository down and removes the temporary data directory. */
+	@TearDown(Level.Trial)
+	public void tearDown() throws IOException {
+		if (repository != null) {
+			repository.shutDown();
+		}
+		if (dataDir != null && dataDir.exists()) {
+			FileUtils.deleteDirectory(dataDir);
+		}
+	}
+
+	@Benchmark
+	public long cycle3() {
+		return executeCount(QUERY_CYCLE_3);
+	}
+
+	@Benchmark
+	public long cycle4() {
+		return executeCount(QUERY_CYCLE_4);
+	}
+
+	@Benchmark
+	public long cycle5() {
+		return executeCount(QUERY_CYCLE_5);
+	}
+
+	/** Evaluates {@code query} on a fresh connection and returns the number of solutions. */
+	private long executeCount(String query) {
+		try (SailRepositoryConnection connection = repository.getConnection()) {
+			return connection.prepareTupleQuery(query).evaluate().stream().count();
+		}
+	}
+
+	/** Store configuration used by the benchmark: six triple indexes, no forced sync, 1 GiB value and triple DBs. */
+	private static LmdbStoreConfig createLftjBenchmarkConfig() {
+		LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc");
+		config.setForceSync(false);
+		config.setValueDBSize(1_073_741_824L);
+		config.setTripleDBSize(config.getValueDBSize());
+		return config;
+	}
+
+	/**
+	 * Builds a SPARQL query that matches a directed {@code foaf:knows} cycle over {@code size} variables
+	 * ({@code ?a foaf:knows ?b . ... ?last foaf:knows ?a}), requires all variables to be pairwise different and
+	 * limits the result to 10 solutions.
+	 */
+	private static String cycleQuery(int size) {
+		StringBuilder builder = new StringBuilder();
+		// the prologue must name the namespace IRI; "PREFIX foaf:" without an IRI does not parse
+		builder.append("PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n");
+		builder.append("SELECT * WHERE {\n");
+		for (int i = 0; i < size; i++) {
+			builder.append("  ?")
+					.append(variableName(i))
+					.append(" foaf:knows ?")
+					.append(variableName((i + 1) % size))
+					.append(" .\n");
+		}
+		builder.append("  FILTER (");
+		boolean first = true;
+		for (int i = 0; i < size; i++) {
+			for (int j = i + 1; j < size; j++) {
+				if (!first) {
+					builder.append(" && ");
+				}
+				builder.append("?").append(variableName(i)).append(" != ?").append(variableName(j));
+				first = false;
+			}
+		}
+		builder.append(")\n");
+		builder.append("}\n");
+		builder.append("LIMIT 10\n");
+		return builder.toString();
+	}
+
+	/** Maps index 0, 1, 2, ... to the variable names a, b, c, ... */
+	private static char variableName(int index) {
+		return (char) ('a' + index);
+	}
+}
From 1a5213f846b841b0884ed8effb481ffbad1075b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 11:17:15 +0200 Subject: [PATCH 02/32] implement lftj --- .../rdf4j/sail/base/SailSourceConnection.java | 10 +- .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 117 ++++++ .../sail/lmdb/LmdbLftjEvaluationStrategy.java | 54 +++ .../LmdbLftjEvaluationStrategyFactory.java | 39 ++ .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 213 +++++++++++ .../rdf4j/sail/lmdb/LmdbLftjMetrics.java | 43 +++ .../rdf4j/sail/lmdb/LmdbLftjOptimizer.java | 124 ++++++ .../sail/lmdb/LmdbLftjOptimizerPipeline.java | 60 +++ .../rdf4j/sail/lmdb/LmdbLftjPatternPlan.java | 162 ++++++++ .../eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java | 88 +++++ .../rdf4j/sail/lmdb/LmdbLftjPlanner.java | 358 ++++++++++++++++++ .../rdf4j/sail/lmdb/LmdbLftjTripleSource.java | 67 ++++ .../rdf4j/sail/lmdb/LmdbLftjTupleExpr.java | 80 ++++ .../rdf4j/sail/lmdb/LmdbQueryAccess.java | 36 ++ .../rdf4j/sail/lmdb/LmdbSailStore.java | 8 + .../eclipse/rdf4j/sail/lmdb/LmdbStore.java | 16 +- .../rdf4j/sail/lmdb/LmdbStoreConnection.java | 109 +++++- 
.../rdf4j/sail/lmdb/LmdbTrieCursor.java | 73 ++++ .../rdf4j/sail/lmdb/LmdbUnionTrieCursor.java | 54 +++ .../eclipse/rdf4j/sail/lmdb/TripleStore.java | 20 + .../sail/lmdb/config/LmdbStoreConfig.java | 24 ++ .../sail/lmdb/config/LmdbStoreSchema.java | 6 + .../rdf4j/sail/lmdb/LmdbSailStoreTest.java | 164 ++++++++ .../sail/lmdb/config/LmdbStoreConfigTest.java | 19 + 24 files changed, 1941 insertions(+), 3 deletions(-) create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategy.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategyFactory.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerPipeline.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTripleSource.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java diff --git 
a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java index 8b1e5d8300..fdaadd0a70 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java @@ -18,6 +18,7 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; import java.util.stream.Stream; import org.eclipse.rdf4j.common.iteration.CloseableIteration; @@ -222,6 +223,13 @@ protected EvaluationStrategy getEvaluationStrategy(Dataset dataset, TripleSource @Override protected CloseableIteration evaluateInternal(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings, boolean includeInferred) throws SailException { + return evaluateWithTripleSource(tupleExpr, dataset, bindings, includeInferred, + rdfDataset -> new SailDatasetTripleSource(vf, rdfDataset)); + } + + protected CloseableIteration evaluateWithTripleSource(TupleExpr tupleExpr, + Dataset dataset, BindingSet bindings, boolean includeInferred, + Function tripleSourceFactory) throws SailException { logger.trace("Incoming query model:\n{}", tupleExpr); if (cloneTupleExpression) { @@ -243,7 +251,7 @@ protected CloseableIteration evaluateInternal(TupleExpr tu branch = branch(IncludeInferred.fromBoolean(includeInferred)); rdfDataset = branch.dataset(getIsolationLevel()); - TripleSource tripleSource = new SailDatasetTripleSource(vf, rdfDataset); + TripleSource tripleSource = tripleSourceFactory.apply(rdfDataset); EvaluationStrategy strategy = getEvaluationStrategy(dataset, tripleSource); if (trackResultSize) { strategy.setTrackResultSize(trackResultSize); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java 
new file mode 100644 index 0000000000..caaa9c4813 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -0,0 +1,117 @@ 
+/*******************************************************************************
+ * Copyright (c) 2026 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb;
+
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.Map;
+
+import org.eclipse.rdf4j.query.BindingSet;
+import org.eclipse.rdf4j.query.MutableBindingSet;
+import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext;
+import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
+
+/**
+ * Per-evaluation state of an LFTJ plan: the ids of the plan's constants, the ids of variables that arrive bound in
+ * the input bindings, the ids assigned to variables during the search, and the read transaction in use.
+ */
+final class LmdbLftjBindingState {
+
+	private final LmdbLftjPlan plan;
+	private final BindingSet inputBindings;
+	private final LmdbQueryAccess queryAccess;
+	/** Ids of variables already bound by the input bindings. */
+	private final Map<String, Long> fixedValues = new HashMap<>();
+	/** Ids assigned to variables by the search; takes precedence over {@link #fixedValues}. */
+	private final Map<String, Long> assignedValues = new HashMap<>();
+	/** Ids of constant terms, keyed by term identity. */
+	private final IdentityHashMap<LmdbLftjPatternPlan.TermRef, Long> constantIds = new IdentityHashMap<>();
+
+	private TxnManager.Txn txn;
+
+	LmdbLftjBindingState(LmdbLftjPlan plan, BindingSet inputBindings, LmdbQueryAccess queryAccess) {
+		this.plan = plan;
+		this.inputBindings = inputBindings;
+		this.queryAccess = queryAccess;
+	}
+
+	/**
+	 * Resolves every constant term of the plan and every plan variable that is bound in the input bindings to its id.
+	 *
+	 * @return {@code false} as soon as one of them resolves to {@link LmdbValue#UNKNOWN_ID}, {@code true} otherwise
+	 */
+	boolean initialize() {
+		for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) {
+			for (LmdbLftjPatternPlan.TermRef term : patternPlan.terms()) {
+				if (term.isConstant() && !constantIds.containsKey(term)) {
+					long id = queryAccess.resolveId(term.constantValue());
+					if (id == LmdbValue.UNKNOWN_ID) {
+						return false;
+					}
+					constantIds.put(term, id);
+				}
+			}
+		}
+		for (String variableName : plan.variableOrder()) {
+			if (inputBindings.hasBinding(variableName)) {
+				long id = queryAccess.resolveId(inputBindings.getValue(variableName));
+				if (id == LmdbValue.UNKNOWN_ID) {
+					return false;
+				}
+				fixedValues.put(variableName, id);
+			}
+		}
+		return true;
+	}
+
+	/** Stores the read transaction that {@link #close()} releases. */
+	void attachTxn(TxnManager.Txn txn) {
+		this.txn = txn;
+	}
+
+	TxnManager.Txn txn() {
+		return txn;
+	}
+
+	/** @return whether the variable has an id, either assigned by the search or fixed by the input bindings */
+	boolean isBound(String variableName) {
+		return assignedValues.containsKey(variableName) || fixedValues.containsKey(variableName);
+	}
+
+	/**
+	 * Returns the id of a bound variable, preferring the id assigned by the search. Callers are expected to check
+	 * {@link #isBound(String)} first; for an unbound variable the unboxing fails with a NullPointerException.
+	 */
+	long value(String variableName) {
+		// both maps only ever hold non-null ids, so a null lookup means "absent"
+		Long assigned = assignedValues.get(variableName);
+		if (assigned != null) {
+			return assigned;
+		}
+		return fixedValues.get(variableName);
+	}
+
+	void assign(String variableName, long value) {
+		assignedValues.put(variableName, value);
+	}
+
+	/** Removes the id assigned by the search; an id fixed by the input bindings is not affected. */
+	void clear(String variableName) {
+		assignedValues.remove(variableName);
+	}
+
+	/**
+	 * @return the id of a constant term, the current id of a visible and bound variable term, or {@code -1} when the
+	 *         term has no fixed id
+	 */
+	long fixedId(LmdbLftjPatternPlan.TermRef term) {
+		if (term.isConstant()) {
+			return constantIds.get(term);
+		}
+		if (term.isVisible() && isBound(term.name())) {
+			return value(term.name());
+		}
+		return -1;
+	}
+
+	/**
+	 * Creates a binding set from the input bindings and adds the resolved value of every bound plan variable that
+	 * the input bindings do not already contain.
+	 */
+	BindingSet materialize(QueryEvaluationContext context) {
+		MutableBindingSet result = context.createBindingSet(inputBindings);
+		for (String variableName : plan.variableOrder()) {
+			if (!result.hasBinding(variableName) && isBound(variableName)) {
+				context.setBinding(variableName).accept(queryAccess.resolveValue(value(variableName)), result);
+			}
+		}
+		return result;
+	}
+
+	/** Releases the attached read transaction, if any. Safe to call more than once. */
+	void close() {
+		if (txn != null) {
+			queryAccess.releaseReadTxn(txn);
+			txn = null;
+		}
+	}
+}
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategy.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategy.java new file mode 100644 index 0000000000..eb5dc7477b --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategy.java @@ -0,0 +1,54 
@@
+/*******************************************************************************
+ * Copyright (c) 2026 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb;
+
+import java.util.function.Supplier;
+
+import org.eclipse.rdf4j.collection.factory.api.CollectionFactory;
+import org.eclipse.rdf4j.query.Dataset;
+import org.eclipse.rdf4j.query.algebra.TupleExpr;
+import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep;
+import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
+import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolver;
+import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
+import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext;
+import org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategy;
+
+/**
+ * Strict evaluation strategy that hands {@link LmdbLftjTupleExpr} nodes to an {@link LmdbLftjExecutor} and leaves
+ * every other expression to {@link StrictEvaluationStrategy}.
+ */
+final class LmdbLftjEvaluationStrategy extends StrictEvaluationStrategy {
+
+	private final LmdbLftjExecutor executor;
+
+	LmdbLftjEvaluationStrategy(TripleSource tripleSource, Dataset dataset,
+			FederatedServiceResolver serviceResolver, long iterationCacheSyncThreshold,
+			EvaluationStatistics evaluationStatistics, boolean trackResultSize,
+			Supplier<CollectionFactory> collectionFactorySupplier) {
+		super(tripleSource, dataset, serviceResolver, iterationCacheSyncThreshold, evaluationStatistics,
+				trackResultSize);
+		this.executor = new LmdbLftjExecutor(this);
+		setCollectionFactory(collectionFactorySupplier);
+	}
+
+	@Override
+	public QueryEvaluationStep precompile(TupleExpr expr, QueryEvaluationContext context) {
+		if (expr instanceof LmdbLftjTupleExpr) {
+			return executor.prepare((LmdbLftjTupleExpr) expr, context);
+		}
+		return super.precompile(expr, context);
+	}
+
+	/**
+	 * @return the query access of the underlying triple source, or {@code null} when the triple source is not an
+	 *         {@link LmdbLftjTripleSource}
+	 */
+	LmdbQueryAccess queryAccess() {
+		if (tripleSource instanceof LmdbLftjTripleSource) {
+			return ((LmdbLftjTripleSource) tripleSource).queryAccess();
+		}
+		return null;
+	}
+}
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategyFactory.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategyFactory.java new file mode 100644 index 0000000000..046e239bb5 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjEvaluationStrategyFactory.java @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb;
+
+import org.eclipse.rdf4j.query.Dataset;
+import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy;
+import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
+import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
+import org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategyFactory;
+
+/**
+ * Factory for {@link LmdbLftjEvaluationStrategy} instances. A pipeline configured on the factory is used as is;
+ * without one, every strategy gets its own {@link LmdbLftjOptimizerPipeline}.
+ */
+final class LmdbLftjEvaluationStrategyFactory extends StrictEvaluationStrategyFactory {
+
+	LmdbLftjEvaluationStrategyFactory(LmdbStore store) {
+		super(store.getFederatedServiceResolver());
+	}
+
+	@Override
+	public EvaluationStrategy createEvaluationStrategy(Dataset dataset, TripleSource tripleSource,
+			EvaluationStatistics evaluationStatistics) {
+		LmdbLftjEvaluationStrategy lftjStrategy = new LmdbLftjEvaluationStrategy(tripleSource, dataset,
+				getFederatedServiceResolver(), getQuerySolutionCacheThreshold(), evaluationStatistics,
+				isTrackResultSize(), collectionFactorySupplier);
+		// the default pipeline is only built when no pipeline was configured on the factory
+		lftjStrategy.setOptimizerPipeline(getOptimizerPipeline()
+				.orElseGet(() -> new LmdbLftjOptimizerPipeline(lftjStrategy, tripleSource, evaluationStatistics)));
+		return lftjStrategy;
+	}
+}
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java new file mode 100644 index 0000000000..28c515c1b3 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -0,0 +1,213 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+import org.eclipse.rdf4j.common.iteration.CloseableIteration;
+import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration;
+import org.eclipse.rdf4j.query.BindingSet;
+import org.eclipse.rdf4j.query.QueryEvaluationException;
+import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep;
+import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext;
+
+/**
+ * Evaluates an {@link LmdbLftjPlan} by binding the plan's variables one at a time in {@code variableOrder()},
+ * intersecting one cursor per pattern that contains the variable. All solutions are collected into a list inside a
+ * single read transaction before the result iteration is returned.
+ */
+final class LmdbLftjExecutor {
+
+	private final LmdbLftjEvaluationStrategy strategy;
+
+	LmdbLftjExecutor(LmdbLftjEvaluationStrategy strategy) {
+		this.strategy = strategy;
+	}
+
+	/**
+	 * Precompiles a clone of the plan's fallback expression and returns a step that evaluates {@code node}'s plan
+	 * for each input binding set, using the fallback where the plan cannot be run.
+	 */
+	QueryEvaluationStep prepare(LmdbLftjTupleExpr node, QueryEvaluationContext context) {
+		QueryEvaluationStep fallback = strategy.precompile(node.plan().fallbackExpr().clone(), context);
+		return bindings -> evaluate(node.plan(), context, fallback, bindings);
+	}
+
+	/**
+	 * Runs the plan for one input binding set. Delegates to {@code fallback} when no query access is available or
+	 * when the binding state cannot be initialised.
+	 *
+	 * @throws QueryEvaluationException wrapping any {@link RuntimeException} raised during the search
+	 */
+	private CloseableIteration<BindingSet> evaluate(LmdbLftjPlan plan, QueryEvaluationContext context,
+			QueryEvaluationStep fallback, BindingSet bindings) {
+		LmdbQueryAccess queryAccess = strategy.queryAccess();
+		if (queryAccess == null) {
+			return fallback.evaluate(bindings);
+		}
+
+		LmdbLftjBindingState state = new LmdbLftjBindingState(plan, bindings, queryAccess);
+		if (!state.initialize()) {
+			return fallback.evaluate(bindings);
+		}
+
+		try {
+			state.attachTxn(queryAccess.acquireReadTxn());
+			List<BindingSet> results = new ArrayList<>();
+			LmdbLftjMetrics metrics = new LmdbLftjMetrics();
+			search(plan, state, context, queryAccess, metrics, results, 0);
+			if (results.isEmpty()) {
+				return QueryEvaluationStep.EMPTY_ITERATION;
+			}
+			return new CloseableIteratorIteration<>(results.iterator());
+		} catch (RuntimeException e) {
+			throw new QueryEvaluationException("LMDB LFTJ execution failed", e);
+		} finally {
+			// releases the read transaction on every path; the results are already materialised
+			state.close();
+		}
+	}
+
+	/**
+	 * Binds the variable at position {@code depth} to every id on which all of its cursors agree and recurses. Once
+	 * all variables are bound, the current assignment is added to {@code results} as many times as
+	 * {@link #witnessMultiplicity} reports.
+	 */
+	private void search(LmdbLftjPlan plan, LmdbLftjBindingState state, QueryEvaluationContext context,
+			LmdbQueryAccess queryAccess, LmdbLftjMetrics metrics, List<BindingSet> results, int depth) {
+		if (depth >= plan.variableOrder().size()) {
+			long multiplicity = witnessMultiplicity(plan, state, queryAccess, metrics);
+			if (multiplicity <= 0) {
+				return;
+			}
+			for (long i = 0; i < multiplicity; i++) {
+				results.add(state.materialize(context));
+			}
+			metrics.recordEmitted(multiplicity);
+			return;
+		}
+
+		String variableName = plan.variableOrder().get(depth);
+		if (state.isBound(variableName)) {
+			// already fixed by the input bindings: nothing to enumerate at this level
+			search(plan, state, context, queryAccess, metrics, results, depth + 1);
+			return;
+		}
+
+		List<LmdbTrieCursor> cursors = createCursors(plan, state, queryAccess, metrics, variableName);
+		if (cursors.isEmpty()) {
+			return;
+		}
+
+		// no cursor can match below the largest of the current values, so start the alignment there
+		long current = cursors.stream().mapToLong(LmdbTrieCursor::value).max().orElseThrow();
+		while (true) {
+			current = align(cursors, current);
+			if (current < 0) {
+				return;
+			}
+			state.assign(variableName, current);
+			search(plan, state, context, queryAccess, metrics, results, depth + 1);
+			state.clear(variableName);
+			if (!cursors.get(0).next()) {
+				return;
+			}
+			current = cursors.get(0).value();
+		}
+	}
+
+	/**
+	 * Opens one cursor for every pattern that contains {@code variableName}.
+	 *
+	 * @return the initialised cursors, or an empty list as soon as one cursor fails to initialise
+	 */
+	private List<LmdbTrieCursor> createCursors(LmdbLftjPlan plan, LmdbLftjBindingState state,
+			LmdbQueryAccess queryAccess,
+			LmdbLftjMetrics metrics, String variableName) {
+		List<LmdbTrieCursor> cursors = new ArrayList<>();
+		for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) {
+			if (!patternPlan.containsVariable(variableName)) {
+				continue;
+			}
+			LmdbTrieCursor cursor = queryAccess.includeInferred()
+					? new LmdbUnionTrieCursor(patternPlan, variableName, queryAccess)
+					: new LmdbTrieCursor(patternPlan, variableName, queryAccess, true);
+			metrics.recordCandidateScan();
+			if (!cursor.initialize(state)) {
+				return List.of();
+			}
+			cursors.add(cursor);
+		}
+		return cursors;
+	}
+
+	/**
+	 * Seeks all cursors to {@code target} and keeps raising the target to the largest value any cursor landed on,
+	 * until every cursor reports the same value.
+	 *
+	 * @return the common value, or {@code -1} when a seek fails
+	 */
+	private long align(List<LmdbTrieCursor> cursors, long target) {
+		long current = target;
+		while (true) {
+			boolean allMatch = true;
+			long max = current;
+			for (LmdbTrieCursor cursor : cursors) {
+				if (!cursor.seek(current)) {
+					return -1;
+				}
+				if (cursor.value() != current) {
+					max = Math.max(max, cursor.value());
+					allMatch = false;
+				}
+			}
+			if (allMatch) {
+				return current;
+			}
+			current = max;
+		}
+	}
+
+	/**
+	 * Multiplies the number of matching quads of every pattern under the current assignment.
+	 *
+	 * @return the product, or 0 as soon as one pattern has no match
+	 * @throws ArithmeticException if the product overflows a {@code long}
+	 */
+	private long witnessMultiplicity(LmdbLftjPlan plan, LmdbLftjBindingState state, LmdbQueryAccess queryAccess,
+			LmdbLftjMetrics metrics) {
+		long multiplicity = 1;
+		for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) {
+			long witnesses = countMatches(patternPlan, state, queryAccess, metrics);
+			if (witnesses == 0) {
+				return 0;
+			}
+			multiplicity = Math.multiplyExact(multiplicity, witnesses);
+		}
+		return multiplicity;
+	}
+
+	/**
+	 * Counts the distinct quads that match the pattern's scan key: explicit statements always, the second scan
+	 * ({@code explicit == false}) only when inferred statements are included.
+	 */
+	private long countMatches(LmdbLftjPatternPlan patternPlan, LmdbLftjBindingState state, LmdbQueryAccess queryAccess,
+			LmdbLftjMetrics metrics) {
+		long[] scanKey = patternPlan.scanKey(state);
+		Set<QuadKey> matches = new HashSet<>();
+		metrics.recordWitnessScan();
+		collectMatches(matches, queryAccess, state, patternPlan, scanKey, true);
+		if (queryAccess.includeInferred()) {
+			collectMatches(matches, queryAccess, state, patternPlan, scanKey, false);
+		}
+		return matches.size();
+	}
+
+	/** Scans the pattern's index with {@code scanKey} and adds every returned quad to {@code matches}. */
+	private void collectMatches(Set<QuadKey> matches, LmdbQueryAccess queryAccess, LmdbLftjBindingState state,
+			LmdbLftjPatternPlan patternPlan, long[] scanKey, boolean explicit) {
+		try (RecordIterator records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1],
+				scanKey[2], scanKey[3], explicit)) {
+			long[] quad;
+			while ((quad = records.next()) != null) {
+				matches.add(new QuadKey(quad[0], quad[1], quad[2], quad[3]));
+			}
+		}
+	}
+
+	/** Value-based key over the four ids of a quad, used to de-duplicate scan results. */
+	private static final class QuadKey {
+		private final long subj;
+		private final long pred;
+		private final long obj;
+		private final long context;
+
+		private QuadKey(long subj, long pred, long obj, long context) {
+			this.subj = subj;
+			this.pred = pred;
+			this.obj = obj;
+			this.context = context;
+		}
+
+		@Override
+		public boolean equals(Object other) {
+			if (!(other instanceof QuadKey)) {
+				return false;
+			}
+			QuadKey o = (QuadKey) other;
+			return subj == o.subj && pred == o.pred && obj == o.obj && context == o.context;
+		}
+
+		@Override
+		public int hashCode() {
+			// combined by hand: Objects.hash would box four longs and allocate an array for every scanned quad
+			int result = Long.hashCode(subj);
+			result = 31 * result + Long.hashCode(pred);
+			result = 31 * result + Long.hashCode(obj);
+			result = 31 * result + Long.hashCode(context);
+			return result;
+		}
+	}
+}
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java new file mode 100644 index 0000000000..16b8d40c2e --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java @@ -0,0 +1,43 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +/** Mutable counters for candidate scans, witness scans and emitted bindings. The fields are plain longs and no synchronization is used. */ final class LmdbLftjMetrics { + + private long candidateScans; + private long witnessScans; + private long emittedBindings; + + /** Adds one to the candidate scan counter. */ void recordCandidateScan() { + candidateScans++; + } + + /** Adds one to the witness scan counter. */ void recordWitnessScan() { + witnessScans++; + } + + /** Adds count to the emitted bindings counter; count is not validated. */ void recordEmitted(long count) { + emittedBindings += count; + } + + /** Returns the number of candidate scans recorded so far. */ long candidateScans() { + return candidateScans; + } + + /** Returns the number of witness scans recorded so far. */ long witnessScans() { + return witnessScans; + } + + /** Returns the sum of all counts passed to recordEmitted. */ long emittedBindings() { + return emittedBindings; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java new file mode 100644 index 0000000000..ad64a10ff8 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java @@ -0,0 +1,124 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.Dataset; +import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer; +import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; +import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** QueryOptimizer that replaces the StatementPattern operands of a Join tree with a single LmdbLftjTupleExpr when LmdbLftjPlanner accepts them. It does nothing unless the triple source is an LmdbLftjTripleSource. */ final class LmdbLftjOptimizer implements QueryOptimizer { + + private static final Logger logger = LoggerFactory.getLogger(LmdbLftjOptimizer.class); + + private final TripleSource tripleSource; + private final LmdbLftjPlanner planner = new LmdbLftjPlanner(); + + LmdbLftjOptimizer(TripleSource tripleSource) { + this.tripleSource = tripleSource; + } + + /** Visits tupleExpr and tries to transform every Join whose parent is not a Join. The dataset and bindings arguments are not used. */ @Override + public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) { + if (!(tripleSource instanceof LmdbLftjTripleSource)) { + return; + } + + LmdbQueryAccess queryAccess = ((LmdbLftjTripleSource) tripleSource).queryAccess(); + tupleExpr.visit(new AbstractQueryModelVisitor() { + @Override + public void meet(Join node) { + /* NOTE(review): after a successful transform the visitor returns without descending, so joins nested inside the remaining non-pattern operands are not considered - confirm this is intended. */ if (isTransformRoot(node) && transform(node, queryAccess)) { + return; + } + super.meet(node); + } + }); + } + + /** A join is a transform root when its parent node is not itself a Join. */ private boolean isTransformRoot(Join node) { + return !(node.getParentNode() instanceof Join); + } + + /** Flattens the join tree under node, plans over its StatementPattern operands and, when planning succeeds, replaces node with a rebuilt join in which the LFTJ node stands where the first pattern was and the other operands keep their relative order. Returns false and leaves the tree untouched when there are fewer than three patterns or the planner rejects them. */ private boolean transform(Join node, LmdbQueryAccess queryAccess) { + List operands = new ArrayList<>(); + collectOperands(node, operands); + + List patterns = operands.stream() + .filter(StatementPattern.class::isInstance) + .map(StatementPattern.class::cast) + .toList(); + if (patterns.size() < 3) { + 
return false; + } + + /* The fallback expression is built from clones of the patterns, so it shares no nodes with the live tree. */ TupleExpr fallbackExpr = rebuildJoin(patterns.stream().map(TupleExpr::clone).toList()); + LmdbLftjPlanner.PlanningResult plan = planner.plan(fallbackExpr, patterns, queryAccess.configuredIndexes()); + if (!plan.planned()) { + logger.debug("Skipping LMDB LFTJ for {}: {}", node.getSignature(), plan.rejectionReason()); + return false; + } + + LmdbLftjTupleExpr lftjNode = new LmdbLftjTupleExpr(plan.plan()); + List rebuiltOperands = new ArrayList<>(); + boolean inserted = false; + for (TupleExpr operand : operands) { + /* Every StatementPattern is dropped; the LFTJ node takes the place of the first one. */ if (operand instanceof StatementPattern) { + if (!inserted) { + rebuiltOperands.add(lftjNode); + inserted = true; + } + continue; + } + rebuiltOperands.add(operand); + } + /* NOTE(review): looks unreachable, since patterns.size() is at least 3 here and so operands contains a StatementPattern. */ if (!inserted) { + rebuiltOperands.add(lftjNode); + } + + node.replaceWith(rebuildJoin(rebuiltOperands)); + return true; + } + + /** Appends the non-Join operands of expr to operands, descending into nested Join nodes left argument first. */ private void collectOperands(TupleExpr expr, List operands) { + if (expr instanceof Join) { + Join join = (Join) expr; + collectOperands(join.getLeftArg(), operands); + collectOperands(join.getRightArg(), operands); + return; + } + operands.add(expr); + } + + /** Folds operands into a left-nested chain of Join nodes in list order. A single operand is returned as is; an empty list throws IllegalArgumentException. */ private TupleExpr rebuildJoin(List operands) { + if (operands.isEmpty()) { + throw new IllegalArgumentException("LMDB LFTJ requires at least one join operand"); + } + if (operands.size() == 1) { + return operands.get(0); + } + TupleExpr rebuilt = operands.get(0); + for (int i = 1; i < operands.size(); i++) { + rebuilt = new Join(rebuilt, operands.get(i)); + } + return rebuilt; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerPipeline.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerPipeline.java new file mode 100644 index 0000000000..6a0dbb34cd --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerPipeline.java @@ -0,0 +1,60 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. 
+ * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.List; + +import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizerPipeline; +import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; +import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.ConstantOptimizer; +import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.QueryJoinOptimizer; +import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.RegexAsStringFunctionOptimizer; +import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.StandardQueryOptimizerPipeline; + +/** QueryOptimizerPipeline that places an LmdbLftjOptimizer directly after the QueryJoinOptimizer; every other entry is a StandardQueryOptimizerPipeline constant, a ConstantOptimizer or a RegexAsStringFunctionOptimizer. */ final class LmdbLftjOptimizerPipeline implements QueryOptimizerPipeline { + + private final EvaluationStrategy strategy; + private final TripleSource tripleSource; + private final EvaluationStatistics evaluationStatistics; + + LmdbLftjOptimizerPipeline(EvaluationStrategy strategy, TripleSource tripleSource, + EvaluationStatistics evaluationStatistics) { + this.strategy = strategy; + this.tripleSource = tripleSource; + this.evaluationStatistics = evaluationStatistics; + } + + /** Returns the optimizers in execution order as an unmodifiable list. The list and the four optimizers constructed with new are created afresh on every call. */ @Override + public Iterable getOptimizers() { + return List.of( + StandardQueryOptimizerPipeline.BINDING_ASSIGNER, + StandardQueryOptimizerPipeline.BINDING_SET_ASSIGNMENT_INLINER, + new ConstantOptimizer(strategy), + new RegexAsStringFunctionOptimizer(tripleSource.getValueFactory()), + 
StandardQueryOptimizerPipeline.COMPARE_OPTIMIZER, + StandardQueryOptimizerPipeline.CONJUNCTIVE_CONSTRAINT_SPLITTER, + StandardQueryOptimizerPipeline.DISJUNCTIVE_CONSTRAINT_OPTIMIZER, + StandardQueryOptimizerPipeline.SAME_TERM_FILTER_OPTIMIZER, + StandardQueryOptimizerPipeline.UNION_SCOPE_CHANGE_OPTIMIZER, + StandardQueryOptimizerPipeline.QUERY_MODEL_NORMALIZER, + StandardQueryOptimizerPipeline.PROJECTION_REMOVAL_OPTIMIZER, + new QueryJoinOptimizer(evaluationStatistics, strategy.isTrackResultSize(), tripleSource), + /* Runs on the join tree as left by QueryJoinOptimizer. */ new LmdbLftjOptimizer(tripleSource), + StandardQueryOptimizerPipeline.ITERATIVE_EVALUATION_OPTIMIZER, + StandardQueryOptimizerPipeline.FILTER_OPTIMIZER, + StandardQueryOptimizerPipeline.ORDER_LIMIT_OPTIMIZER + ); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java new file mode 100644 index 0000000000..371ec95ddf --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java @@ -0,0 +1,162 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.Var; + +/** Pairs a clone of a StatementPattern with the index name chosen for it and one TermRef for each of the subject, predicate, object and context positions. Equality is defined over the pattern and the index name. */ final class LmdbLftjPatternPlan { + + private final StatementPattern pattern; + private final String indexName; + private final TermRef subject; + private final TermRef predicate; + private final TermRef object; + private final TermRef context; + + /** Stores a clone of pattern; the TermRefs are derived from the Vars of the pattern passed in. */ LmdbLftjPatternPlan(StatementPattern pattern, String indexName) { + this.pattern = pattern.clone(); + this.indexName = indexName; + this.subject = TermRef.of(TripleStore.SUBJ_IDX, pattern.getSubjectVar()); + this.predicate = TermRef.of(TripleStore.PRED_IDX, pattern.getPredicateVar()); + this.object = TermRef.of(TripleStore.OBJ_IDX, pattern.getObjectVar()); + this.context = TermRef.of(TripleStore.CONTEXT_IDX, pattern.getContextVar()); + } + + StatementPattern pattern() { + return pattern; + } + + String indexName() { + return indexName; + } + + /** Returns the four terms in subject, predicate, object, context order as a new unmodifiable list. */ List terms() { + return List.of(subject, predicate, object, context); + } + + /** Returns the distinct names of the visible terms, in subject, predicate, object, context order. */ List visibleVariableNames() { + return terms().stream() + .filter(TermRef::isVisible) + .map(TermRef::name) + .distinct() + .collect(Collectors.toList()); + } + + /** True when one of the visible terms carries the given name. */ boolean containsVariable(String name) { + return terms().stream().anyMatch(term -> term.matchesName(name)); + } + + /** Maps an index field letter ('s', 'p', 'o' or 'c') to its term; any other character throws IllegalArgumentException. */ TermRef term(char field) { + switch (field) { + case 's': + return subject; + case 'p': + return predicate; + case 'o': + return object; + case 'c': + return context; + default: + throw new IllegalArgumentException("Unknown LMDB field: " + field); + } + } + + /** Builds a four-element key in subject, predicate, object, context order from state.fixedId of each term. NOTE(review): the value fixedId yields for an unbound term is defined in LmdbLftjBindingState, which is not visible here. */ long[] scanKey(LmdbLftjBindingState state) { + return new long[] { + state.fixedId(subject), + state.fixedId(predicate), + 
state.fixedId(object), + state.fixedId(context) + }; + } + + /** Returns the quad element at the component index of the first visible term named variableName; throws IllegalArgumentException when no term matches. */ long valueFor(String variableName, long[] quad) { + return terms().stream() + .filter(term -> term.matchesName(variableName)) + .findFirst() + .map(term -> quad[term.component()]) + .orElseThrow(() -> new IllegalArgumentException("Pattern does not bind variable " + variableName)); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof LmdbLftjPatternPlan)) { + return false; + } + LmdbLftjPatternPlan o = (LmdbLftjPatternPlan) other; + return Objects.equals(pattern, o.pattern) && Objects.equals(indexName, o.indexName); + } + + @Override + public int hashCode() { + return Objects.hash(pattern, indexName); + } + + /** One position of a pattern. A term is constant when it has a value, visible when it is a named, non-anonymous variable, and hidden otherwise (anonymous variable or missing Var). */ static final class TermRef { + + private final int component; + private final String name; + private final boolean anonymous; + private final Value constantValue; + + private TermRef(int component, String name, boolean anonymous, Value constantValue) { + this.component = component; + this.name = name; + this.anonymous = anonymous; + this.constantValue = constantValue; + } + + /** Classifies var: null becomes a nameless anonymous term, a Var with a value becomes a constant without a name, anything else keeps the name and anonymous flag of the Var. */ static TermRef of(int component, Var var) { + if (var == null) { + return new TermRef(component, null, true, null); + } + if (var.hasValue()) { + return new TermRef(component, null, false, var.getValue()); + } + return new TermRef(component, var.getName(), var.isAnonymous(), null); + } + + int component() { + return component; + } + + String name() { + return name; + } + + Value constantValue() { + return constantValue; + } + + boolean isConstant() { + return constantValue != null; + } + + boolean isVisible() { + return !isConstant() && !anonymous && name != null; + } + + boolean isHidden() { + return !isConstant() && !isVisible(); + } + + /** True only for a visible term whose name equals variableName; isVisible guarantees name is non-null before equals is called. */ boolean matchesName(String variableName) { + return isVisible() && name.equals(variableName); + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java 
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java new file mode 100644 index 0000000000..2f44a496d1 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java @@ -0,0 +1,88 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import org.eclipse.rdf4j.query.algebra.TupleExpr; + +/** Holder for a planned LFTJ: the fallback expression, its binding names, the variable order and the per-pattern plans. The sets and lists are stored as unmodifiable copies; the fallback expression is stored by reference. */ final class LmdbLftjPlan { + + private final TupleExpr fallbackExpr; + private final Set bindingNames; + private final Set assuredBindingNames; + private final List variableOrder; + private final List patternPlans; + + /** Copies the collections with Set.copyOf and List.copyOf. NOTE(review): Set.copyOf does not specify an iteration order, so the intermediate LinkedHashSet does not preserve insertion order in the stored sets. */ LmdbLftjPlan(TupleExpr fallbackExpr, Set bindingNames, Set assuredBindingNames, + List variableOrder, List patternPlans) { + this.fallbackExpr = fallbackExpr; + this.bindingNames = Set.copyOf(new LinkedHashSet<>(bindingNames)); + this.assuredBindingNames = Set.copyOf(new LinkedHashSet<>(assuredBindingNames)); + this.variableOrder = List.copyOf(variableOrder); + this.patternPlans = List.copyOf(patternPlans); + } + + TupleExpr fallbackExpr() { + return fallbackExpr; + } + + Set bindingNames() { + return bindingNames; + } + + Set assuredBindingNames() { + return assuredBindingNames; + } + + List variableOrder() { + return variableOrder; + } + + List patternPlans() { + return patternPlans; + } + + /** Returns the index name of each pattern plan, in pattern plan order, as a new list. */ List indexNames() { + return 
patternPlans.stream().map(LmdbLftjPatternPlan::indexName).collect(Collectors.toList()); + } + + int patternCount() { + return patternPlans.size(); + } + + /** Returns a new plan with a clone of the fallback expression; the other fields are handed to the constructor unchanged and copied there again. */ LmdbLftjPlan copy() { + return new LmdbLftjPlan(fallbackExpr.clone(), bindingNames, assuredBindingNames, variableOrder, patternPlans); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof LmdbLftjPlan)) { + return false; + } + LmdbLftjPlan o = (LmdbLftjPlan) other; + return Objects.equals(fallbackExpr, o.fallbackExpr) + && Objects.equals(bindingNames, o.bindingNames) + && Objects.equals(assuredBindingNames, o.assuredBindingNames) + && Objects.equals(variableOrder, o.variableOrder) + && Objects.equals(patternPlans, o.patternPlans); + } + + @Override + public int hashCode() { + return Objects.hash(fallbackExpr, bindingNames, assuredBindingNames, variableOrder, patternPlans); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java new file mode 100644 index 0000000000..2fc99b35bc --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java @@ -0,0 +1,358 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.Var; + +/** Decides whether a set of statement patterns can be run as an LFTJ and, if so, picks a variable order and one index per pattern. The planner keeps no state between calls. */ final class LmdbLftjPlanner { + + /* The six orderings of s, p and o, each followed by c. */ private static final Set REQUIRED_INDEXES = Set.of("spoc", "sopc", "psoc", "posc", "ospc", "opsc"); + + /** True when configuredIndexes contains every entry of REQUIRED_INDEXES. */ static boolean hasRequiredIndexCoverage(Set configuredIndexes) { + return configuredIndexes.containsAll(REQUIRED_INDEXES); + } + + /** Returns a planned result, or a rejected result carrying a reason string. Checks run in this order: too-few-patterns (fewer than 3), no-indexes, insufficient-index-profile, named-context-scope, repeated-variable, acyclic-bgp, incompatible-index-order. */ PlanningResult plan(TupleExpr fallbackExpr, Collection patterns, Set configuredIndexes) { + if (patterns.size() < 3) { + return PlanningResult.rejected("too-few-patterns"); + } + if (configuredIndexes.isEmpty()) { + return PlanningResult.rejected("no-indexes"); + } + if (!hasRequiredIndexCoverage(configuredIndexes)) { + return PlanningResult.rejected("insufficient-index-profile"); + } + + List patternList = List.copyOf(patterns); + for (StatementPattern pattern : patternList) { + if (pattern.getScope() == StatementPattern.Scope.NAMED_CONTEXTS) { + return PlanningResult.rejected("named-context-scope"); + } + if (hasRepeatedVariable(pattern)) { + return PlanningResult.rejected("repeated-variable"); + } + } + if (!isCyclic(patternList)) { + return PlanningResult.rejected("acyclic-bgp"); + } + + List visibleVariables = collectVisibleVariables(patternList); + PlanningCandidate candidate = chooseCandidate(patternList, configuredIndexes, visibleVariables); + if (candidate == null) { + return PlanningResult.rejected("incompatible-index-order"); + } + + /* candidate.indexNames is parallel to patternList: entry i is the index chosen for pattern i. */ List patternPlans = new ArrayList<>(patternList.size()); + for (int i = 0; i < 
patternList.size(); i++) { + patternPlans.add(new LmdbLftjPatternPlan(patternList.get(i), candidate.indexNames.get(i))); + } + + return PlanningResult.planned(new LmdbLftjPlan(fallbackExpr, fallbackExpr.getBindingNames(), + fallbackExpr.getAssuredBindingNames(), candidate.variableOrder, patternPlans)); + } + + /** True when the same named, non-anonymous, value-less variable occurs more than once in the pattern. */ private boolean hasRepeatedVariable(StatementPattern pattern) { + Set names = new HashSet<>(); + for (Var var : pattern.getVarList()) { + if (var == null || var.hasValue() || var.isAnonymous()) { + continue; + } + if (!names.add(var.getName())) { + return true; + } + } + return false; + } + + /** Builds an undirected graph with one node per pattern ("p:" plus index) and one node per variable ("v:" plus name) that occurs in at least two patterns, linking each such variable to the patterns containing it, and reports whether that graph contains a cycle. */ private boolean isCyclic(List patterns) { + Map> variableToPatterns = new java.util.LinkedHashMap<>(); + for (int i = 0; i < patterns.size(); i++) { + Set patternVariables = new LinkedHashSet<>(); + for (Var var : patterns.get(i).getVarList()) { + if (var != null && !var.hasValue() && !var.isAnonymous() && var.getName() != null) { + patternVariables.add(var.getName()); + } + } + String patternNode = patternNode(i); + for (String variable : patternVariables) { + variableToPatterns.computeIfAbsent(variable, key -> new ArrayList<>()).add(patternNode); + } + } + + Map> adjacency = new java.util.LinkedHashMap<>(); + for (Map.Entry> entry : variableToPatterns.entrySet()) { + /* A variable used by a single pattern cannot take part in a cycle. */ if (entry.getValue().size() < 2) { + continue; + } + String variableNode = variableNode(entry.getKey()); + for (String patternNode : entry.getValue()) { + adjacency.computeIfAbsent(patternNode, key -> new LinkedHashSet<>()).add(variableNode); + adjacency.computeIfAbsent(variableNode, key -> new LinkedHashSet<>()).add(patternNode); + } + } + + /* One depth-first search per node not reached by an earlier search. */ Set visited = new HashSet<>(); + for (String node : adjacency.keySet()) { + if (visited.add(node) && hasCycle(node, null, adjacency, visited)) { + return true; + } + } + return false; + } + + /** Depth-first search from node. The edge back to parent is ignored; reaching any other node that is already in visited means a cycle. Adjacency sets hold each neighbor once, so skipping by name cannot hide a parallel edge. */ private boolean hasCycle(String node, String parent, Map> adjacency, Set visited) { + for (String neighbor : adjacency.getOrDefault(node, Set.of())) { + if 
(neighbor.equals(parent)) { + continue; + } + if (!visited.add(neighbor)) { + return true; + } + if (hasCycle(neighbor, node, adjacency, visited)) { + return true; + } + } + return false; + } + + private String patternNode(int index) { + return "p:" + index; + } + + private String variableNode(String name) { + return "v:" + name; + } + + /** Returns the names of all named, non-anonymous, value-less variables in first-seen order, without duplicates. */ private List collectVisibleVariables(List patterns) { + LinkedHashSet names = new LinkedHashSet<>(); + for (StatementPattern pattern : patterns) { + for (Var var : pattern.getVarList()) { + if (var != null && !var.hasValue() && !var.isAnonymous() && var.getName() != null) { + names.add(var.getName()); + } + } + } + return List.copyOf(names); + } + + /** With at most 8 visible variables every permutation of the variable order is evaluated and the best candidate kept; with more, a single greedy order is evaluated. Returns null when no evaluated order is compatible. Indexes are tried in sorted name order. */ private PlanningCandidate chooseCandidate(List patterns, Set configuredIndexes, + List visibleVariables) { + List indexes = configuredIndexes.stream().sorted().toList(); + if (visibleVariables.size() <= 8) { + List current = new ArrayList<>(visibleVariables.size()); + List remaining = new ArrayList<>(visibleVariables); + return permute(patterns, indexes, current, remaining, null); + } + + List greedyOrder = greedyVariableOrder(patterns, visibleVariables); + return evaluateCandidate(patterns, indexes, greedyOrder); + } + + /** Backtracking enumeration of all orders: moves each remaining name to the end of current in turn, recurses, then restores both lists. A complete order is evaluated and merged into best via pickBetter. */ private PlanningCandidate permute(List patterns, List indexes, List current, + List remaining, PlanningCandidate best) { + if (remaining.isEmpty()) { + PlanningCandidate candidate = evaluateCandidate(patterns, indexes, current); + return pickBetter(best, candidate); + } + + for (int i = 0; i < remaining.size(); i++) { + String next = remaining.remove(i); + current.add(next); + best = permute(patterns, indexes, current, remaining, best); + current.remove(current.size() - 1); + remaining.add(i, next); + } + return best; + } + + /** Orders the variables by the number of patterns that contain them, highest first, breaking ties by name. */ private List greedyVariableOrder(List patterns, List visibleVariables) { + Map occurrences = visibleVariables.stream() + .collect(java.util.stream.Collectors.toMap(name -> name, name -> patterns.stream() + .filter(pattern -> pattern.getVarList() + 
.stream() + .anyMatch(var -> var != null && !var.hasValue() && !var.isAnonymous() + && name.equals(var.getName()))) + .count())); + List ordered = new ArrayList<>(visibleVariables); + ordered.sort(Comparator.comparing(occurrences::get) + .reversed() + .thenComparing(Comparator.naturalOrder())); + return ordered; + } + + /** Chooses an index for every pattern under the given variable order and sums the scores. Returns null as soon as one pattern has no compatible index. The returned candidate holds copies of the order and the index names. */ private PlanningCandidate evaluateCandidate(List patterns, List indexes, + List variableOrder) { + List indexNames = new ArrayList<>(patterns.size()); + int score = 0; + for (StatementPattern pattern : patterns) { + IndexChoice choice = chooseIndex(pattern, indexes, variableOrder); + if (choice == null) { + return null; + } + indexNames.add(choice.indexName); + score += choice.score; + } + return new PlanningCandidate(List.copyOf(variableOrder), List.copyOf(indexNames), score); + } + + /** Returns the non-null argument when one is null. Otherwise the higher score wins; on equal scores the candidate whose variable order compares smaller wins, then the one whose index names compare smaller; on a full tie left is kept. */ private PlanningCandidate pickBetter(PlanningCandidate left, PlanningCandidate right) { + if (left == null) { + return right; + } + if (right == null) { + return left; + } + if (right.score != left.score) { + return right.score > left.score ? right : left; + } + int orderCompare = compareLists(right.variableOrder, left.variableOrder); + if (orderCompare != 0) { + return orderCompare < 0 ? right : left; + } + int indexCompare = compareLists(right.indexNames, left.indexNames); + return indexCompare < 0 ? 
right : left; + } + + /** Compares by size first, then element by element with compareTo; returns 0 when both lists are equal. */ private int compareLists(List left, List right) { + int sizeCompare = Integer.compare(left.size(), right.size()); + if (sizeCompare != 0) { + return sizeCompare; + } + for (int i = 0; i < left.size(); i++) { + int compare = left.get(i).compareTo(right.get(i)); + if (compare != 0) { + return compare; + } + } + return 0; + } + + /** Returns the compatible index with the highest score, preferring the smaller index name on equal scores, or null when no index is compatible. */ private IndexChoice chooseIndex(StatementPattern pattern, List indexes, List variableOrder) { + IndexChoice best = null; + for (String indexName : indexes) { + IndexChoice candidate = compatible(pattern, indexName, variableOrder); + if (candidate == null) { + continue; + } + if (best == null || candidate.score > best.score + || (candidate.score == best.score && candidate.indexName.compareTo(best.indexName) < 0)) { + best = candidate; + } + } + return best; + } + + /** Walks the fields of indexName in order and scores the index for the pattern. A constant at field position i adds 8 - i. A visible variable must not follow a hidden term and must not rank earlier in variableOrder than the previous visible variable, otherwise null is returned; it adds max(1, variableOrder.size() - rank). NOTE(review): a constant that follows a hidden term is still accepted and scored - confirm this is intended. */ private IndexChoice compatible(StatementPattern pattern, String indexName, List variableOrder) { + int lastRank = -1; + boolean hiddenSeen = false; + int score = 0; + for (int i = 0; i < indexName.length(); i++) { + LmdbLftjPatternPlan.TermRef term = term(pattern, indexName.charAt(i)); + if (term.isConstant()) { + score += 8 - i; + continue; + } + if (term.isHidden()) { + hiddenSeen = true; + continue; + } + if (hiddenSeen) { + return null; + } + int rank = variableOrder.indexOf(term.name()); + if (rank < lastRank) { + return null; + } + lastRank = rank; + score += Math.max(1, variableOrder.size() - rank); + } + return new IndexChoice(indexName, score); + } + + /** Builds a TermRef for the pattern position named by the field letter ('s', 'p', 'o' or 'c'); any other character throws IllegalArgumentException. */ private LmdbLftjPatternPlan.TermRef term(StatementPattern pattern, char field) { + switch (field) { + case 's': + return LmdbLftjPatternPlan.TermRef.of(TripleStore.SUBJ_IDX, pattern.getSubjectVar()); + case 'p': + return LmdbLftjPatternPlan.TermRef.of(TripleStore.PRED_IDX, pattern.getPredicateVar()); + case 'o': + return LmdbLftjPatternPlan.TermRef.of(TripleStore.OBJ_IDX, pattern.getObjectVar()); + case 'c': + return LmdbLftjPatternPlan.TermRef.of(TripleStore.CONTEXT_IDX, pattern.getContextVar()); + 
default: + throw new IllegalArgumentException("Unknown LMDB field: " + field); + } + } + + /** Outcome of plan: either a plan (rejectionReason is null) or a rejection reason (plan is null). */ static final class PlanningResult { + private final LmdbLftjPlan plan; + private final String rejectionReason; + + private PlanningResult(LmdbLftjPlan plan, String rejectionReason) { + this.plan = plan; + this.rejectionReason = rejectionReason; + } + + static PlanningResult planned(LmdbLftjPlan plan) { + return new PlanningResult(plan, null); + } + + static PlanningResult rejected(String rejectionReason) { + return new PlanningResult(null, rejectionReason); + } + + boolean planned() { + return plan != null; + } + + LmdbLftjPlan plan() { + return plan; + } + + String rejectionReason() { + return rejectionReason; + } + } + + /** A variable order, the index chosen per pattern (parallel to the pattern list) and the summed score. */ private static final class PlanningCandidate { + private final List variableOrder; + private final List indexNames; + private final int score; + + private PlanningCandidate(List variableOrder, List indexNames, int score) { + this.variableOrder = variableOrder; + this.indexNames = indexNames; + this.score = score; + } + } + + /** An index name together with the score compatible computed for it. */ private static final class IndexChoice { + private final String indexName; + private final int score; + + private IndexChoice(String indexName, int score) { + this.indexName = indexName; + this.score = score; + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTripleSource.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTripleSource.java new file mode 100644 index 0000000000..52039ae841 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTripleSource.java @@ -0,0 +1,67 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.Comparator; +import java.util.Set; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.order.StatementOrder; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; + +/** TripleSource wrapper that forwards every overridden TripleSource method unchanged to delegate and additionally exposes an LmdbQueryAccess through queryAccess(). */ final class LmdbLftjTripleSource implements TripleSource { + + private final TripleSource delegate; + private final LmdbQueryAccess queryAccess; + + LmdbLftjTripleSource(TripleSource delegate, LmdbQueryAccess queryAccess) { + this.delegate = delegate; + this.queryAccess = queryAccess; + } + + /** Returns the LmdbQueryAccess given at construction. */ LmdbQueryAccess queryAccess() { + return queryAccess; + } + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) throws QueryEvaluationException { + return delegate.getStatements(subj, pred, obj, contexts); + } + + @Override + public CloseableIteration getStatements(StatementOrder order, Resource subj, IRI pred, + Value obj, Resource... contexts) throws QueryEvaluationException { + return delegate.getStatements(order, subj, pred, obj, contexts); + } + + @Override + public Set getSupportedOrders(Resource subj, IRI pred, Value obj, Resource... 
contexts) { + return delegate.getSupportedOrders(subj, pred, obj, contexts); + } + + @Override + public Comparator getComparator() { + return delegate.getComparator(); + } + + @Override + public ValueFactory getValueFactory() { + return delegate.getValueFactory(); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java new file mode 100644 index 0000000000..62943bed16 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java @@ -0,0 +1,80 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.Objects; +import java.util.Set; + +import org.eclipse.rdf4j.query.algebra.AbstractQueryModelNode; +import org.eclipse.rdf4j.query.algebra.QueryModelNode; +import org.eclipse.rdf4j.query.algebra.QueryModelVisitor; +import org.eclipse.rdf4j.query.algebra.TupleExpr; + +/** Query model node that carries an LmdbLftjPlan. It exposes no child nodes; binding names come from the plan, and equality is defined by the plan. */ final class LmdbLftjTupleExpr extends AbstractQueryModelNode implements TupleExpr { + + private final LmdbLftjPlan plan; + + LmdbLftjTupleExpr(LmdbLftjPlan plan) { + this.plan = plan; + } + + LmdbLftjPlan plan() { + return plan; + } + + @Override + public Set getBindingNames() { + return plan.bindingNames(); + } + + @Override + public Set getAssuredBindingNames() { + return plan.assuredBindingNames(); + } + + /** Dispatches to visitor.meetOther, since visitors have no dedicated method for this node type. */ @Override + public void visit(QueryModelVisitor visitor) throws X { + visitor.meetOther(this); + } + + @Override + public void visitChildren(QueryModelVisitor visitor) throws X { + // leaf-like node + } + + /** Always throws IllegalArgumentException: this node has no children to replace. */ @Override + public void replaceChildNode(QueryModelNode current, QueryModelNode replacement) { + throw new IllegalArgumentException("Not a child node: " + current); + } + + /** Appends the variable order, the pattern count and the index names of the plan to the inherited signature. */ @Override + public String getSignature() { + return super.getSignature() + "[varOrder=" + String.join(",", plan.variableOrder()) + + "; patterns=" + plan.patternCount() + + "; indexes=" + String.join(",", plan.indexNames()) + "]"; + } + + @Override + public boolean equals(Object other) { + return other instanceof LmdbLftjTupleExpr && Objects.equals(plan, ((LmdbLftjTupleExpr) other).plan); + } + + @Override + public int hashCode() { + return Objects.hash(plan); + } + + /** Returns a new node built from plan.copy(); super.clone() is not called. */ @Override + public LmdbLftjTupleExpr clone() { + return new LmdbLftjTupleExpr(plan.copy()); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java 
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java
new file mode 100644
index 0000000000..ebefb4d555
--- /dev/null
+++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java
@@ -0,0 +1,43 @@
+/*******************************************************************************
+ * Copyright (c) 2026 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb;
+
+import java.util.Set;
+
+import org.eclipse.rdf4j.model.Value;
+
+/**
+ * Access to the LMDB triple and value stores for one query evaluation: read transactions, value/id resolution and
+ * scans over a named index.
+ */
+interface LmdbQueryAccess {
+
+	TripleStore tripleStore();
+
+	/** Opens a read transaction; pair each call with {@link #releaseReadTxn(TxnManager.Txn)}. */
+	TxnManager.Txn acquireReadTxn();
+
+	void releaseReadTxn(TxnManager.Txn txn);
+
+	long resolveId(Value value);
+
+	Value resolveValue(long id);
+
+	boolean includeInferred();
+
+	/** Returns the index specifications configured for the triple store. */
+	Set<String> configuredIndexes();
+
+	/** Opens a scan over the index named {@code indexName}. */
+	RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, long context,
+			boolean explicit);
+}
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java
index 2bdff8fc2b..bb459236d6 100644
--- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java
+++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java
@@ -301,6 +301,14 @@ public SailSource getInferredSailSource() {
 		return new LmdbSailSource(false);
 	}
 
+	TripleStore getTripleStore() {
+		return tripleStore;
+	}
+
+	ValueStore getValueStore() {
+		return valueStore;
+	}
+
 	CloseableIteration getContexts() throws IOException {
 		Txn txn =
tripleStore.getTxnManager().createReadTxn();
 		RecordIterator records = tripleStore.getAllTriplesSortedByContext(txn);
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java
index 01a62d68b5..a8767fc5fb 100644
--- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java
+++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java
@@ -168,8 +168,20 @@ public void setDataDir(File dataDir) {
 	 * @return Returns the {@link EvaluationStrategy}.
 	 */
 	public synchronized EvaluationStrategyFactory getEvaluationStrategyFactory() {
+		// an already-set factory of any other type is rejected while lftjEnabled=true
+		if (config.isLftjEnabled() && evalStratFactory != null
+				&& !(evalStratFactory instanceof LmdbLftjEvaluationStrategyFactory)) {
+			throw new IllegalStateException(
+					"LMDB LFTJ requires " + LmdbLftjEvaluationStrategyFactory.class.getName()
+							+ " when lftjEnabled=true");
+		}
 		if (evalStratFactory == null) {
-			evalStratFactory = new StrictEvaluationStrategyFactory(getFederatedServiceResolver());
+			// no factory set yet: create the default that matches the lftjEnabled setting
+			if (config.isLftjEnabled()) {
+				evalStratFactory = new LmdbLftjEvaluationStrategyFactory(this);
+			} else {
+				evalStratFactory = new StrictEvaluationStrategyFactory(getFederatedServiceResolver());
+			}
 		}
 		evalStratFactory.setQuerySolutionCacheThreshold(getIterationCacheSyncThreshold());
 		evalStratFactory.setTrackResultSize(isTrackResultSize());
@@ -407,6 +419,11 @@ LmdbSailStore getBackingStore() {
 		return backingStore;
 	}
 
+	/** Returns the configuration object held in {@code config}. */
+	LmdbStoreConfig getLmdbStoreConfig() {
+		return config;
+	}
+
 	private boolean upgradeStore(File dataDir, String version) throws SailException {
 		// nothing to do, just update version number
 		return true;
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java
index 9c0577e655..064d6f09ae 100644
--- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java
+++
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java
@@ -10,6 +10,9 @@
  *******************************************************************************/
 package org.eclipse.rdf4j.sail.lmdb;
 
+import java.io.IOException;
+import java.util.Set;
+
 import org.eclipse.rdf4j.common.concurrent.locks.Lock;
 import org.eclipse.rdf4j.common.iteration.CloseableIteration;
 import org.eclipse.rdf4j.common.iteration.IterationWrapper;
@@ -21,8 +24,11 @@
 import org.eclipse.rdf4j.query.Dataset;
 import org.eclipse.rdf4j.query.QueryEvaluationException;
 import org.eclipse.rdf4j.query.algebra.TupleExpr;
+import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
 import org.eclipse.rdf4j.sail.SailException;
 import org.eclipse.rdf4j.sail.SailReadOnlyException;
+import org.eclipse.rdf4j.sail.base.SailDataset;
+import org.eclipse.rdf4j.sail.base.SailDatasetTripleSource;
 import org.eclipse.rdf4j.sail.base.SailSourceConnection;
 import org.eclipse.rdf4j.sail.helpers.DefaultSailChangedEvent;
 import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
@@ -134,7 +140,8 @@ protected CloseableIteration evaluateInternal(TupleExpr tu
 			BindingSet bindings, boolean includeInferred) throws SailException {
 		// ensure that all elements of the binding set are initialized (lazy values are resolved)
 		return new IterationWrapper(
-				super.evaluateInternal(tupleExpr, dataset, bindings, includeInferred)) {
+				evaluateWithTripleSource(tupleExpr, dataset, bindings, includeInferred,
+						rdfDataset -> createTripleSource(rdfDataset, dataset, includeInferred))) {
 			@Override
 			public BindingSet next() throws QueryEvaluationException {
 				BindingSet bs = super.next();
@@ -144,6 +151,116 @@ public BindingSet next() throws QueryEvaluationException {
 		};
 	}
 
+	/**
+	 * Creates the triple source for one evaluation. The {@link LmdbLftjTripleSource} wrapper is only used when
+	 * {@link #isLftjRuntimeSafe(Dataset)} holds; otherwise the plain dataset-backed source is returned.
+	 */
+	private TripleSource createTripleSource(SailDataset rdfDataset, Dataset dataset, boolean includeInferred) {
+		TripleSource delegate = new SailDatasetTripleSource(lmdbStore.getValueFactory(), rdfDataset);
+		if (!isLftjRuntimeSafe(dataset)) {
+			return delegate;
+		}
+		return new LmdbLftjTripleSource(delegate, createQueryAccess(includeInferred));
+	}
+
+	/**
+	 * LFTJ is used only when it is enabled in the store config, the configured indexes pass the planner's coverage
+	 * check, the change event reports no added or removed statements, and no dataset restriction applies.
+	 */
+	private boolean isLftjRuntimeSafe(Dataset dataset) {
+		return lmdbStore.getLmdbStoreConfig().isLftjEnabled()
+				&& hasStrongLftjIndexCoverage()
+				&& !hasPendingLocalChangesForLftj()
+				&& isDefaultDataset(dataset);
+	}
+
+	private boolean hasStrongLftjIndexCoverage() {
+		return LmdbLftjPlanner.hasRequiredIndexCoverage(lmdbStore.getBackingStore()
+				.getTripleStore()
+				.getConfiguredIndexSpecs());
+	}
+
+	/** True once {@code sailChangedEvent} has recorded added or removed statements. */
+	private boolean hasPendingLocalChangesForLftj() {
+		return sailChangedEvent.statementsAdded() || sailChangedEvent.statementsRemoved();
+	}
+
+	/** True for a {@code null} dataset or one that names no graphs at all. */
+	private boolean isDefaultDataset(Dataset dataset) {
+		return dataset == null || (dataset.getDefaultGraphs().isEmpty()
+				&& dataset.getNamedGraphs().isEmpty()
+				&& dataset.getDefaultRemoveGraphs().isEmpty()
+				&& dataset.getDefaultInsertGraph() == null);
+	}
+
+	/**
+	 * Exposes the backing triple and value stores as an {@link LmdbQueryAccess}. An {@link IOException} raised by
+	 * a store call is rethrown wrapped in a {@link SailException}.
+	 */
+	private LmdbQueryAccess createQueryAccess(boolean includeInferred) {
+		LmdbSailStore backingStore = lmdbStore.getBackingStore();
+		TripleStore tripleStore = backingStore.getTripleStore();
+		ValueStore valueStore = backingStore.getValueStore();
+		return new LmdbQueryAccess() {
+			@Override
+			public TripleStore tripleStore() {
+				return tripleStore;
+			}
+
+			@Override
+			public TxnManager.Txn acquireReadTxn() {
+				try {
+					return tripleStore.getTxnManager().createReadTxn();
+				} catch (IOException e) {
+					throw new SailException(e);
+				}
+			}
+
+			@Override
+			public void releaseReadTxn(TxnManager.Txn txn) {
+				txn.close();
+			}
+
+			@Override
+			public long resolveId(Value value) {
+				try {
+					return valueStore.getId(value);
+				} catch (IOException e) {
+					throw new SailException(e);
+				}
+			}
+
+			@Override
+			public Value resolveValue(long id) {
+				try {
+					return valueStore.getValue(id);
+				} catch (IOException e) {
+					throw new SailException(e);
+				}
+			}
+
+			@Override
+			public boolean includeInferred() {
+				return includeInferred;
+			}
+
+			@Override
+			public Set<String> configuredIndexes() {
+				return tripleStore.getConfiguredIndexSpecs();
+			}
+
+			@Override
+			public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj,
+					long context, boolean explicit) {
+				try {
+					return tripleStore.getTriples(txn, indexName, subj, pred, obj, context, explicit);
+				} catch (IOException e) {
+					throw new SailException(e);
+				}
+			}
+		};
+	}
+
 	@Override
 	protected CloseableIteration getStatementsInternal(Resource subj, IRI pred,
 			Value obj,
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java
new file mode 100644
index 0000000000..3e8ea131d7
--- /dev/null
+++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java
@@ -0,0 +1,95 @@
+/*******************************************************************************
+ * Copyright (c) 2026 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb;
+
+import java.util.Arrays;
+import java.util.TreeSet;
+
+/**
+ * Cursor over the distinct ids that one variable takes in a single pattern. {@link #initialize} scans the
+ * pattern's index once and materializes the ids into a sorted array; {@link #seek}, {@link #next} and
+ * {@link #value} then walk that array.
+ */
+class LmdbTrieCursor implements AutoCloseable {
+
+	private final LmdbLftjPatternPlan patternPlan;
+	private final String variableName;
+	private final LmdbQueryAccess queryAccess;
+	private final boolean explicit;
+
+	// sorted ascending without duplicates; a subclass that assigns it must keep that invariant for seek()
+	protected long[] values = new long[0];
+	private int position;
+
+	LmdbTrieCursor(LmdbLftjPatternPlan patternPlan, String variableName, LmdbQueryAccess queryAccess,
+			boolean explicit) {
+		this.patternPlan = patternPlan;
+		this.variableName = variableName;
+		this.queryAccess = queryAccess;
+		this.explicit = explicit;
+	}
+
+	/**
+	 * Runs the scan for the given binding state and rewinds the cursor to the first id.
+	 *
+	 * @return {@code true} if at least one id was found
+	 */
+	boolean initialize(LmdbLftjBindingState state) {
+		TreeSet<Long> candidates = new TreeSet<>();
+		long[] scanKey = patternPlan.scanKey(state);
+		try (RecordIterator records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1],
+				scanKey[2], scanKey[3], explicit)) {
+			long[] quad;
+			while ((quad = records.next()) != null) {
+				candidates.add(patternPlan.valueFor(variableName, quad));
+			}
+		}
+		values = candidates.stream().mapToLong(Long::longValue).toArray();
+		position = 0;
+		return values.length > 0;
+	}
+
+	/**
+	 * Advances to the first id {@code >= target} at or after the current position; never moves backwards.
+	 *
+	 * @return {@code false} if no such id is left
+	 */
+	boolean seek(long target) {
+		if (position < values.length && values[position] < target) {
+			// values is sorted and duplicate-free, so the insertion point is the first id >= target
+			int found = Arrays.binarySearch(values, position + 1, values.length, target);
+			position = found >= 0 ? found : -found - 1;
+		}
+		return position < values.length;
+	}
+
+	/** Steps to the next id; returns {@code false} once the ids are exhausted. */
+	boolean next() {
+		position++;
+		return position < values.length;
+	}
+
+	/** Current id; only valid while the last initialize/seek/next call returned {@code true}. */
+	long value() {
+		return values[position];
+	}
+
+	/** Returns the backing array itself, not a copy. */
+	long[] values() {
+		return values;
+	}
+
+	@Override
+	public void close() {
+		// values already materialized
+	}
+}
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java
new file mode 100644
index 0000000000..021ea765a0
--- /dev/null
+++
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java
@@ -0,0 +1,58 @@
+/*******************************************************************************
+ * Copyright (c) 2026 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+// Some portions generated by Codex
+package org.eclipse.rdf4j.sail.lmdb;
+
+import java.util.Arrays;
+
+/**
+ * Cursor over the union of the explicit and the inferred scan of one pattern. The explicit scan is the inherited
+ * {@link LmdbTrieCursor} behaviour; the inferred scan runs through a second cursor and both id arrays are merged.
+ */
+final class LmdbUnionTrieCursor extends LmdbTrieCursor {
+
+	private final LmdbTrieCursor inferredCursor;
+
+	LmdbUnionTrieCursor(LmdbLftjPatternPlan patternPlan, String variableName, LmdbQueryAccess queryAccess) {
+		super(patternPlan, variableName, queryAccess, true);
+		this.inferredCursor = new LmdbTrieCursor(patternPlan, variableName, queryAccess, false);
+	}
+
+	@Override
+	boolean initialize(LmdbLftjBindingState state) {
+		// The inherited scan (explicit=true) also rewinds the private cursor position; assigning values alone
+		// would leave it pointing into the previous result whenever the cursor is initialized again.
+		super.initialize(state);
+		inferredCursor.initialize(state);
+		values = mergeDistinct(values, inferredCursor.values());
+		return values.length > 0;
+	}
+
+	/** Merges two ascending arrays into one ascending array without duplicates. */
+	private static long[] mergeDistinct(long[] left, long[] right) {
+		long[] merged = new long[left.length + right.length];
+		int size = 0;
+		int leftIndex = 0;
+		int rightIndex = 0;
+		while (leftIndex < left.length || rightIndex < right.length) {
+			long next;
+			if (rightIndex >= right.length || (leftIndex < left.length && left[leftIndex] <= right[rightIndex])) {
+				next = left[leftIndex++];
+			} else {
+				next = right[rightIndex++];
+			}
+			if (size == 0 || merged[size - 1] != next) {
+				merged[size++] = next;
+			}
+		}
+		return Arrays.copyOf(merged, size);
+	}
+}
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java
index dbe4b2fcd0..41a0020534 100644
--- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java
+++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java
@@ -320,6 +320,10 @@ TxnManager getTxnManager() {
 		return txnManager;
 	}
 
+	Set<String> getConfiguredIndexSpecs() throws SailException {
+		return new HashSet<>(getIndexSpecs());
+	}
+
 	/**
 	 * Parses a comma/whitespace-separated list of index specifications. Index specifications are required to consists
 	 * of 4 characters: 's', 'p', 'o' and 'c'.
@@ -523,6 +527,14 @@ public RecordIterator getTriples(Txn txn, long subj, long pred, long obj, long c
 		return getTriplesUsingIndex(txn, subj, pred, obj, context, explicit, index, doRangeSearch);
 	}
 
+	/** Scans the index named {@code indexName}; a range search is used when its pattern score is positive. */
+	RecordIterator getTriples(Txn txn, String indexName, long subj, long pred, long obj, long context, boolean explicit)
+			throws IOException {
+		TripleIndex named = getIndex(indexName);
+		boolean rangeSearch = named.getPatternScore(subj, pred, obj, context) > 0;
+		return getTriplesUsingIndex(txn, subj, pred, obj, context, explicit, named, rangeSearch);
+	}
+
 	boolean hasTriples(boolean explicit) throws IOException {
 		TripleIndex mainIndex = indexes.get(0);
 		return txnManager.doWith((stack, txn) -> {
@@ -537,6 +549,18 @@ private RecordIterator getTriplesUsingIndex(Txn txn, long subj, long pred, long
 		return new LmdbRecordIterator(index, rangeSearch, subj, pred, obj, context, explicit, txn);
 	}
 
+	/**
+	 * Returns the first configured index whose {@code toString()} equals {@code indexName}.
+	 *
+	 * @throws IllegalArgumentException if no configured index matches
+	 */
+	private TripleIndex getIndex(String indexName) {
+		return indexes.stream()
+				.filter(candidate -> candidate.toString().equals(indexName))
+				.findFirst()
+				.orElseThrow(() -> new IllegalArgumentException("Unknown LMDB index: " + indexName));
+	}
+
 	/**
 	 * Computes start key for a bucket by linear interpolation between a lower and an upper bound.
* diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java index d7af70f2f4..4eab8483fc 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java @@ -78,6 +78,8 @@ public class LmdbStoreConfig extends BaseSailConfig { private boolean pageCardinalityEstimator = true; + private boolean lftjEnabled = true; + private long valueEvictionInterval = Duration.ofSeconds(60).toMillis(); /*--------------* @@ -213,6 +215,15 @@ public LmdbStoreConfig setPageCardinalityEstimator(boolean pageCardinalityEstima return this; } + public boolean isLftjEnabled() { + return lftjEnabled; + } + + public LmdbStoreConfig setLftjEnabled(boolean lftjEnabled) { + this.lftjEnabled = lftjEnabled; + return this; + } + @Override public Resource export(Model m) { Resource implNode = super.export(m); @@ -252,6 +263,9 @@ public Resource export(Model m) { if (!pageCardinalityEstimator) { m.add(implNode, LmdbStoreSchema.PAGE_CARDINALITY_ESTIMATOR, vf.createLiteral(false)); } + if (!lftjEnabled) { + m.add(implNode, LmdbStoreSchema.LFTJ_ENABLED, vf.createLiteral(false)); + } if (valueEvictionInterval != Duration.ofSeconds(60).toMillis()) { m.add(implNode, LmdbStoreSchema.VALUE_EVICTION_INTERVAL, vf.createLiteral(valueEvictionInterval)); } @@ -369,6 +383,16 @@ public void parse(Model m, Resource implNode) throws SailConfigException { } }); + Models.objectLiteral(m.getStatements(implNode, LmdbStoreSchema.LFTJ_ENABLED, null)).ifPresent(lit -> { + try { + setLftjEnabled(lit.booleanValue()); + } catch (IllegalArgumentException e) { + throw new SailConfigException( + "Boolean value required for " + LmdbStoreSchema.LFTJ_ENABLED + " property, found " + + lit); + } + }); + Models.objectLiteral(m.getStatements(implNode, LmdbStoreSchema.VALUE_EVICTION_INTERVAL, 
null)) .ifPresent(lit -> { try { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java index 75a64db9ea..41afe20500 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java @@ -82,6 +82,11 @@ public class LmdbStoreSchema { */ public final static IRI PAGE_CARDINALITY_ESTIMATOR; + /** + * http://rdf4j.org/config/sail/lmdb#lftjEnabled + */ + public final static IRI LFTJ_ENABLED; + /** * http://rdf4j.org/config/sail/lmdb#valueEvictionInterval */ @@ -100,6 +105,7 @@ public class LmdbStoreSchema { NAMESPACE_ID_CACHE_SIZE = factory.createIRI(NAMESPACE, "namespaceIDCacheSize"); AUTO_GROW = factory.createIRI(NAMESPACE, "autoGrow"); PAGE_CARDINALITY_ESTIMATOR = factory.createIRI(NAMESPACE, "pageCardinalityEstimator"); + LFTJ_ENABLED = factory.createIRI(NAMESPACE, "lftjEnabled"); VALUE_EVICTION_INTERVAL = factory.createIRI(NAMESPACE, "valueEvictionInterval"); } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java index d8d94dd664..2bd2c37317 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java @@ -15,6 +15,9 @@ import static org.junit.Assert.assertTrue; import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.EmptyIteration; @@ -36,6 +39,7 @@ import org.eclipse.rdf4j.sail.base.SailDataset; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.AfterEach; +import 
org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
@@ -255,6 +259,172 @@ public void testExplainExecutedHidesEstimateStabilityStats() {
 		}
 	}
 
+	@Test
+	public void testExplainOptimizedDoesNotUseLftjWithDefaultIndexes(@TempDir File dataDir) {
+		Repository repository = createRepository(dataDir, new LmdbStoreConfig("spoc,posc"), conn -> {
+		});
+
+		try (RepositoryConnection conn = repository.getConnection()) {
+			String actual = conn.prepareTupleQuery(cyclicQuery())
+					.explain(Explanation.Level.Optimized)
+					.toString();
+
+			assertFalse(actual, actual.contains("LmdbLftjTupleExpr"));
+		} finally {
+			repository.shutDown();
+		}
+	}
+
+	@Test
+	public void testExplainOptimizedDoesNotUseLftjForAcyclicQueryWithStrongIndexes(@TempDir File dataDir) {
+		LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc");
+		Repository repository = createRepository(dataDir, config, conn -> {
+		});
+
+		try (RepositoryConnection conn = repository.getConnection()) {
+			String actual = conn.prepareTupleQuery(chainQuery())
+					.explain(Explanation.Level.Optimized)
+					.toString();
+
+			assertFalse(actual, actual.contains("LmdbLftjTupleExpr"));
+		} finally {
+			repository.shutDown();
+		}
+	}
+
+	@Test
+	public void testExplainOptimizedDoesNotUseLftjWithoutFullStrongIndexCoverage(@TempDir File dataDir) {
+		LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc");
+		Repository repository = createRepository(dataDir, config, conn -> {
+		});
+
+		try (RepositoryConnection conn = repository.getConnection()) {
+			String actual = conn.prepareTupleQuery(cyclicQuery())
+					.explain(Explanation.Level.Optimized)
+					.toString();
+
+			assertFalse(actual, actual.contains("LmdbLftjTupleExpr"));
+		} finally {
+			repository.shutDown();
+		}
+	}
+
+	@Test
+	public void testExplainOptimizedUsesLftjForCyclicQueryWithStrongIndexes(@TempDir File dataDir) {
+		LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc");
+		Repository repository = createRepository(dataDir, config, conn -> {
+		});
+
+		try (RepositoryConnection enabledConnection = repository.getConnection()) {
+			String actualPlan = enabledConnection.prepareTupleQuery(cyclicQuery())
+					.explain(Explanation.Level.Optimized)
+					.toString();
+			assertTrue(actualPlan, actualPlan.contains("LmdbLftjTupleExpr"));
+		} finally {
+			repository.shutDown();
+		}
+	}
+
+	@Test
+	public void testCyclicQueryMatchesResultsWhenLftjActivates(@TempDir File disabledDir, @TempDir File enabledDir) {
+		LmdbStoreConfig disabled = new LmdbStoreConfig("spoc,posc");
+		LmdbStoreConfig enabled = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc");
+
+		Repository disabledRepository = createRepository(disabledDir, disabled, this::seedCyclicData);
+		Repository enabledRepository = createRepository(enabledDir, enabled, this::seedCyclicData);
+		String query = cyclicQuery();
+
+		// one try/finally around both phases so a failing plan assertion still shuts both repositories down
+		try {
+			try (RepositoryConnection enabledConnection = enabledRepository.getConnection()) {
+				String actualPlan = enabledConnection.prepareTupleQuery(query)
+						.explain(Explanation.Level.Optimized)
+						.toString();
+				assertTrue(actualPlan, actualPlan.contains("LmdbLftjTupleExpr"));
+			}
+
+			assertEquals(evaluate(disabledRepository, query), evaluate(enabledRepository, query));
+		} finally {
+			disabledRepository.shutDown();
+			enabledRepository.shutDown();
+		}
+	}
+
+	@Test
+	public void testRejectsCustomEvaluationStrategyFactoryWhenLftjEnabled() {
+		LmdbStoreConfig config = new LmdbStoreConfig();
+		LmdbStore store = new LmdbStore(config);
+		store.setEvaluationStrategyFactory(
+				new org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategyFactory());
+
+		Assertions.assertThrows(IllegalStateException.class, store::getEvaluationStrategyFactory);
+	}
+
+	/** Creates and initializes an LMDB repository in {@code dataDir}, then lets {@code seed} fill it. */
+	private Repository createRepository(File dataDir, LmdbStoreConfig config, Consumer<RepositoryConnection> seed) {
+		Repository repository = new SailRepository(new LmdbStore(dataDir, config));
+		repository.init();
+		try (RepositoryConnection connection = repository.getConnection()) {
+			seed.accept(connection);
+		}
+		return repository;
+	}
+
+	/** Three-pattern path query {@code ?a -> ?b -> ?c -> ?d}; no variable closes a cycle. */
+	private String chainQuery() {
+		return """
+				SELECT * WHERE {
+				  ?a <urn:p1> ?b .
+				  ?b <urn:p2> ?c .
+				  ?c <urn:p3> ?d .
+				}
+				""";
+	}
+
+	/** Triangle query over the predicates written by {@link #seedCyclicData(RepositoryConnection)}. */
+	private String cyclicQuery() {
+		return """
+				SELECT ?a ?b ?c WHERE {
+				  ?a <urn:p1> ?b .
+				  ?b <urn:p2> ?c .
+				  ?c <urn:p3> ?a .
+				}
+				""";
+	}
+
+	/** Adds seven statements over {@code urn:p1}, {@code urn:p2} and {@code urn:p3}. */
+	private void seedCyclicData(RepositoryConnection connection) {
+		IRI a1 = F.createIRI("urn:a1");
+		IRI a2 = F.createIRI("urn:a2");
+		IRI b1 = F.createIRI("urn:b1");
+		IRI b2 = F.createIRI("urn:b2");
+		IRI c1 = F.createIRI("urn:c1");
+		IRI c2 = F.createIRI("urn:c2");
+
+		connection.add(a1, F.createIRI("urn:p1"), b1);
+		connection.add(a2, F.createIRI("urn:p1"), b2);
+		connection.add(a1, F.createIRI("urn:p1"), b2);
+		connection.add(b1, F.createIRI("urn:p2"), c1);
+		connection.add(b2, F.createIRI("urn:p2"), c2);
+		connection.add(c1, F.createIRI("urn:p3"), a1);
+		connection.add(c2, F.createIRI("urn:p3"), a2);
+	}
+
+	/** Runs the query and returns its rows as sorted {@code a|b|c} strings. */
+	private List<String> evaluate(Repository repository, String query) {
+		List<String> rows = new ArrayList<>();
+		try (RepositoryConnection connection = repository.getConnection();
+				TupleQueryResult result = connection.prepareTupleQuery(query).evaluate()) {
+			while (result.hasNext()) {
+				var bindingSet = result.next();
+				rows.add(bindingSet.getValue("a").stringValue() + "|" + bindingSet.getValue("b").stringValue() + "|"
+						+ bindingSet.getValue("c").stringValue());
+			}
+		}
+		rows.sort(String::compareTo);
+		return rows;
+	}
+
 	@AfterEach
 	public void after() {
 		repo.shutDown();
diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java
index 18133dc948..1efdc989c9 100644
--- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java
+++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java
@@ -34,6 +34,8 @@
class LmdbStoreConfigTest {
 
 	private static final IRI LEGACY_APPEND_MODE = Values.iri(LmdbStoreSchema.NAMESPACE + "appendMode");
 
+	private static final IRI LFTJ_ENABLED = Values.iri(LmdbStoreSchema.NAMESPACE + "lftjEnabled");
+
 	private static final IRI NO_READAHEAD = Values.iri(LmdbStoreSchema.NAMESPACE + "noReadahead");
 
 	@Test
@@ -46,6 +48,12 @@ void noReadaheadDefaultsToDisabled() {
 		assertThat(invokeBooleanGetter(new LmdbStoreConfig(), "getNoReadahead")).isFalse();
 	}
 
+	// a freshly constructed config must report lftjEnabled=true
+	@Test
+	void lftjEnabledDefaultsToEnabled() {
+		assertThat(invokeBooleanGetter(new LmdbStoreConfig(), "isLftjEnabled")).isTrue();
+	}
+
 	@ParameterizedTest
 	@ValueSource(booleans = { true, false })
 	void testThatLmdbStoreConfigParseAndExportNoReadahead(final boolean noReadahead) {
@@ -70,6 +78,20 @@ void testThatLmdbStoreConfigParseAndExportPageCardinalityEstimator(final boolean
 		);
 	}
 
+	// NOTE(review): the last argument (!lftjEnabled) presumably tells the helper whether the property is
+	// expected in the exported model — confirm against testParseAndExportReflective
+	@ParameterizedTest
+	@ValueSource(booleans = { true, false })
+	void testThatLmdbStoreConfigParseAndExportLftjEnabled(final boolean lftjEnabled) {
+		testParseAndExportReflective(
+				LFTJ_ENABLED,
+				Values.literal(lftjEnabled),
+				"isLftjEnabled",
+				lftjEnabled,
+				!lftjEnabled
+		);
+	}
+
 	@ParameterizedTest
 	@ValueSource(longs = { 1, 205454, 0, -1231 })
 	void testThatLmdbStoreConfigParseAndExportValueEvictionInterval(final long valueEvictionInterval) {

From 67f5e267c594f6712cebf097e59169dfbe65a41c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?=
Date: Sun, 5 Apr 2026 11:24:57 +0200
Subject: [PATCH 03/32] start benchmarking

---
 .../sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java | 7 +++----
 .../lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md | 8 ++++++++
 2 files changed, 11 insertions(+), 4 deletions(-)
 create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md

diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index 52db260637..4d3bd051a5 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -39,10 +39,10 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) -@Warmup(iterations = 2) +@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) @BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" }) -@Measurement(iterations = 3) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @OutputTimeUnit(TimeUnit.MILLISECONDS) public class FoafCliqueQueryBenchmark { @@ -53,7 +53,7 @@ public class FoafCliqueQueryBenchmark { @Param({ "5000" }) public int peopleCount; - @Param({ "15" }) + @Param({ "30" }) public int cliquePercentage; @Param({ "3" }) @@ -153,7 +153,6 @@ private static String cycleQuery(int size) { } builder.append(")\n"); builder.append("}\n"); - builder.append("LIMIT 10\n"); return builder.toString(); } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md new file mode 100644 index 0000000000..ed2045bd4e --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -0,0 +1,8 @@ +# Develop branch +```text +Benchmark (cliquePercentage) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units +FoafCliqueQueryBenchmark.cycle3 30 8 3 5000 15000 12345 avgt 3 92.508 ± 22.625 ms/op +FoafCliqueQueryBenchmark.cycle4 30 8 3 5000 15000 12345 avgt 3 644.258 ± 310.206 ms/op +FoafCliqueQueryBenchmark.cycle5 30 8 3 5000 15000 12345 avgt 3 3891.994 ± 1215.676 ms/op +``` + 
From 9191827a27bb3c71f448f0b15bec5b29ecd95595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 11:53:38 +0200 Subject: [PATCH 04/32] start benchmarking --- .../benchmark/FoafCliqueQueryBenchmark.java | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index 4d3bd051a5..b32eef0622 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -39,10 +39,10 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) -@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) @BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" }) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @OutputTimeUnit(TimeUnit.MILLISECONDS) public class FoafCliqueQueryBenchmark { @@ -68,6 +68,9 @@ public class FoafCliqueQueryBenchmark { @Param({ "12345" }) public long seed; + @Param({ "true", "false" }) + public boolean lftjEnabled; + private File dataDir; private SailRepository repository; @@ -81,7 +84,7 @@ public static void main(String[] args) throws RunnerException { @Setup(Level.Trial) public void setup() throws IOException { dataDir = Files.createTempDirectory("rdf4j-lmdb-foaf-cliques").toFile(); - repository = new SailRepository(new LmdbStore(dataDir, createLftjBenchmarkConfig())); + repository = new SailRepository(new LmdbStore(dataDir, createLftjBenchmarkConfig(lftjEnabled))); repository.init(); try (SailRepositoryConnection connection = 
repository.getConnection()) { @@ -105,15 +108,15 @@ public long cycle3() { return executeCount(QUERY_CYCLE_3); } - @Benchmark - public long cycle4() { - return executeCount(QUERY_CYCLE_4); - } - - @Benchmark - public long cycle5() { - return executeCount(QUERY_CYCLE_5); - } +// @Benchmark +// public long cycle4() { +// return executeCount(QUERY_CYCLE_4); +// } +// +// @Benchmark +// public long cycle5() { +// return executeCount(QUERY_CYCLE_5); +// } private long executeCount(String query) { try (SailRepositoryConnection connection = repository.getConnection()) { @@ -121,8 +124,9 @@ private long executeCount(String query) { } } - private static LmdbStoreConfig createLftjBenchmarkConfig() { + private static LmdbStoreConfig createLftjBenchmarkConfig(boolean lftjEnabled) { LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); + config.setLftjEnabled(lftjEnabled); config.setForceSync(false); config.setValueDBSize(1_073_741_824L); config.setTripleDBSize(config.getValueDBSize()); From 82b55cbcbd1fe3c3f29041e6059174a15dc18260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 11:58:55 +0200 Subject: [PATCH 05/32] add a skill for writing high performance java code --- .codex/skills/high-performance-java/SKILL.md | 119 ++++++++++++++++++ .../high-performance-java/agents/openai.yaml | 4 + .../references/coding-rules.md | 65 ++++++++++ .../references/evidence-workflow.md | 64 ++++++++++ .../references/jdk-21-26-notes.md | 42 +++++++ 5 files changed, 294 insertions(+) create mode 100644 .codex/skills/high-performance-java/SKILL.md create mode 100644 .codex/skills/high-performance-java/agents/openai.yaml create mode 100644 .codex/skills/high-performance-java/references/coding-rules.md create mode 100644 .codex/skills/high-performance-java/references/evidence-workflow.md create mode 100644 .codex/skills/high-performance-java/references/jdk-21-26-notes.md diff --git a/.codex/skills/high-performance-java/SKILL.md 
b/.codex/skills/high-performance-java/SKILL.md new file mode 100644 index 0000000000..501990d804 --- /dev/null +++ b/.codex/skills/high-performance-java/SKILL.md @@ -0,0 +1,119 @@ +--- +name: high-performance-java +description: Use when writing, reviewing, or reshaping HotSpot Java where throughput, latency, allocation rate, zero-copy, lazy evaluation, non-materialization, intrinsics, SuperWord auto-vectorization, or C2 assembly matter. Bias toward specialized hot-path code, then ground claims in benchmarks and JIT evidence. +--- + +# High-Performance Java + +Use this skill for Java hot paths. Default bias: fewer allocations, fewer copies, less polymorphism, narrower code shape, stronger evidence. + +HotSpot-only v1. Baseline assumptions: +- repo baseline: JDK 21 +- current local runtime may be newer +- low-level claims stay provisional until benchmark + JIT evidence agree + +## Core loop + +1. Identify the workload shape. +2. Find the hot loop or hot call chain. +3. Write the narrow fast path first. +4. Push generic abstraction, materialization, and dispatch out of the loop. +5. Benchmark before claiming improvement. +6. Inspect HotSpot decisions before claiming JVM-level reasons. + +## Default coding bias + +- Prefer zero-copy over copy-transform-copy. +- Prefer reuse over per-item allocation. +- Prefer lazy traversal over full materialization. +- Prefer primitives, flat arrays, and tight counted loops in hot paths. +- Prefer monomorphic calls that inline away. +- Prefer specialized lambda/adaptor code for the active workload. +- Prefer one fast path plus one cold fallback over a single generalized hot path. + +## Hard rules + +- Do not defend a perf change with style arguments alone. +- Do not claim “faster” without a measurement path. +- Do not claim “JIT will optimize this” without checking inlining / compilation evidence. +- Do not keep elegant-but-generic stream pipelines in verified hot loops. 
+- Do not pay interface / visitor / wrapper overhead inside the hottest loop unless evidence shows it disappears. + +## Design checklist + +Ask these first: +- What allocates on the steady-state path? +- What copies bytes, chars, arrays, or collections? +- What materializes intermediate state that could stay streamed or cursor-based? +- What dispatch stays virtual or megamorphic in the inner loop? +- What loop shape blocks scalar replacement, inlining, or SuperWord vectorization? +- What “generic” branch handles cases the active workload never uses? + +## Workflow + +### 1) Shape the code for HotSpot + +- Split hot and cold paths. +- Hoist invariant checks and decoding outside the loop. +- Replace generic callback stacks with narrow-path adapters. +- Reuse mutable carriers only when ownership is clear. +- Keep loop bodies predictable, contiguous, and exception-light. + +Detailed rules: see [references/coding-rules.md](references/coding-rules.md). + +### 2) Measure + +If you are in this RDF4J repo, use the local benchmark wrapper first: + +```bash +scripts/run-single-benchmark.sh --module <module> --class <benchmark-class> --method <benchmark-method> +``` + +If you are outside RDF4J, use JMH or an existing reproducible micro/macro benchmark. + +Measurement workflow: see [references/evidence-workflow.md](references/evidence-workflow.md). + +### 3) Explain with JVM evidence + +When a benchmark moves, inspect what HotSpot actually did: +- compilation tier +- inlining success/failure +- intrinsic usage when relevant +- allocation pressure +- assembly / C2 logs when needed + +Use sibling skill [hotspot-jit-forensics](../hotspot-jit-forensics/SKILL.md) for method-scoped C2 evidence. Use `async-profiler-java-macos` when wall/cpu/alloc evidence is needed on macOS. + +### 4) Report honestly + +Frame conclusions as: +- hypothesis +- benchmark result +- JIT/profile evidence +- confidence + +If assembly is unavailable, say so and fall back to compilation logs, inlining diagnostics, and profile data. 
+ +## Trigger examples + +Use this skill when the user asks to: +- remove allocation pressure from a parser, iterator, encoder, decoder, or query loop +- make a Java path zero-copy or lazy +- specialize code for one workload instead of many +- explain whether a HotSpot optimization actually happened +- ground a Java perf change in benchmark + C2 evidence + +## Reference map + +- Coding rules: [references/coding-rules.md](references/coding-rules.md) +- Evidence workflow: [references/evidence-workflow.md](references/evidence-workflow.md) +- JDK version guardrails: [references/jdk-21-26-notes.md](references/jdk-21-26-notes.md) + +## Output contract + +When you use this skill, the answer should usually include: +- hot-path hypothesis +- concrete code-shape recommendation +- benchmark command or benchmark evidence +- JIT/profile evidence or the missing prerequisite +- a confidence statement tied to the active JDK diff --git a/.codex/skills/high-performance-java/agents/openai.yaml b/.codex/skills/high-performance-java/agents/openai.yaml new file mode 100644 index 0000000000..1cea978b4d --- /dev/null +++ b/.codex/skills/high-performance-java/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "High-Performance Java" + short_description: "Concise hot-path Java coding skill" + default_prompt: "Use $high-performance-java to write or review a Java hot path with benchmark and HotSpot evidence." diff --git a/.codex/skills/high-performance-java/references/coding-rules.md b/.codex/skills/high-performance-java/references/coding-rules.md new file mode 100644 index 0000000000..acfb3434c3 --- /dev/null +++ b/.codex/skills/high-performance-java/references/coding-rules.md @@ -0,0 +1,65 @@ +# Coding Rules + +Use these rules only for real or suspected hot paths. Outside hot code, keep the code simple. + +## Zero-copy rules + +- Pass slices, offsets, lengths, cursors, or views instead of copying into new arrays/strings/collections. 
+- Decode or parse directly from the source buffer when ownership and lifetime allow it. +- Delay conversion to `String`, boxed numbers, or collection objects until a boundary that actually needs them. +- Prefer bulk operations that map to JDK intrinsics when they replace manual copy/compare loops. + +## Reuse rules + +- Reuse mutable carriers, builders, encoders, decoders, and scratch arrays when one owner controls lifetime. +- Reinitialize reusable state cheaply; do not reconstruct deep object graphs inside the loop. +- Avoid thread-local caches unless the access pattern is proven hot and safe. +- Do not reuse objects across boundaries where aliasing or stale-state bugs become likely. + +## Lazy and non-materializing rules + +- Stream results directly to the consumer when the consumer can handle incremental delivery. +- Prefer iterators/cursors/sinks over `collect then filter/map`. +- Keep intermediate state as indices, spans, or primitive accumulators instead of wrapper objects. +- Materialize once at a boundary; not at each transformation stage. + +## Dispatch and inlining rules + +- Prefer `static`, `private`, or effectively final call targets on the inner path. +- Keep call sites monomorphic when possible; push interface selection above the hot loop. +- Split fast path from generic path when one workload dominates. +- Flatten tiny wrapper/helper layers when they prevent clear inlining. +- Treat interface-heavy visitor chains and generic function stacks as suspects until proven free by evidence. + +## Intrinsic and vectorization rules + +- Prefer primitive arrays and contiguous memory access. +- Write simple counted loops with hoisted bounds and invariant checks. +- Avoid hidden aliasing, side exits, and exception-heavy bodies in vectorizable loops. +- Prefer JDK library methods that HotSpot commonly treats specially over open-coded copies/comparisons/hashes when semantics match. 
+- Verify vectorization and intrinsic assumptions on the active JDK; do not assume cross-version stability. + +## Lambda specialization rules + +- Generate or choose workload-specific lambdas/adapters when the hot path only needs one shape. +- Prebind constants and remove unused branches from the inner callback. +- Avoid polymorphic chains of `Function` / `Predicate` / `Consumer` in hot loops when a direct method or specialized adapter will do. +- Prefer one specialized lambda per workload over one generalized lambda with internal branching. + +## Anti-patterns + +| Anti-pattern | Hot-path cost | Prefer | +| --- | --- | --- | +| Streams in verified hot loops | allocation, boxing, dispatch | direct counted loop | +| Generic visitor/callback towers | missed inline, megamorphism | split fast path + cold fallback | +| Temporary wrappers per item | allocation pressure | primitive fields or reusable carrier | +| Defensive copies on steady-state path | bandwidth + GC | views/slices/ownership checks | +| Materialize then filter/map | memory + latency | lazy cursor/sink pipeline | +| Repeated decode/encode boundary crossings | redundant work | keep native form longer | +| One abstraction for all workloads | branchy hot path | specialized narrow path | + +## Decision rule + +If a change makes the code uglier but removes copies, allocations, or polymorphism from a measured hot path, it can be worth it. + +If the path is not hot, do not apply these rules aggressively. diff --git a/.codex/skills/high-performance-java/references/evidence-workflow.md b/.codex/skills/high-performance-java/references/evidence-workflow.md new file mode 100644 index 0000000000..cb04b3726f --- /dev/null +++ b/.codex/skills/high-performance-java/references/evidence-workflow.md @@ -0,0 +1,64 @@ +# Evidence Workflow + +Use this workflow before making strong performance claims. + +## RDF4J path + +1. Reproduce with the local benchmark wrapper. 
+ +```bash +scripts/run-single-benchmark.sh --module <module> --class <benchmark-class> --method <benchmark-method> +``` + +2. If the benchmark moves but cause is unclear: + - use `--enable-jfr` for benchmark-side JFR capture + - or use `async-profiler-java-macos` for cpu / alloc / wall evidence on macOS +3. If code shape or JIT behavior is the question: + - use [hotspot-jit-forensics](../hotspot-jit-forensics/SKILL.md) + - capture compilation tier, inlining decisions, and method-scoped C2 evidence + +## Generic Java path + +1. Build the smallest reproducible JMH or app-level benchmark. +2. Capture baseline result. +3. Change code shape. +4. Capture candidate result with same JVM, flags, input size, and warmup assumptions. +5. If the delta matters, inspect JIT evidence: + +```bash +java \ + -XX:+UnlockDiagnosticVMOptions \ + -XX:+LogCompilation \ + -XX:LogFile=jit.xml \ + -XX:+PrintCompilation \ + -jar app.jar +``` + +If assembly or per-method diagnostics are needed, move to focused compiler directives and the `hotspot-jit-forensics` workflow. + +## Output contract + +Report these five items: +- benchmark delta: throughput/latency before vs after +- allocation delta: lower / unchanged / unknown +- JIT evidence: inline success/failure, tier, bailout, intrinsic, vectorization clue, or “not inspected” +- exact command or benchmark selector +- confidence: high / medium / low + +## Confidence rules + +- High: repeatable benchmark delta plus matching profile/JIT evidence +- Medium: repeatable benchmark delta without definitive low-level proof +- Low: one run, noisy run, or JVM explanation not verified + +## Fallback when assembly is unavailable + +Do not stop at “assembly unavailable”. + +Still collect: +- `jit.xml` +- compiler directives output +- `PrintCompilation` / inlining diagnostics +- async-profiler or JFR evidence + +Then say the exact missing piece: for example `hsdis` not installed or assembly printing not enabled. 
diff --git a/.codex/skills/high-performance-java/references/jdk-21-26-notes.md b/.codex/skills/high-performance-java/references/jdk-21-26-notes.md new file mode 100644 index 0000000000..edc743f91e --- /dev/null +++ b/.codex/skills/high-performance-java/references/jdk-21-26-notes.md @@ -0,0 +1,42 @@ +# JDK 21 to 26 Notes + +Treat JDK behavior as version-sensitive. + +## Defaults + +- Repository baseline: JDK 21 +- Current local runtime may be newer; in this workspace it is JDK 26 +- Advice about inlining, intrinsics, vectorization, and loop optimizations must be checked on the active runtime + +## What stays stable enough + +- Fewer allocations usually helps +- Fewer copies usually helps +- Monomorphic hot calls are easier to inline than megamorphic ones +- Primitive, contiguous loop shapes are friendlier to optimization than object-heavy callback stacks + +## What must be verified + +- Whether a specific JDK method lowers to an intrinsic on this runtime +- Whether SuperWord or related loop optimizations fire for this loop shape +- Whether a call chain fully inlines on this runtime +- Whether scalar replacement / escape analysis removes the expected allocation +- Whether benchmark results carry across JDK 21 and JDK 26 + +## Reporting rule + +When giving low-level JVM explanations, say which JDK you are talking about. + +Good: +- `On JDK 26 this loop appears to inline fully and the benchmark improves by 12%.` +- `On the JDK 21 baseline, verify the same claim before treating it as settled.` + +Bad: +- `HotSpot will optimize this.` +- `The JVM should vectorize it.` + +## Upgrade rule + +If a change is intended for the repo baseline, prefer evidence on JDK 21. + +If only a newer runtime is available locally, say that clearly and lower confidence until the baseline JVM is checked. 
From 03b9e6d21447f1a79d9d02ab45862cb4e9d51517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 11:59:28 +0200 Subject: [PATCH 06/32] continue optimizing lftj --- .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 312 +++++++++++++----- .../rdf4j/sail/lmdb/LmdbLftjPatternPlan.java | 66 ++-- .../rdf4j/sail/lmdb/LmdbTrieCursor.java | 96 ++++-- .../rdf4j/sail/lmdb/LmdbUnionTrieCursor.java | 76 +++-- 4 files changed, 394 insertions(+), 156 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index 28c515c1b3..eefa3a414e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -12,13 +12,10 @@ package org.eclipse.rdf4j.sail.lmdb; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; -import java.util.Objects; -import java.util.Set; import org.eclipse.rdf4j.common.iteration.CloseableIteration; -import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; +import org.eclipse.rdf4j.common.iteration.LookAheadIteration; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryEvaluationException; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; @@ -51,79 +48,199 @@ private CloseableIteration evaluate(LmdbLftjPlan plan, QueryEvaluati try { state.attachTxn(queryAccess.acquireReadTxn()); - List results = new ArrayList<>(); - LmdbLftjMetrics metrics = new LmdbLftjMetrics(); - search(plan, state, context, queryAccess, metrics, results, 0); - if (results.isEmpty()) { - return QueryEvaluationStep.EMPTY_ITERATION; - } - return new CloseableIteratorIteration<>(results.iterator()); + return new LmdbLftjIteration(plan, state, context, queryAccess, new LmdbLftjMetrics()); } catch (RuntimeException e) { - throw new 
QueryEvaluationException("LMDB LFTJ execution failed", e); - } finally { state.close(); + throw new QueryEvaluationException("LMDB LFTJ execution failed", e); } } - private void search(LmdbLftjPlan plan, LmdbLftjBindingState state, QueryEvaluationContext context, - LmdbQueryAccess queryAccess, LmdbLftjMetrics metrics, List results, int depth) { - if (depth >= plan.variableOrder().size()) { - long multiplicity = witnessMultiplicity(plan, state, queryAccess, metrics); - if (multiplicity <= 0) { - return; + private final class LmdbLftjIteration extends LookAheadIteration { + + private final LmdbLftjPlan plan; + private final LmdbLftjBindingState state; + private final QueryEvaluationContext context; + private final LmdbQueryAccess queryAccess; + private final LmdbLftjMetrics metrics; + private final List searchVariables; + private final List> cursorsByDepth; + private final boolean[] initializedDepths; + private final boolean[] advanceDepths; + + private int depth; + private BindingSet repeatedBinding; + private long repeatedCount; + + private LmdbLftjIteration(LmdbLftjPlan plan, LmdbLftjBindingState state, QueryEvaluationContext context, + LmdbQueryAccess queryAccess, LmdbLftjMetrics metrics) { + this.plan = plan; + this.state = state; + this.context = context; + this.queryAccess = queryAccess; + this.metrics = metrics; + this.searchVariables = collectSearchVariables(plan, state); + this.cursorsByDepth = createDepthCursors(plan, queryAccess, searchVariables); + this.initializedDepths = new boolean[searchVariables.size()]; + this.advanceDepths = new boolean[searchVariables.size()]; + this.depth = 0; + } + + @Override + protected BindingSet getNextElement() { + if (repeatedCount > 0) { + repeatedCount--; + metrics.recordEmitted(1); + return repeatedBinding; } - for (long i = 0; i < multiplicity; i++) { - results.add(state.materialize(context)); + + try { + return computeNextElement(); + } catch (RuntimeException e) { + throw new QueryEvaluationException("LMDB LFTJ 
iteration failed", e); } - metrics.recordEmitted(multiplicity); - return; } - String variableName = plan.variableOrder().get(depth); - if (state.isBound(variableName)) { - search(plan, state, context, queryAccess, metrics, results, depth + 1); - return; + @Override + protected void handleClose() { + for (List cursors : cursorsByDepth) { + for (LmdbTrieCursor cursor : cursors) { + cursor.close(); + } + } + state.close(); } - List cursors = createCursors(plan, state, queryAccess, metrics, variableName); - if (cursors.isEmpty()) { - return; + private BindingSet computeNextElement() { + while (depth >= 0) { + if (depth == searchVariables.size()) { + long multiplicity = witnessMultiplicity(plan, state, queryAccess, metrics, searchVariables); + backtrackAfterLeaf(); + if (multiplicity > 0) { + BindingSet result = state.materialize(context); + repeatedBinding = result; + repeatedCount = multiplicity - 1; + metrics.recordEmitted(1); + return result; + } + continue; + } + + if (!initializedDepths[depth]) { + if (!positionDepth(depth, false)) { + backtrackFromDepth(depth); + continue; + } + initializedDepths[depth] = true; + depth++; + continue; + } + + if (advanceDepths[depth]) { + if (!positionDepth(depth, true)) { + backtrackFromDepth(depth); + continue; + } + advanceDepths[depth] = false; + depth++; + continue; + } + + depth++; + } + + return null; } - long current = cursors.stream().mapToLong(LmdbTrieCursor::value).max().orElseThrow(); - while (true) { + private boolean positionDepth(int depth, boolean advanceExisting) { + String variableName = searchVariables.get(depth); + List cursors = cursorsByDepth.get(depth); + state.clear(variableName); + + if (cursors.isEmpty()) { + return false; + } + + if (!advanceExisting) { + for (LmdbTrieCursor cursor : cursors) { + metrics.recordCandidateScan(); + if (!cursor.initialize(state)) { + releaseDepth(depth); + return false; + } + } + } else if (!cursors.get(0).next()) { + releaseDepth(depth); + return false; + } + + long current 
= Long.MIN_VALUE; + for (LmdbTrieCursor cursor : cursors) { + current = Math.max(current, cursor.value()); + } current = align(cursors, current); if (current < 0) { - return; + releaseDepth(depth); + return false; } + state.assign(variableName, current); - search(plan, state, context, queryAccess, metrics, results, depth + 1); - state.clear(variableName); - if (!cursors.get(0).next()) { + return true; + } + + private void backtrackAfterLeaf() { + depth = searchVariables.size() - 1; + if (depth >= 0) { + advanceDepths[depth] = true; + } + } + + private void backtrackFromDepth(int failedDepth) { + releaseDepth(failedDepth); + depth = failedDepth - 1; + if (depth >= 0) { + advanceDepths[depth] = true; + } + } + + private void releaseDepth(int depth) { + if (depth < 0 || depth >= searchVariables.size()) { return; } - current = cursors.get(0).value(); + state.clear(searchVariables.get(depth)); + initializedDepths[depth] = false; + advanceDepths[depth] = false; + for (LmdbTrieCursor cursor : cursorsByDepth.get(depth)) { + cursor.release(); + } } } - private List createCursors(LmdbLftjPlan plan, LmdbLftjBindingState state, - LmdbQueryAccess queryAccess, - LmdbLftjMetrics metrics, String variableName) { - List cursors = new ArrayList<>(); - for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { - if (!patternPlan.containsVariable(variableName)) { - continue; + private List collectSearchVariables(LmdbLftjPlan plan, LmdbLftjBindingState state) { + List searchVariables = new ArrayList<>(plan.variableOrder().size()); + for (String variableName : plan.variableOrder()) { + if (!state.isBound(variableName)) { + searchVariables.add(variableName); } - LmdbTrieCursor cursor = queryAccess.includeInferred() - ? 
new LmdbUnionTrieCursor(patternPlan, variableName, queryAccess) - : new LmdbTrieCursor(patternPlan, variableName, queryAccess, true); - metrics.recordCandidateScan(); - if (!cursor.initialize(state)) { - return List.of(); + } + return searchVariables; + } + + private List> createDepthCursors(LmdbLftjPlan plan, LmdbQueryAccess queryAccess, + List searchVariables) { + List> cursorsByDepth = new ArrayList<>(searchVariables.size()); + for (String variableName : searchVariables) { + List cursors = new ArrayList<>(); + for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { + if (!patternPlan.containsVariable(variableName)) { + continue; + } + cursors.add(queryAccess.includeInferred() + ? new LmdbUnionTrieCursor(patternPlan, variableName, queryAccess) + : new LmdbTrieCursor(patternPlan, variableName, queryAccess, true)); } - cursors.add(cursor); + cursorsByDepth.add(cursors); } - return cursors; + return cursorsByDepth; } private long align(List cursors, long target) { @@ -148,10 +265,10 @@ private long align(List cursors, long target) { } private long witnessMultiplicity(LmdbLftjPlan plan, LmdbLftjBindingState state, LmdbQueryAccess queryAccess, - LmdbLftjMetrics metrics) { + LmdbLftjMetrics metrics, List searchVariables) { long multiplicity = 1; for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { - long witnesses = countMatches(patternPlan, state, queryAccess, metrics); + long witnesses = countMatches(patternPlan, state, queryAccess, metrics, searchVariables); if (witnesses == 0) { return 0; } @@ -161,53 +278,74 @@ private long witnessMultiplicity(LmdbLftjPlan plan, LmdbLftjBindingState state, } private long countMatches(LmdbLftjPatternPlan patternPlan, LmdbLftjBindingState state, LmdbQueryAccess queryAccess, - LmdbLftjMetrics metrics) { + LmdbLftjMetrics metrics, List searchVariables) { + if (!patternPlan.hasHiddenTerms() && containsSearchVariable(patternPlan, searchVariables)) { + return 1; + } + long[] scanKey = patternPlan.scanKey(state); - Set 
matches = new HashSet<>(); metrics.recordWitnessScan(); - collectMatches(matches, queryAccess, state, patternPlan, scanKey, true); - if (queryAccess.includeInferred()) { - collectMatches(matches, queryAccess, state, patternPlan, scanKey, false); + if (!queryAccess.includeInferred()) { + return countMatches(queryAccess, state, patternPlan, scanKey, true); } - return matches.size(); + + return countUnionMatches(queryAccess, state, patternPlan, scanKey); } - private void collectMatches(Set matches, LmdbQueryAccess queryAccess, LmdbLftjBindingState state, - LmdbLftjPatternPlan patternPlan, long[] scanKey, boolean explicit) { - try (RecordIterator records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1], - scanKey[2], scanKey[3], explicit)) { - long[] quad; - while ((quad = records.next()) != null) { - matches.add(new QuadKey(quad[0], quad[1], quad[2], quad[3])); + private boolean containsSearchVariable(LmdbLftjPatternPlan patternPlan, List searchVariables) { + for (String searchVariable : searchVariables) { + if (patternPlan.containsVariable(searchVariable)) { + return true; } } + return false; } - private static final class QuadKey { - private final long subj; - private final long pred; - private final long obj; - private final long context; - - private QuadKey(long subj, long pred, long obj, long context) { - this.subj = subj; - this.pred = pred; - this.obj = obj; - this.context = context; + private long countMatches(LmdbQueryAccess queryAccess, LmdbLftjBindingState state, LmdbLftjPatternPlan patternPlan, + long[] scanKey, boolean explicit) { + long count = 0; + try (RecordIterator records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1], + scanKey[2], scanKey[3], explicit)) { + while (records.next() != null) { + count++; + } } + return count; + } - @Override - public boolean equals(Object other) { - if (!(other instanceof QuadKey)) { - return false; + private long countUnionMatches(LmdbQueryAccess 
queryAccess, LmdbLftjBindingState state, + LmdbLftjPatternPlan patternPlan, long[] scanKey) { + long count = 0; + try (RecordIterator explicitRecords = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], + scanKey[1], scanKey[2], scanKey[3], true); + RecordIterator inferredRecords = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], + scanKey[1], scanKey[2], scanKey[3], false)) { + long[] explicitQuad = explicitRecords.next(); + long[] inferredQuad = inferredRecords.next(); + while (explicitQuad != null || inferredQuad != null) { + if (inferredQuad == null || explicitQuad != null && compareQuads(explicitQuad, inferredQuad) <= 0) { + count++; + long[] previous = explicitQuad; + explicitQuad = explicitRecords.next(); + if (inferredQuad != null && compareQuads(previous, inferredQuad) == 0) { + inferredQuad = inferredRecords.next(); + } + } else { + count++; + inferredQuad = inferredRecords.next(); + } } - QuadKey o = (QuadKey) other; - return subj == o.subj && pred == o.pred && obj == o.obj && context == o.context; } + return count; + } - @Override - public int hashCode() { - return Objects.hash(subj, pred, obj, context); + private int compareQuads(long[] left, long[] right) { + for (int i = 0; i < 4; i++) { + int comparison = Long.compare(left[i], right[i]); + if (comparison != 0) { + return comparison; + } } + return 0; } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java index 371ec95ddf..a2bdf7a5c1 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java @@ -11,9 +11,10 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import java.util.ArrayList; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; -import 
java.util.stream.Collectors; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.query.algebra.StatementPattern; @@ -27,6 +28,7 @@ final class LmdbLftjPatternPlan { private final TermRef predicate; private final TermRef object; private final TermRef context; + private final TermRef[] terms; LmdbLftjPatternPlan(StatementPattern pattern, String indexName) { this.pattern = pattern.clone(); @@ -35,6 +37,7 @@ final class LmdbLftjPatternPlan { this.predicate = TermRef.of(TripleStore.PRED_IDX, pattern.getPredicateVar()); this.object = TermRef.of(TripleStore.OBJ_IDX, pattern.getObjectVar()); this.context = TermRef.of(TripleStore.CONTEXT_IDX, pattern.getContextVar()); + this.terms = new TermRef[] { subject, predicate, object, context }; } StatementPattern pattern() { @@ -46,19 +49,35 @@ String indexName() { } List terms() { - return List.of(subject, predicate, object, context); + return List.of(terms); } List visibleVariableNames() { - return terms().stream() - .filter(TermRef::isVisible) - .map(TermRef::name) - .distinct() - .collect(Collectors.toList()); + LinkedHashSet names = new LinkedHashSet<>(); + for (TermRef term : terms) { + if (term.isVisible()) { + names.add(term.name()); + } + } + return new ArrayList<>(names); } boolean containsVariable(String name) { - return terms().stream().anyMatch(term -> term.matchesName(name)); + for (TermRef term : terms) { + if (term.matchesName(name)) { + return true; + } + } + return false; + } + + int componentFor(String variableName) { + for (TermRef term : terms) { + if (term.matchesName(variableName)) { + return term.component(); + } + } + throw new IllegalArgumentException("Pattern does not bind variable " + variableName); } TermRef term(char field) { @@ -77,20 +96,29 @@ TermRef term(char field) { } long[] scanKey(LmdbLftjBindingState state) { - return new long[] { - state.fixedId(subject), - state.fixedId(predicate), - state.fixedId(object), - state.fixedId(context) - }; + long[] scanKey = new long[4]; + 
fillScanKey(state, scanKey); + return scanKey; + } + + void fillScanKey(LmdbLftjBindingState state, long[] scanKey) { + scanKey[0] = state.fixedId(subject); + scanKey[1] = state.fixedId(predicate); + scanKey[2] = state.fixedId(object); + scanKey[3] = state.fixedId(context); } long valueFor(String variableName, long[] quad) { - return terms().stream() - .filter(term -> term.matchesName(variableName)) - .findFirst() - .map(term -> quad[term.component()]) - .orElseThrow(() -> new IllegalArgumentException("Pattern does not bind variable " + variableName)); + return quad[componentFor(variableName)]; + } + + boolean hasHiddenTerms() { + for (TermRef term : terms) { + if (term.isHidden()) { + return true; + } + } + return false; } @Override diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java index 3e8ea131d7..a9560dbddc 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java @@ -11,63 +11,105 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; -import java.util.TreeSet; - class LmdbTrieCursor implements AutoCloseable { private final LmdbLftjPatternPlan patternPlan; - private final String variableName; private final LmdbQueryAccess queryAccess; private final boolean explicit; + private final int valueComponent; + private final long[] scanKey = new long[4]; + + private RecordIterator records; - protected long[] values = new long[0]; - private int position; + protected long currentValue; + protected boolean currentAvailable; LmdbTrieCursor(LmdbLftjPatternPlan patternPlan, String variableName, LmdbQueryAccess queryAccess, boolean explicit) { this.patternPlan = patternPlan; - this.variableName = variableName; this.queryAccess = queryAccess; this.explicit = explicit; + this.valueComponent = 
patternPlan.componentFor(variableName); } boolean initialize(LmdbLftjBindingState state) { - TreeSet candidates = new TreeSet<>(); - long[] scanKey = patternPlan.scanKey(state); - try (RecordIterator records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1], - scanKey[2], scanKey[3], explicit)) { - long[] quad; - while ((quad = records.next()) != null) { - candidates.add(patternPlan.valueFor(variableName, quad)); - } - } - values = candidates.stream().mapToLong(Long::longValue).toArray(); - position = 0; - return values.length > 0; + release(); + patternPlan.fillScanKey(state, scanKey); + records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1], scanKey[2], + scanKey[3], explicit); + return advanceFirst(); } boolean seek(long target) { - while (position < values.length && values[position] < target) { - position++; + while (currentAvailable && currentValue < target) { + if (!next()) { + return false; + } } - return position < values.length; + return currentAvailable; } boolean next() { - position++; - return position < values.length; + if (!currentAvailable) { + return false; + } + return advanceBeyond(currentValue); } long value() { - return values[position]; + return currentValue; + } + + protected boolean available() { + return currentAvailable; + } + + protected void setCurrentValue(long value) { + this.currentValue = value; + this.currentAvailable = true; + } + + protected void clearCurrent() { + this.currentAvailable = false; } - long[] values() { - return values; + void release() { + clearCurrent(); + if (records != null) { + records.close(); + records = null; + } } @Override public void close() { - // values already materialized + release(); + } + + private boolean advanceFirst() { + long[] quad = nextQuad(); + if (quad == null) { + release(); + return false; + } + setCurrentValue(quad[valueComponent]); + return true; + } + + private boolean advanceBeyond(long previousValue) { + long[] 
quad; + while ((quad = nextQuad()) != null) { + long nextValue = quad[valueComponent]; + if (nextValue != previousValue) { + setCurrentValue(nextValue); + return true; + } + } + release(); + return false; + } + + private long[] nextQuad() { + return records == null ? null : records.next(); } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java index 021ea765a0..d48aca9bdc 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java @@ -24,31 +24,61 @@ final class LmdbUnionTrieCursor extends LmdbTrieCursor { @Override boolean initialize(LmdbLftjBindingState state) { - boolean explicitAvailable = explicitCursor.initialize(state); - boolean inferredAvailable = inferredCursor.initialize(state); - if (!explicitAvailable && !inferredAvailable) { - values = new long[0]; + explicitCursor.initialize(state); + inferredCursor.initialize(state); + return mergeCurrent(); + } + + @Override + boolean seek(long target) { + if (explicitCursor.available()) { + explicitCursor.seek(target); + } + if (inferredCursor.available()) { + inferredCursor.seek(target); + } + return mergeCurrent(); + } + + @Override + boolean next() { + if (!available()) { return false; } - long[] left = explicitAvailable ? explicitCursor.values() : new long[0]; - long[] right = inferredAvailable ? 
inferredCursor.values() : new long[0]; - long[] merged = new long[left.length + right.length]; - int size = 0; - int leftIndex = 0; - int rightIndex = 0; - while (leftIndex < left.length || rightIndex < right.length) { - long next; - if (rightIndex >= right.length || (leftIndex < left.length && left[leftIndex] <= right[rightIndex])) { - next = left[leftIndex++]; - } else { - next = right[rightIndex++]; - } - if (size == 0 || merged[size - 1] != next) { - merged[size++] = next; - } - } - values = java.util.Arrays.copyOf(merged, size); - return size > 0; + long previous = value(); + if (explicitCursor.available() && explicitCursor.value() == previous) { + explicitCursor.next(); + } + if (inferredCursor.available() && inferredCursor.value() == previous) { + inferredCursor.next(); + } + return mergeCurrent(); + } + + @Override + void release() { + clearCurrent(); + explicitCursor.release(); + inferredCursor.release(); + } + + private boolean mergeCurrent() { + boolean explicitAvailable = explicitCursor.available(); + boolean inferredAvailable = inferredCursor.available(); + if (!explicitAvailable && !inferredAvailable) { + clearCurrent(); + return false; + } + if (!explicitAvailable) { + setCurrentValue(inferredCursor.value()); + return true; + } + if (!inferredAvailable) { + setCurrentValue(explicitCursor.value()); + return true; + } + setCurrentValue(Math.min(explicitCursor.value(), inferredCursor.value())); + return true; } } From 7aa92c3ca235a8bc003e3bc639bafe8f21993180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 11:59:31 +0200 Subject: [PATCH 07/32] continue optimizing lftj --- .../rdf4j/sail/lmdb/LmdbLftjExecutorTest.java | 290 ++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java new file mode 100644 index 0000000000..e407472537 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java @@ -0,0 +1,290 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.eclipse.rdf4j.collection.factory.impl.DefaultCollectionFactory; +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.EmptyIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.FOAF; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.Dataset; +import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.Var; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; +import 
org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; +import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.junit.jupiter.api.Test; + +class LmdbLftjExecutorTest { + + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + + @Test + void evaluateShouldStayLazyUntilConsumerReadsResults() { + TestQueryAccess queryAccess = new TestQueryAccess(); + QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + assertEquals(0, queryAccess.resolveValueCalls, + "evaluate() should not materialize result values before the consumer reads a row"); + assertEquals(0, queryAccess.releaseReadTxnCalls, + "evaluate() should keep the read transaction open for lazy consumption"); + } + + assertEquals(1, queryAccess.releaseReadTxnCalls, + "closing the iteration should release the read transaction exactly once"); + } + + @Test + void evaluateShouldRespectFullyBoundInputBindings() { + TestQueryAccess queryAccess = new TestQueryAccess(); + QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + + QueryBindingSet matchingBindings = new QueryBindingSet(); + matchingBindings.setBinding("a", VF.createIRI("urn:person:1")); + matchingBindings.setBinding("b", VF.createIRI("urn:person:2")); + matchingBindings.setBinding("c", VF.createIRI("urn:person:3")); + + try (CloseableIteration iteration = evaluationStep.evaluate(matchingBindings)) { + assertTrue(iteration.hasNext(), "fully bound matching cycles should still produce a result"); + } + + QueryBindingSet nonMatchingBindings = new QueryBindingSet(); + nonMatchingBindings.setBinding("a", VF.createIRI("urn:person:1")); + nonMatchingBindings.setBinding("b", 
VF.createIRI("urn:person:1")); + nonMatchingBindings.setBinding("c", VF.createIRI("urn:person:2")); + + try (CloseableIteration iteration = evaluationStep.evaluate(nonMatchingBindings)) { + assertTrue(!iteration.hasNext(), "fully bound non-matching cycles must not be reported"); + } + } + + @Test + void evaluateShouldCloseLiveScansOnEarlyClose() { + TestQueryAccess queryAccess = new TestQueryAccess(); + QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + + CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance()); + assertTrue(iteration.hasNext(), "expected a lazy result row"); + iteration.close(); + + assertTrue(queryAccess.openScanCalls > 0, "expected lazy execution to open scans once iteration starts"); + assertEquals(queryAccess.openScanCalls, queryAccess.closedScanCalls, + "closing the iteration should close every live scan"); + assertEquals(1, queryAccess.releaseReadTxnCalls, + "closing a started iteration should still release the read transaction"); + } + + private QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess) { + QueryEvaluationContext context = new QueryEvaluationContext.Minimal((Dataset) null); + LmdbLftjEvaluationStrategy strategy = new LmdbLftjEvaluationStrategy( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess), + null, + null, + 0L, + new EvaluationStatistics(), + false, + DefaultCollectionFactory::new); + LmdbLftjExecutor executor = new LmdbLftjExecutor(strategy); + return executor.prepare(new LmdbLftjTupleExpr(createPlan()), context); + } + + private LmdbLftjPlan createPlan() { + StatementPattern pattern1 = statementPattern("a", "b"); + StatementPattern pattern2 = statementPattern("b", "c"); + StatementPattern pattern3 = statementPattern("c", "a"); + TupleExpr fallbackExpr = new Join(new Join(pattern1.clone(), pattern2.clone()), pattern3.clone()); + return new LmdbLftjPlan( + fallbackExpr, + fallbackExpr.getBindingNames(), + 
fallbackExpr.getAssuredBindingNames(), + List.of("a", "b", "c"), + List.of( + new LmdbLftjPatternPlan(pattern1, "spoc"), + new LmdbLftjPatternPlan(pattern2, "spoc"), + new LmdbLftjPatternPlan(pattern3, "spoc"))); + } + + private StatementPattern statementPattern(String subjectName, String objectName) { + return new StatementPattern( + new Var(subjectName), + new Var("pred", FOAF.KNOWS), + new Var(objectName)); + } + + private static final class EmptyTripleSource implements TripleSource { + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) { + return new EmptyIteration<>(); + } + + @Override + public ValueFactory getValueFactory() { + return VF; + } + } + + private static final class TestQueryAccess implements LmdbQueryAccess { + + private final TxnManager txnManager = new TxnManager(0L, TxnManager.Mode.NONE); + private final TxnManager.Txn txn = txnManager.createTxn(1L); + private final List quads = new ArrayList<>(); + private final List valuesById = new ArrayList<>(); + + private int resolveValueCalls; + private int releaseReadTxnCalls; + private int openScanCalls; + private int closedScanCalls; + + private TestQueryAccess() { + valuesById.add(null); + valuesById.add(VF.createIRI("urn:person:1")); + valuesById.add(VF.createIRI("urn:person:2")); + valuesById.add(VF.createIRI("urn:person:3")); + valuesById.add(VF.createIRI("urn:person:4")); + valuesById.add(FOAF.KNOWS); + + for (long subject = 1; subject <= 4; subject++) { + for (long object = 1; object <= 4; object++) { + if (subject != object) { + quads.add(new long[] { subject, 5L, object, 0L }); + } + } + } + } + + @Override + public TripleStore tripleStore() { + return null; + } + + @Override + public TxnManager.Txn acquireReadTxn() { + return txn; + } + + @Override + public void releaseReadTxn(TxnManager.Txn txn) { + assertTrue(txn == this.txn); + releaseReadTxnCalls++; + } + + @Override + public long resolveId(Value value) { + if 
(FOAF.KNOWS.equals(value)) { + return 5L; + } + + for (int i = 1; i < valuesById.size(); i++) { + if (value.equals(valuesById.get(i))) { + return i; + } + } + + return -1L; + } + + @Override + public Value resolveValue(long id) { + resolveValueCalls++; + return valuesById.get((int) id); + } + + @Override + public boolean includeInferred() { + return false; + } + + @Override + public Set configuredIndexes() { + return Set.of("spoc"); + } + + @Override + public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, + long context, boolean explicit) { + openScanCalls++; + return new TestRecordIterator(quads, subj, pred, obj, context, this::recordClosedScan); + } + + private void recordClosedScan() { + closedScanCalls++; + } + } + + private static final class TestRecordIterator implements RecordIterator { + + private final List quads; + private final long subj; + private final long pred; + private final long obj; + private final long context; + private final Runnable closeCallback; + + private int position; + private boolean closed; + + private TestRecordIterator(List quads, long subj, long pred, long obj, long context, + Runnable closeCallback) { + this.quads = quads; + this.subj = subj; + this.pred = pred; + this.obj = obj; + this.context = context; + this.closeCallback = closeCallback; + } + + @Override + public long[] next() { + while (position < quads.size()) { + long[] quad = quads.get(position++); + if (matches(subj, quad[0]) && matches(pred, quad[1]) && matches(obj, quad[2]) + && matches(context, quad[3])) { + return quad.clone(); + } + } + return null; + } + + private boolean matches(long expected, long actual) { + return expected < 0 || expected == actual; + } + + @Override + public void close() { + if (!closed) { + closed = true; + closeCallback.run(); + } + } + } +} From 9fde7ed72556f2a65d6970a93a724e033f2d6801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 12:01:01 +0200 
Subject: [PATCH 08/32] continue optimizing lftj --- .../skills/high-performance-java/references/jdk-21-26-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codex/skills/high-performance-java/references/jdk-21-26-notes.md b/.codex/skills/high-performance-java/references/jdk-21-26-notes.md index edc743f91e..8e4fad77f2 100644 --- a/.codex/skills/high-performance-java/references/jdk-21-26-notes.md +++ b/.codex/skills/high-performance-java/references/jdk-21-26-notes.md @@ -5,7 +5,7 @@ Treat JDK behavior as version-sensitive. ## Defaults - Repository baseline: JDK 21 -- Current local runtime may be newer; in this workspace it is JDK 26 +- Current local runtime may be newer (e.g. JDK 26) - Advice about inlining, intrinsics, vectorization, and loop optimizations must be checked on the active runtime ## What stays stable enough From a930d371b8778e560f5e44159313b4b8e4774891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 13:35:15 +0200 Subject: [PATCH 09/32] continue optimizing lftj --- core/sail/lmdb/pom.xml | 18 ++ .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 56 ++++-- .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 80 +++++++-- .../rdf4j/sail/lmdb/LmdbLftjPatternPlan.java | 59 +++++++ .../rdf4j/sail/lmdb/LmdbQueryAccess.java | 2 + .../rdf4j/sail/lmdb/LmdbStoreConnection.java | 5 + .../rdf4j/sail/lmdb/LmdbTrieCursor.java | 161 +++++++++++------ .../rdf4j/sail/lmdb/LmdbUnionTrieCursor.java | 63 ++++--- .../eclipse/rdf4j/sail/lmdb/TripleStore.java | 4 + .../rdf4j/sail/lmdb/LmdbLftjExecutorTest.java | 162 +++++++++++++++++- .../rdf4j/sail/lmdb/LmdbSailStoreTest.java | 28 +++ .../benchmark/FoafCliqueQueryBenchmark.java | 20 +-- 12 files changed, 531 insertions(+), 127 deletions(-) diff --git a/core/sail/lmdb/pom.xml b/core/sail/lmdb/pom.xml index 1c9a343fb8..702da7d065 100644 --- a/core/sail/lmdb/pom.xml +++ b/core/sail/lmdb/pom.xml @@ -222,6 +222,24 @@ maven-assembly-plugin + + org.apache.maven.plugins + 
maven-compiler-plugin + + + default-testCompile + + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmhVersion} + + + + + + diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java index caaa9c4813..997ffc1291 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -25,8 +25,11 @@ final class LmdbLftjBindingState { private final LmdbLftjPlan plan; private final BindingSet inputBindings; private final LmdbQueryAccess queryAccess; - private final Map fixedValues = new HashMap<>(); - private final Map assignedValues = new HashMap<>(); + private final Map variableSlots = new HashMap<>(); + private final long[] fixedValues; + private final boolean[] fixedPresent; + private final long[] assignedValues; + private final boolean[] assignedPresent; private final IdentityHashMap constantIds = new IdentityHashMap<>(); private TxnManager.Txn txn; @@ -35,6 +38,14 @@ final class LmdbLftjBindingState { this.plan = plan; this.inputBindings = inputBindings; this.queryAccess = queryAccess; + int variableCount = plan.variableOrder().size(); + this.fixedValues = new long[variableCount]; + this.fixedPresent = new boolean[variableCount]; + this.assignedValues = new long[variableCount]; + this.assignedPresent = new boolean[variableCount]; + for (int i = 0; i < variableCount; i++) { + variableSlots.put(plan.variableOrder().get(i), i); + } } boolean initialize() { @@ -47,6 +58,9 @@ boolean initialize() { } constantIds.put(term, id); } + if (term.isVisible()) { + term.bindSlot(slot(term.name())); + } } } for (String variableName : plan.variableOrder()) { @@ -55,7 +69,9 @@ boolean initialize() { if (id == LmdbValue.UNKNOWN_ID) { return false; } - fixedValues.put(variableName, id); + int slot = slot(variableName); + fixedValues[slot] = id; 
+ fixedPresent[slot] = true; } } return true; @@ -70,30 +86,40 @@ TxnManager.Txn txn() { } boolean isBound(String variableName) { - return assignedValues.containsKey(variableName) || fixedValues.containsKey(variableName); + int slot = slot(variableName); + return assignedPresent[slot] || fixedPresent[slot]; } long value(String variableName) { - if (assignedValues.containsKey(variableName)) { - return assignedValues.get(variableName); + int slot = slot(variableName); + if (assignedPresent[slot]) { + return assignedValues[slot]; } - return fixedValues.get(variableName); + return fixedValues[slot]; } void assign(String variableName, long value) { - assignedValues.put(variableName, value); + int slot = slot(variableName); + assignedValues[slot] = value; + assignedPresent[slot] = true; } void clear(String variableName) { - assignedValues.remove(variableName); + assignedPresent[slot(variableName)] = false; } long fixedId(LmdbLftjPatternPlan.TermRef term) { if (term.isConstant()) { return constantIds.get(term); } - if (term.isVisible() && isBound(term.name())) { - return value(term.name()); + int slot = term.bindingSlot(); + if (slot >= 0) { + if (assignedPresent[slot]) { + return assignedValues[slot]; + } + if (fixedPresent[slot]) { + return fixedValues[slot]; + } } return -1; } @@ -114,4 +140,12 @@ void close() { txn = null; } } + + private int slot(String variableName) { + Integer slot = variableSlots.get(variableName); + if (slot == null) { + throw new IllegalArgumentException("Unknown LMDB LFTJ variable: " + variableName); + } + return slot; + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index eefa3a414e..5ab31bfcd0 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -18,6 +18,7 @@ import 
org.eclipse.rdf4j.common.iteration.LookAheadIteration; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; @@ -30,12 +31,12 @@ final class LmdbLftjExecutor { } QueryEvaluationStep prepare(LmdbLftjTupleExpr node, QueryEvaluationContext context) { - QueryEvaluationStep fallback = strategy.precompile(node.plan().fallbackExpr().clone(), context); + LazyFallbackStep fallback = new LazyFallbackStep(node.plan().fallbackExpr().clone(), context); return bindings -> evaluate(node.plan(), context, fallback, bindings); } private CloseableIteration evaluate(LmdbLftjPlan plan, QueryEvaluationContext context, - QueryEvaluationStep fallback, BindingSet bindings) { + LazyFallbackStep fallback, BindingSet bindings) { LmdbQueryAccess queryAccess = strategy.queryAccess(); if (queryAccess == null) { return fallback.evaluate(bindings); @@ -63,6 +64,7 @@ private final class LmdbLftjIteration extends LookAheadIteration { private final LmdbQueryAccess queryAccess; private final LmdbLftjMetrics metrics; private final List searchVariables; + private final List patternCursors; private final List> cursorsByDepth; private final boolean[] initializedDepths; private final boolean[] advanceDepths; @@ -79,7 +81,8 @@ private LmdbLftjIteration(LmdbLftjPlan plan, LmdbLftjBindingState state, QueryEv this.queryAccess = queryAccess; this.metrics = metrics; this.searchVariables = collectSearchVariables(plan, state); - this.cursorsByDepth = createDepthCursors(plan, queryAccess, searchVariables); + this.patternCursors = createPatternCursors(plan, queryAccess, state); + this.cursorsByDepth = createDepthCursors(plan, searchVariables, patternCursors); this.initializedDepths = new boolean[searchVariables.size()]; this.advanceDepths = new 
boolean[searchVariables.size()]; this.depth = 0; @@ -102,10 +105,8 @@ protected BindingSet getNextElement() { @Override protected void handleClose() { - for (List cursors : cursorsByDepth) { - for (LmdbTrieCursor cursor : cursors) { - cursor.close(); - } + for (LmdbTrieCursor cursor : patternCursors) { + cursor.close(); } state.close(); } @@ -163,13 +164,11 @@ private boolean positionDepth(int depth, boolean advanceExisting) { if (!advanceExisting) { for (LmdbTrieCursor cursor : cursors) { metrics.recordCandidateScan(); - if (!cursor.initialize(state)) { - releaseDepth(depth); + if (!cursor.open(variableName)) { return false; } } } else if (!cursors.get(0).next()) { - releaseDepth(depth); return false; } @@ -179,7 +178,6 @@ private boolean positionDepth(int depth, boolean advanceExisting) { } current = align(cursors, current); if (current < 0) { - releaseDepth(depth); return false; } @@ -206,11 +204,12 @@ private void releaseDepth(int depth) { if (depth < 0 || depth >= searchVariables.size()) { return; } - state.clear(searchVariables.get(depth)); + String variableName = searchVariables.get(depth); + state.clear(variableName); initializedDepths[depth] = false; advanceDepths[depth] = false; for (LmdbTrieCursor cursor : cursorsByDepth.get(depth)) { - cursor.release(); + cursor.release(variableName); } } } @@ -225,18 +224,29 @@ private List collectSearchVariables(LmdbLftjPlan plan, LmdbLftjBindingSt return searchVariables; } - private List> createDepthCursors(LmdbLftjPlan plan, LmdbQueryAccess queryAccess, - List searchVariables) { + private List createPatternCursors(LmdbLftjPlan plan, LmdbQueryAccess queryAccess, + LmdbLftjBindingState state) { + List cursors = new ArrayList<>(plan.patternPlans().size()); + for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { + cursors.add(queryAccess.includeInferred() + ? 
new LmdbUnionTrieCursor(patternPlan, queryAccess, state) + : new LmdbTrieCursor(patternPlan, queryAccess, state, true)); + } + return cursors; + } + + private List> createDepthCursors(LmdbLftjPlan plan, List searchVariables, + List patternCursors) { List> cursorsByDepth = new ArrayList<>(searchVariables.size()); + List patternPlans = plan.patternPlans(); for (String variableName : searchVariables) { List cursors = new ArrayList<>(); - for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { + for (int i = 0; i < patternPlans.size(); i++) { + LmdbLftjPatternPlan patternPlan = patternPlans.get(i); if (!patternPlan.containsVariable(variableName)) { continue; } - cursors.add(queryAccess.includeInferred() - ? new LmdbUnionTrieCursor(patternPlan, variableName, queryAccess) - : new LmdbTrieCursor(patternPlan, variableName, queryAccess, true)); + cursors.add(patternCursors.get(i)); } cursorsByDepth.add(cursors); } @@ -348,4 +358,36 @@ private int compareQuads(long[] left, long[] right) { } return 0; } + + private final class LazyFallbackStep { + + private final TupleExpr fallbackExpr; + private final QueryEvaluationContext context; + + private volatile QueryEvaluationStep compiledStep; + + private LazyFallbackStep(TupleExpr fallbackExpr, QueryEvaluationContext context) { + this.fallbackExpr = fallbackExpr; + this.context = context; + } + + private CloseableIteration evaluate(BindingSet bindings) { + return step().evaluate(bindings); + } + + private QueryEvaluationStep step() { + QueryEvaluationStep step = compiledStep; + if (step != null) { + return step; + } + synchronized (this) { + step = compiledStep; + if (step == null) { + step = strategy.precompile(fallbackExpr.clone(), context); + compiledStep = step; + } + return step; + } + } + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java index a2bdf7a5c1..4de14bb58c 100644 --- 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java @@ -12,6 +12,7 @@ package org.eclipse.rdf4j.sail.lmdb; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; @@ -29,6 +30,7 @@ final class LmdbLftjPatternPlan { private final TermRef object; private final TermRef context; private final TermRef[] terms; + private final TermRef[] keyTerms; LmdbLftjPatternPlan(StatementPattern pattern, String indexName) { this.pattern = pattern.clone(); @@ -38,6 +40,10 @@ final class LmdbLftjPatternPlan { this.object = TermRef.of(TripleStore.OBJ_IDX, pattern.getObjectVar()); this.context = TermRef.of(TripleStore.CONTEXT_IDX, pattern.getContextVar()); this.terms = new TermRef[] { subject, predicate, object, context }; + this.keyTerms = new TermRef[indexName.length()]; + for (int i = 0; i < indexName.length(); i++) { + keyTerms[i] = term(indexName.charAt(i)); + } } StatementPattern pattern() { @@ -80,6 +86,48 @@ int componentFor(String variableName) { throw new IllegalArgumentException("Pattern does not bind variable " + variableName); } + int keyFieldIndex(String variableName) { + for (int i = 0; i < keyTerms.length; i++) { + if (keyTerms[i].matchesName(variableName)) { + return i; + } + } + throw new IllegalArgumentException("Pattern does not bind variable " + variableName); + } + + void fillRangeBounds(LmdbLftjBindingState state, String variableName, long lowerBound, long[] minKey, + long[] maxKey) { + Arrays.fill(maxKey, Long.MAX_VALUE); + + int keyFieldIndex = keyFieldIndex(variableName); + for (int i = 0; i < keyTerms.length; i++) { + TermRef term = keyTerms[i]; + long fixedId = state.fixedId(term); + if (i < keyFieldIndex && fixedId < 0) { + throw new IllegalStateException("LMDB LFTJ requires a fully fixed prefix before " + variableName + + " in index " + indexName); + } + if 
(fixedId >= 0) { + minKey[term.component()] = fixedId; + maxKey[term.component()] = fixedId; + } + } + + TermRef currentTerm = keyTerms[keyFieldIndex]; + minKey[currentTerm.component()] = lowerBound; + maxKey[currentTerm.component()] = Long.MAX_VALUE; + } + + boolean matchesPrefix(LmdbTrieKeyCursor cursor, LmdbLftjBindingState state, int keyFieldIndex) { + for (int i = 0; i < keyFieldIndex; i++) { + long fixedId = state.fixedId(keyTerms[i]); + if (fixedId < 0 || cursor.valueAt(i) != fixedId) { + return false; + } + } + return true; + } + TermRef term(char field) { switch (field) { case 's': @@ -141,6 +189,7 @@ static final class TermRef { private final String name; private final boolean anonymous; private final Value constantValue; + private int bindingSlot = -1; private TermRef(int component, String name, boolean anonymous, Value constantValue) { this.component = component; @@ -186,5 +235,15 @@ boolean isHidden() { boolean matchesName(String variableName) { return isVisible() && name.equals(variableName); } + + void bindSlot(int bindingSlot) { + if (isVisible()) { + this.bindingSlot = bindingSlot; + } + } + + int bindingSlot() { + return bindingSlot; + } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java index ebefb4d555..17cce9ad2e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java @@ -33,4 +33,6 @@ interface LmdbQueryAccess { RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, long context, boolean explicit); + + LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java 
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index 064d6f09ae..2074abb3fd 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -248,6 +248,11 @@ public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, throw new SailException(e); } } + + @Override + public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { + return tripleStore.openTrieCursor(txn, indexName, explicit); + } }; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java index a9560dbddc..812f733ff6 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java @@ -15,101 +15,150 @@ class LmdbTrieCursor implements AutoCloseable { private final LmdbLftjPatternPlan patternPlan; private final LmdbQueryAccess queryAccess; + private final LmdbLftjBindingState state; private final boolean explicit; - private final int valueComponent; - private final long[] scanKey = new long[4]; + private final long[] lowerBound = new long[4]; + private final long[] upperBound = new long[4]; + private final Frame[] stack = new Frame[4]; - private RecordIterator records; + private LmdbTrieKeyCursor cursor; + private int stackSize; - protected long currentValue; - protected boolean currentAvailable; - - LmdbTrieCursor(LmdbLftjPatternPlan patternPlan, String variableName, LmdbQueryAccess queryAccess, + LmdbTrieCursor(LmdbLftjPatternPlan patternPlan, LmdbQueryAccess queryAccess, LmdbLftjBindingState state, boolean explicit) { this.patternPlan = patternPlan; this.queryAccess = queryAccess; + this.state = state; this.explicit = explicit; - this.valueComponent = 
patternPlan.componentFor(variableName); + for (int i = 0; i < stack.length; i++) { + stack[i] = new Frame(); + } } - boolean initialize(LmdbLftjBindingState state) { - release(); - patternPlan.fillScanKey(state, scanKey); - records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1], scanKey[2], - scanKey[3], explicit); - return advanceFirst(); + boolean open(String variableName) { + ensureCursor(); + Frame frame = stack[stackSize++]; + frame.variableName = variableName; + frame.keyFieldIndex = patternPlan.keyFieldIndex(variableName); + if (seek(0L)) { + return true; + } + stackSize--; + frame.reset(); + return false; } boolean seek(long target) { - while (currentAvailable && currentValue < target) { - if (!next()) { - return false; - } + if (stackSize == 0) { + return false; + } + + Frame frame = currentFrame(); + if (frame.currentAvailable && cursor.isPositioned() && target <= frame.currentValue) { + return true; } - return currentAvailable; + + patternPlan.fillRangeBounds(state, frame.variableName, target, lowerBound, upperBound); + if (!cursor.position(lowerBound, upperBound, frame.keyFieldIndex + 1)) { + frame.currentAvailable = false; + return false; + } + + frame.currentValue = cursor.valueAt(frame.keyFieldIndex); + frame.currentAvailable = true; + return true; } boolean next() { - if (!currentAvailable) { + Frame frame = currentFrame(); + if (!frame.currentAvailable || frame.currentValue == Long.MAX_VALUE) { + frame.currentAvailable = false; return false; } - return advanceBeyond(currentValue); + + long previousValue = frame.currentValue; + while (cursor.next()) { + long nextValue = cursor.valueAt(frame.keyFieldIndex); + if (nextValue != previousValue) { + frame.currentValue = nextValue; + return true; + } + } + + frame.currentAvailable = false; + return false; } long value() { - return currentValue; + return currentFrame().currentValue; } protected boolean available() { - return currentAvailable; + return stackSize > 0 && 
currentFrame().currentAvailable; } - protected void setCurrentValue(long value) { - this.currentValue = value; - this.currentAvailable = true; + void release(String variableName) { + if (stackSize == 0) { + return; + } + Frame frame = stack[stackSize - 1]; + if (!frame.matches(variableName)) { + return; + } + stack[--stackSize].reset(); + if (stackSize > 0 && currentFrame().currentAvailable && !restoreCurrentPosition()) { + throw new IllegalStateException("LMDB trie cursor failed to restore parent frame for " + variableName); + } } - protected void clearCurrent() { - this.currentAvailable = false; + @Override + public void close() { + stackSize = 0; + if (cursor != null) { + cursor.close(); + cursor = null; + } } - void release() { - clearCurrent(); - if (records != null) { - records.close(); - records = null; + private void ensureCursor() { + if (cursor == null) { + cursor = queryAccess.openTrieCursor(state.txn(), patternPlan.indexName(), explicit); } } - @Override - public void close() { - release(); + private boolean restoreCurrentPosition() { + Frame frame = currentFrame(); + long target = frame.currentValue; + if (cursor != null && cursor.isPositioned() + && patternPlan.matchesPrefix(cursor, state, frame.keyFieldIndex) + && cursor.valueAt(frame.keyFieldIndex) == target) { + return true; + } + return seek(target); } - private boolean advanceFirst() { - long[] quad = nextQuad(); - if (quad == null) { - release(); - return false; + private Frame currentFrame() { + if (stackSize == 0) { + throw new IllegalStateException("LMDB trie cursor is not positioned"); } - setCurrentValue(quad[valueComponent]); - return true; + return stack[stackSize - 1]; } - private boolean advanceBeyond(long previousValue) { - long[] quad; - while ((quad = nextQuad()) != null) { - long nextValue = quad[valueComponent]; - if (nextValue != previousValue) { - setCurrentValue(nextValue); - return true; - } + private static final class Frame { + private String variableName; + private int 
keyFieldIndex; + private long currentValue; + private boolean currentAvailable; + + private void reset() { + variableName = null; + keyFieldIndex = -1; + currentValue = 0L; + currentAvailable = false; } - release(); - return false; - } - private long[] nextQuad() { - return records == null ? null : records.next(); + private boolean matches(String variableName) { + return this.variableName != null && this.variableName.equals(variableName); + } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java index d48aca9bdc..c08ac97715 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java @@ -16,16 +16,19 @@ final class LmdbUnionTrieCursor extends LmdbTrieCursor { private final LmdbTrieCursor explicitCursor; private final LmdbTrieCursor inferredCursor; - LmdbUnionTrieCursor(LmdbLftjPatternPlan patternPlan, String variableName, LmdbQueryAccess queryAccess) { - super(patternPlan, variableName, queryAccess, true); - this.explicitCursor = new LmdbTrieCursor(patternPlan, variableName, queryAccess, true); - this.inferredCursor = new LmdbTrieCursor(patternPlan, variableName, queryAccess, false); + LmdbUnionTrieCursor(LmdbLftjPatternPlan patternPlan, LmdbQueryAccess queryAccess, LmdbLftjBindingState state) { + super(patternPlan, queryAccess, state, true); + this.explicitCursor = new LmdbTrieCursor(patternPlan, queryAccess, state, true); + this.inferredCursor = new LmdbTrieCursor(patternPlan, queryAccess, state, false); } @Override - boolean initialize(LmdbLftjBindingState state) { - explicitCursor.initialize(state); - inferredCursor.initialize(state); + boolean open(String variableName) { + boolean explicitOpened = explicitCursor.open(variableName); + boolean inferredOpened = inferredCursor.open(variableName); + if (!explicitOpened 
&& !inferredOpened) { + return false; + } return mergeCurrent(); } @@ -57,28 +60,34 @@ boolean next() { } @Override - void release() { - clearCurrent(); - explicitCursor.release(); - inferredCursor.release(); + long value() { + if (explicitCursor.available() && inferredCursor.available()) { + return Math.min(explicitCursor.value(), inferredCursor.value()); + } + if (explicitCursor.available()) { + return explicitCursor.value(); + } + return inferredCursor.value(); + } + + @Override + protected boolean available() { + return explicitCursor.available() || inferredCursor.available(); + } + + @Override + void release(String variableName) { + explicitCursor.release(variableName); + inferredCursor.release(variableName); + } + + @Override + public void close() { + explicitCursor.close(); + inferredCursor.close(); } private boolean mergeCurrent() { - boolean explicitAvailable = explicitCursor.available(); - boolean inferredAvailable = inferredCursor.available(); - if (!explicitAvailable && !inferredAvailable) { - clearCurrent(); - return false; - } - if (!explicitAvailable) { - setCurrentValue(inferredCursor.value()); - return true; - } - if (!inferredAvailable) { - setCurrentValue(explicitCursor.value()); - return true; - } - setCurrentValue(Math.min(explicitCursor.value(), inferredCursor.value())); - return true; + return explicitCursor.available() || inferredCursor.available(); } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index 41a0020534..4c6ec5918c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -534,6 +534,10 @@ RecordIterator getTriples(Txn txn, String indexName, long subj, long pred, long return getTriplesUsingIndex(txn, subj, pred, obj, context, explicit, index, doRangeSearch); } + LmdbTrieKeyCursor openTrieCursor(Txn txn, 
String indexName, boolean explicit) { + return new LmdbTrieDbCursor(getIndex(indexName), explicit, txn); + } + boolean hasTriples(boolean explicit) throws IOException { TripleIndex mainIndex = indexes.get(0); return txnManager.doWith((stack, txn) -> { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java index e407472537..48b90d6154 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java @@ -15,6 +15,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import java.util.Set; @@ -102,6 +103,24 @@ void evaluateShouldCloseLiveScansOnEarlyClose() { "closing a started iteration should still release the read transaction"); } + @Test + void evaluateShouldReusePatternScansAcrossBacktracking() { + TestQueryAccess queryAccess = new TestQueryAccess(); + QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + + long count = 0; + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + while (iteration.hasNext()) { + iteration.next(); + count++; + } + } + + assertTrue(count > 0, "sanity check: the synthetic clique should still enumerate matching cycles"); + assertEquals(3, queryAccess.openTrieCursorCalls, + "LFTJ should keep one shared scan per pattern instead of reopening scans while backtracking"); + } + private QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess) { QueryEvaluationContext context = new QueryEvaluationContext.Minimal((Dataset) null); LmdbLftjEvaluationStrategy strategy = new LmdbLftjEvaluationStrategy( @@ -127,9 +146,9 @@ private LmdbLftjPlan createPlan() { fallbackExpr.getAssuredBindingNames(), List.of("a", "b", "c"), List.of( - new 
LmdbLftjPatternPlan(pattern1, "spoc"), - new LmdbLftjPatternPlan(pattern2, "spoc"), - new LmdbLftjPatternPlan(pattern3, "spoc"))); + new LmdbLftjPatternPlan(pattern1, "psoc"), + new LmdbLftjPatternPlan(pattern2, "psoc"), + new LmdbLftjPatternPlan(pattern3, "posc"))); } private StatementPattern statementPattern(String subjectName, String objectName) { @@ -163,6 +182,7 @@ private static final class TestQueryAccess implements LmdbQueryAccess { private int resolveValueCalls; private int releaseReadTxnCalls; private int openScanCalls; + private int openTrieCursorCalls; private int closedScanCalls; private TestQueryAccess() { @@ -226,7 +246,7 @@ public boolean includeInferred() { @Override public Set configuredIndexes() { - return Set.of("spoc"); + return Set.of("psoc", "posc"); } @Override @@ -236,6 +256,13 @@ public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, return new TestRecordIterator(quads, subj, pred, obj, context, this::recordClosedScan); } + @Override + public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { + openScanCalls++; + openTrieCursorCalls++; + return new TestTrieKeyCursor(quads, indexName, this::recordClosedScan); + } + private void recordClosedScan() { closedScanCalls++; } @@ -287,4 +314,131 @@ public void close() { } } } + + private static final class TestTrieKeyCursor implements LmdbTrieKeyCursor { + + private final List quads; + private final int[] order; + private final Runnable closeCallback; + + private long[] lowerBound; + private long[] upperBound; + private int prefixLength; + private int position; + private long[] current; + private boolean closed; + + private TestTrieKeyCursor(List quads, String indexName, Runnable closeCallback) { + this.quads = quads.stream() + .map(long[]::clone) + .sorted(Comparator.comparingLong((long[] quad) -> quad[componentIndex(indexName, 0)]) + .thenComparingLong(quad -> quad[componentIndex(indexName, 1)]) + .thenComparingLong(quad -> 
quad[componentIndex(indexName, 2)]) + .thenComparingLong(quad -> quad[componentIndex(indexName, 3)])) + .toList(); + this.order = new int[] { + componentIndex(indexName, 0), + componentIndex(indexName, 1), + componentIndex(indexName, 2), + componentIndex(indexName, 3) + }; + this.closeCallback = closeCallback; + } + + @Override + public boolean position(long[] lowerBound, long[] upperBound, int prefixLength) { + this.lowerBound = lowerBound.clone(); + this.upperBound = upperBound.clone(); + this.prefixLength = prefixLength; + this.position = 0; + this.current = null; + + while (position < quads.size()) { + long[] quad = quads.get(position); + if (comparePrefix(quad, this.lowerBound, this.prefixLength) < 0) { + position++; + continue; + } + if (compare(quad, this.upperBound) > 0) { + return false; + } + current = quad; + return true; + } + return false; + } + + @Override + public boolean next() { + if (current == null) { + return false; + } + + while (++position < quads.size()) { + long[] quad = quads.get(position); + if (compare(quad, upperBound) > 0) { + current = null; + return false; + } + current = quad; + return true; + } + + current = null; + return false; + } + + @Override + public boolean isPositioned() { + return current != null; + } + + @Override + public long valueAt(int keyFieldIndex) { + return current[order[keyFieldIndex]]; + } + + @Override + public void close() { + if (!closed) { + closed = true; + closeCallback.run(); + } + } + + private int compare(long[] quad, long[] bounds) { + for (int component : order) { + int comparison = Long.compare(quad[component], bounds[component]); + if (comparison != 0) { + return comparison; + } + } + return 0; + } + + private int comparePrefix(long[] quad, long[] bounds, int prefixLength) { + for (int i = 0; i < prefixLength; i++) { + int comparison = Long.compare(quad[order[i]], bounds[order[i]]); + if (comparison != 0) { + return comparison; + } + } + return 0; + } + + private static int componentIndex(String 
indexName, int keyFieldIndex) { + switch (indexName.charAt(keyFieldIndex)) { + case 's': + return 0; + case 'p': + return 1; + case 'o': + return 2; + case 'c': + return 3; + default: + throw new IllegalArgumentException("Unsupported LMDB index field: " + indexName.charAt(keyFieldIndex)); + } + } + } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java index 2bd2c37317..8b15e0bef0 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java @@ -325,6 +325,24 @@ public void testExplainOptimizedUsesLftjForCyclicQueryWithStrongIndexes(@TempDir } } + @Test + public void testExplainOptimizedUsesStableCycleIndexOrder(@TempDir File dataDir) { + LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); + Repository repository = createRepository(dataDir, config, conn -> { + }); + + try (RepositoryConnection connection = repository.getConnection()) { + String actualPlan = connection.prepareTupleQuery(stableCycleQuery()) + .explain(Explanation.Level.Optimized) + .toString(); + assertTrue(actualPlan, actualPlan.contains("LmdbLftjTupleExpr")); + assertTrue(actualPlan, actualPlan.contains("varOrder=a,b,c")); + assertTrue(actualPlan, actualPlan.contains("indexes=psoc,psoc,posc")); + } finally { + repository.shutDown(); + } + } + @Test public void testCyclicQueryMatchesResultsWhenLftjActivates(@TempDir File disabledDir, @TempDir File enabledDir) { LmdbStoreConfig disabled = new LmdbStoreConfig("spoc,posc"); @@ -388,6 +406,16 @@ private String cyclicQuery() { """; } + private String stableCycleQuery() { + return """ + SELECT ?a ?b ?c WHERE { + ?a ?b . + ?b ?c . + ?c ?a . 
+ } + """; + } + private void seedCyclicData(RepositoryConnection connection) { IRI a1 = F.createIRI("urn:a1"); IRI a2 = F.createIRI("urn:a2"); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index b32eef0622..79745fe631 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -39,7 +39,7 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) -@Warmup(iterations = 2, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 30, time = 1, timeUnit = TimeUnit.SECONDS) @BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" }) @Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @@ -108,15 +108,15 @@ public long cycle3() { return executeCount(QUERY_CYCLE_3); } -// @Benchmark -// public long cycle4() { -// return executeCount(QUERY_CYCLE_4); -// } -// -// @Benchmark -// public long cycle5() { -// return executeCount(QUERY_CYCLE_5); -// } + @Benchmark + public long cycle4() { + return executeCount(QUERY_CYCLE_4); + } + + //@Benchmark + public long cycle5() { + return executeCount(QUERY_CYCLE_5); + } private long executeCount(String query) { try (SailRepositoryConnection connection = repository.getConnection()) { From 06ca8084d28daf0ef1920e9118f78d62b575ea0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 13:35:23 +0200 Subject: [PATCH 10/32] continue optimizing lftj --- .../rdf4j/sail/lmdb/LmdbTrieDbCursor.java | 236 ++++++++++++++++++ .../rdf4j/sail/lmdb/LmdbTrieKeyCursor.java | 26 ++ .../FoafCliqueLftjCorrectnessTest.java | 102 ++++++++ 3 files changed, 364 insertions(+) create mode 100644 
core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieDbCursor.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieKeyCursor.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieDbCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieDbCursor.java new file mode 100644 index 0000000000..c5db463789 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieDbCursor.java @@ -0,0 +1,236 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.lwjgl.util.lmdb.LMDB.MDB_NEXT; +import static org.lwjgl.util.lmdb.LMDB.MDB_SET_RANGE; +import static org.lwjgl.util.lmdb.LMDB.MDB_SUCCESS; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_close; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_get; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_open; + +import java.nio.ByteBuffer; + +import org.eclipse.rdf4j.common.concurrent.locks.StampedLongAdderLockManager; +import org.eclipse.rdf4j.sail.SailException; +import org.lwjgl.PointerBuffer; +import org.lwjgl.system.MemoryStack; +import org.lwjgl.util.lmdb.MDBVal; + +final class LmdbTrieDbCursor implements LmdbTrieKeyCursor { + + private final Pool pool = Pool.get(); + private final TripleStore.TripleIndex index; + private final MDBVal keyData; + private final MDBVal valueData; + private final ByteBuffer lowerBoundBuffer; + private final TxnManager.Txn txnRef; + private final StampedLongAdderLockManager txnLockManager; + private final long txn; + private final int dbi; + private final long cursor; + private final Thread ownerThread = Thread.currentThread(); + private final long[] keyValues = new long[4]; + private final long[] upperKeyValues = new long[4]; + private final int[] keyComponents; + + private boolean closed; + private boolean positioned; + + LmdbTrieDbCursor(TripleStore.TripleIndex index, boolean explicit, TxnManager.Txn txnRef) { + this.index = index; + this.txnRef = txnRef; + this.txnLockManager = txnRef.lockManager(); + this.txn = txnRef.get(); + this.dbi = index.getDB(explicit); + this.keyData = pool.getVal(); + this.valueData = pool.getVal(); + this.lowerBoundBuffer = pool.getKeyBuffer(); + this.keyComponents = componentIndexes(index.getFieldSeq()); + this.cursor = openCursor(); + } + + @Override + public boolean 
position(long[] lowerBound, long[] upperBound, int prefixLength) { + long readStamp = readLock(); + try { + if (closed) { + return false; + } + + lowerBoundBuffer.clear(); + writeLowerBoundPrefix(lowerBound, prefixLength); + lowerBoundBuffer.flip(); + keyData.mv_data(lowerBoundBuffer); + loadUpperKeyValues(upperBound); + + int result = mdb_cursor_get(cursor, keyData, valueData, MDB_SET_RANGE); + if (result == MDB_SUCCESS) { + decodeCurrentKey(); + positioned = withinUpperBound(); + } else { + positioned = false; + } + return positioned; + } finally { + txnLockManager.unlockRead(readStamp); + } + } + + @Override + public boolean next() { + long readStamp = readLock(); + try { + if (closed || !positioned) { + return false; + } + + int result = mdb_cursor_get(cursor, keyData, valueData, MDB_NEXT); + if (result == MDB_SUCCESS) { + decodeCurrentKey(); + positioned = withinUpperBound(); + } else { + positioned = false; + } + return positioned; + } finally { + txnLockManager.unlockRead(readStamp); + } + } + + @Override + public boolean isPositioned() { + return positioned; + } + + @Override + public long valueAt(int keyFieldIndex) { + if (closed || !positioned) { + throw new IllegalStateException("No positioned trie cursor key available"); + } + return keyValues[keyFieldIndex]; + } + + @Override + public void close() { + closeInternal(true); + } + + private boolean withinUpperBound() { + for (int i = 0; i < keyValues.length; i++) { + int comparison = Long.compare(keyValues[i], upperKeyValues[i]); + if (comparison != 0) { + return comparison < 0; + } + } + return true; + } + + private void decodeCurrentKey() { + ByteBuffer key = keyData.mv_data().duplicate(); + keyValues[0] = Varint.readUnsigned(key); + keyValues[1] = Varint.readUnsigned(key); + keyValues[2] = Varint.readUnsigned(key); + keyValues[3] = Varint.readUnsigned(key); + } + + private void loadUpperKeyValues(long[] upperBound) { + for (int i = 0; i < upperKeyValues.length; i++) { + upperKeyValues[i] = 
upperBound[keyComponents[i]]; + } + } + + private void writeLowerBoundPrefix(long[] lowerBound, int prefixLength) { + for (int i = 0; i < prefixLength; i++) { + Varint.writeUnsigned(lowerBoundBuffer, lowerBound[keyComponents[i]]); + } + } + + private long openCursor() { + long readStamp = readLock(); + try (MemoryStack stack = MemoryStack.stackPush()) { + PointerBuffer pp = stack.mallocPointer(1); + int result = mdb_cursor_open(txn, dbi, pp); + if (result != MDB_SUCCESS) { + throw new SailException("Unable to open LMDB trie cursor: " + result); + } + return pp.get(0); + } finally { + txnLockManager.unlockRead(readStamp); + } + } + + private long readLock() { + try { + return txnLockManager.readLock(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new SailException(e); + } + } + + private void closeInternal(boolean maybeCalledAsync) { + if (closed) { + return; + } + + long writeStamp = 0L; + boolean writeLocked = false; + if (maybeCalledAsync && ownerThread != Thread.currentThread()) { + try { + writeStamp = txnLockManager.writeLock(); + writeLocked = true; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new SailException(e); + } + } + + try { + if (!closed) { + mdb_cursor_close(cursor); + pool.free(keyData); + pool.free(valueData); + pool.free(lowerBoundBuffer); + closed = true; + positioned = false; + } + } finally { + if (writeLocked) { + txnLockManager.unlockWrite(writeStamp); + } + } + } + + private static int[] componentIndexes(char[] fieldSeq) { + int[] components = new int[fieldSeq.length]; + for (int i = 0; i < fieldSeq.length; i++) { + switch (fieldSeq[i]) { + case 's': + components[i] = TripleStore.SUBJ_IDX; + break; + case 'p': + components[i] = TripleStore.PRED_IDX; + break; + case 'o': + components[i] = TripleStore.OBJ_IDX; + break; + case 'c': + components[i] = TripleStore.CONTEXT_IDX; + break; + default: + throw new IllegalArgumentException("Unknown LMDB index field: " + 
fieldSeq[i]); + } + } + return components; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieKeyCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieKeyCursor.java new file mode 100644 index 0000000000..fca8bb3491 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieKeyCursor.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +interface LmdbTrieKeyCursor extends AutoCloseable { + + boolean position(long[] lowerBound, long[] upperBound, int prefixLength); + + boolean next(); + + boolean isPositioned(); + + long valueAt(int keyFieldIndex); + + @Override + void close(); +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java new file mode 100644 index 0000000000..ba7e74cde4 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java @@ -0,0 +1,102 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb.benchmark; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; + +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.lmdb.LmdbStore; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class FoafCliqueLftjCorrectnessTest { + + @Test + void cycle3ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File enabledDir) { + Repository disabledRepository = createRepository(disabledDir, false); + Repository enabledRepository = createRepository(enabledDir, true); + + try { + populate(disabledRepository); + populate(enabledRepository); + + long expected = executeCount(disabledRepository, cycleQuery(3)); + long actual = executeCount(enabledRepository, cycleQuery(3)); + + assertEquals(expected, actual, "LFTJ must preserve the cycle3 result count"); + } finally { + disabledRepository.shutDown(); + enabledRepository.shutDown(); + } + } + + private Repository createRepository(File dataDir, boolean lftjEnabled) { + LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); + config.setLftjEnabled(lftjEnabled); + config.setForceSync(false); + config.setValueDBSize(1_073_741_824L); + config.setTripleDBSize(config.getValueDBSize()); + + Repository repository = new SailRepository(new 
LmdbStore(dataDir, config)); + repository.init(); + return repository; + } + + private void populate(Repository repository) { + try (SailRepositoryConnection connection = (SailRepositoryConnection) repository.getConnection()) { + new FoafCliqueDataGenerator(300, 30, 3, 6, 900, 12345L).populate(connection); + } + } + + private long executeCount(Repository repository, String query) { + try (RepositoryConnection connection = repository.getConnection()) { + return connection.prepareTupleQuery(query).evaluate().stream().count(); + } + } + + private static String cycleQuery(int size) { + StringBuilder builder = new StringBuilder(); + builder.append("PREFIX foaf: \n"); + builder.append("SELECT * WHERE {\n"); + for (int i = 0; i < size; i++) { + builder.append(" ?") + .append(variableName(i)) + .append(" foaf:knows ?") + .append(variableName((i + 1) % size)) + .append(" .\n"); + } + builder.append(" FILTER ("); + boolean first = true; + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (!first) { + builder.append(" && "); + } + builder.append("?").append(variableName(i)).append(" != ?").append(variableName(j)); + first = false; + } + } + builder.append(")\n"); + builder.append("}\n"); + return builder.toString(); + } + + private static char variableName(int index) { + return (char) ('a' + index); + } +} From 100bf1da2a13c8d15ce4b6020af3be8ca97e9f6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 15:12:01 +0200 Subject: [PATCH 11/32] lftj is faster --- .../rdf4j/sail/lmdb/LmdbCachedFrontier.java | 58 +++ .../rdf4j/sail/lmdb/LmdbCachedTrieCursor.java | 128 +++++++ .../sail/lmdb/LmdbDerivedBinaryRelation.java | 173 +++++++++ .../rdf4j/sail/lmdb/LmdbLftjCursor.java | 28 ++ .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 119 ++----- .../rdf4j/sail/lmdb/LmdbLftjMetrics.java | 63 ++++ .../rdf4j/sail/lmdb/LmdbLftjOptimizer.java | 9 +- .../rdf4j/sail/lmdb/LmdbLftjPatternPlan.java | 76 ++++ 
.../eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java | 3 +- .../sail/lmdb/LmdbLftjPreparedPlanCache.java | 94 +++++ .../sail/lmdb/LmdbPrefixFrontierProvider.java | 329 ++++++++++++++++++ .../rdf4j/sail/lmdb/LmdbQueryAccess.java | 8 + .../eclipse/rdf4j/sail/lmdb/LmdbStore.java | 6 + .../rdf4j/sail/lmdb/LmdbStoreConnection.java | 10 + .../rdf4j/sail/lmdb/LmdbTrieCursor.java | 17 +- .../rdf4j/sail/lmdb/LmdbUnionTrieCursor.java | 10 +- .../rdf4j/sail/lmdb/LmdbLftjExecutorTest.java | 86 ++++- .../sail/lmdb/LmdbLftjOptimizerTest.java | 153 ++++++++ .../benchmark/FoafCliqueQueryBenchmark.java | 2 +- 19 files changed, 1262 insertions(+), 110 deletions(-) create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java new file mode 100644 index 0000000000..9103aa68e0 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java @@ -0,0 +1,58 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.Arrays; + +final class LmdbCachedFrontier { + + static final LmdbCachedFrontier EMPTY = new LmdbCachedFrontier(new long[0], null); + + private final long[] values; + private final long[] counts; + + LmdbCachedFrontier(long[] values, long[] counts) { + this.values = values; + this.counts = counts; + } + + boolean isEmpty() { + return values.length == 0; + } + + int size() { + return values.length; + } + + long valueAt(int index) { + return values[index]; + } + + long countAt(int index) { + return counts == null ? 1L : counts[index]; + } + + int seek(long target) { + int index = Arrays.binarySearch(values, target); + return index >= 0 ? index : -index - 1; + } + + long countFor(long value) { + if (counts == null) { + int index = Arrays.binarySearch(values, value); + return index >= 0 ? 1L : 0L; + } + + int index = Arrays.binarySearch(values, value); + return index >= 0 ? counts[index] : 0L; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java new file mode 100644 index 0000000000..f923202eea --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java @@ -0,0 +1,128 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +final class LmdbCachedTrieCursor implements LmdbLftjCursor { + + private final LmdbLftjPatternPlan patternPlan; + private final LmdbPrefixFrontierProvider provider; + private final Frame[] stack = new Frame[4]; + + private int stackSize; + + LmdbCachedTrieCursor(LmdbLftjPatternPlan patternPlan, LmdbPrefixFrontierProvider provider) { + this.patternPlan = patternPlan; + this.provider = provider; + for (int i = 0; i < stack.length; i++) { + stack[i] = new Frame(); + } + } + + @Override + public boolean open(String variableName) { + LmdbCachedFrontier frontier = provider.frontier(patternPlan, variableName); + if (frontier.isEmpty()) { + return false; + } + + Frame frame = stack[stackSize++]; + frame.variableName = variableName; + frame.frontier = frontier; + frame.position = 0; + return true; + } + + @Override + public boolean seek(long target) { + if (stackSize == 0) { + return false; + } + + Frame frame = currentFrame(); + int position = frame.frontier.seek(target); + if (position >= frame.frontier.size()) { + frame.position = -1; + return false; + } + + frame.position = position; + return true; + } + + @Override + public boolean next() { + Frame frame = currentFrame(); + if (frame.position < 0) { + return false; + } + + int nextPosition = frame.position + 1; + if (nextPosition >= frame.frontier.size()) { + frame.position = -1; + return false; + } + + frame.position = nextPosition; + return true; + } + + @Override + public long value() { + Frame frame = currentFrame(); + if (frame.position < 0) { + throw new 
IllegalStateException("LMDB cached trie cursor is not positioned"); + } + return frame.frontier.valueAt(frame.position); + } + + @Override + public void release(String variableName) { + if (stackSize == 0) { + return; + } + Frame frame = stack[stackSize - 1]; + if (!frame.matches(variableName)) { + return; + } + stack[--stackSize].reset(); + } + + @Override + public void close() { + while (stackSize > 0) { + stack[--stackSize].reset(); + } + } + + private Frame currentFrame() { + if (stackSize == 0) { + throw new IllegalStateException("LMDB cached trie cursor is not positioned"); + } + return stack[stackSize - 1]; + } + + private static final class Frame { + private String variableName; + private LmdbCachedFrontier frontier; + private int position; + + private void reset() { + variableName = null; + frontier = null; + position = -1; + } + + private boolean matches(String variableName) { + return this.variableName != null && this.variableName.equals(variableName); + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java new file mode 100644 index 0000000000..1a800c2744 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java @@ -0,0 +1,173 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +final class LmdbDerivedBinaryRelation { + + private final int sourceComponent; + private final int targetComponent; + private final LmdbCachedFrontier rootFrontier; + private final Map adjacency; + + LmdbDerivedBinaryRelation(int sourceComponent, int targetComponent, LmdbCachedFrontier rootFrontier, + Map adjacency) { + this.sourceComponent = sourceComponent; + this.targetComponent = targetComponent; + this.rootFrontier = rootFrontier; + this.adjacency = adjacency; + } + + int sourceComponent() { + return sourceComponent; + } + + int targetComponent() { + return targetComponent; + } + + LmdbCachedFrontier rootFrontier() { + return rootFrontier; + } + + LmdbCachedFrontier frontier(long sourceValue) { + return adjacency.getOrDefault(sourceValue, LmdbCachedFrontier.EMPTY); + } + + long count(long sourceValue, long targetValue) { + return frontier(sourceValue).countFor(targetValue); + } + + static final class Builder { + + private final int sourceComponent; + private final int targetComponent; + private final LongArrayBuilder roots = new LongArrayBuilder(); + private final Map adjacency = new HashMap<>(); + private final LongArrayBuilder targets = new LongArrayBuilder(); + private final LongArrayBuilder counts = new LongArrayBuilder(); + + private boolean sourceOpen; + private boolean pairOpen; + private long currentSource; + private long currentTarget; + private long currentCount; + + Builder(int sourceComponent, int targetComponent) { + this.sourceComponent = sourceComponent; + this.targetComponent = targetComponent; + } + + void add(long sourceValue, long targetValue) { + if (!sourceOpen || sourceValue != currentSource) { + finishPair(); + finishSource(); + sourceOpen = true; + 
currentSource = sourceValue; + roots.add(sourceValue); + } + + if (!pairOpen || targetValue != currentTarget) { + finishPair(); + pairOpen = true; + currentTarget = targetValue; + currentCount = 1; + return; + } + + currentCount++; + } + + LmdbDerivedBinaryRelation build() { + finishPair(); + finishSource(); + return new LmdbDerivedBinaryRelation(sourceComponent, targetComponent, + new LmdbCachedFrontier(roots.toArray(), null), adjacency); + } + + private void finishPair() { + if (!pairOpen) { + return; + } + targets.add(currentTarget); + counts.add(currentCount); + pairOpen = false; + currentTarget = 0L; + currentCount = 0L; + } + + private void finishSource() { + if (!sourceOpen) { + return; + } + adjacency.put(currentSource, new LmdbCachedFrontier(targets.toArray(), counts.toArray())); + targets.clear(); + counts.clear(); + sourceOpen = false; + currentSource = 0L; + } + } + + static final class RelationKey { + private final String indexName; + private final boolean includeInferred; + private final long predicateId; + + RelationKey(String indexName, boolean includeInferred, long predicateId) { + this.indexName = indexName; + this.includeInferred = includeInferred; + this.predicateId = predicateId; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof RelationKey)) { + return false; + } + RelationKey o = (RelationKey) other; + return includeInferred == o.includeInferred && predicateId == o.predicateId + && Objects.equals(indexName, o.indexName); + } + + @Override + public int hashCode() { + return Objects.hash(indexName, includeInferred, predicateId); + } + } + + static final class LongArrayBuilder { + private long[] values = new long[8]; + private int size; + + void add(long value) { + if (size == values.length) { + long[] newValues = new long[values.length * 2]; + System.arraycopy(values, 0, newValues, 0, values.length); + values = newValues; + } + values[size++] = value; + } + + long[] toArray() { + long[] result = new long[size]; 
+ System.arraycopy(values, 0, result, 0, size); + return result; + } + + void clear() { + size = 0; + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java new file mode 100644 index 0000000000..dd04b6ebb5 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +interface LmdbLftjCursor extends AutoCloseable { + + boolean open(String variableName); + + boolean seek(long target); + + boolean next(); + + long value(); + + void release(String variableName); + + @Override + void close(); +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index 5ab31bfcd0..fe12c21ce2 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -63,9 +63,10 @@ private final class LmdbLftjIteration extends LookAheadIteration { private final QueryEvaluationContext context; private final LmdbQueryAccess queryAccess; private final LmdbLftjMetrics metrics; + private final LmdbPrefixFrontierProvider frontierProvider; private final List searchVariables; - private final List 
patternCursors; - private final List> cursorsByDepth; + private final List patternCursors; + private final List> cursorsByDepth; private final boolean[] initializedDepths; private final boolean[] advanceDepths; @@ -80,8 +81,9 @@ private LmdbLftjIteration(LmdbLftjPlan plan, LmdbLftjBindingState state, QueryEv this.context = context; this.queryAccess = queryAccess; this.metrics = metrics; + this.frontierProvider = new LmdbPrefixFrontierProvider(queryAccess, state, metrics); this.searchVariables = collectSearchVariables(plan, state); - this.patternCursors = createPatternCursors(plan, queryAccess, state); + this.patternCursors = createPatternCursors(plan, frontierProvider); this.cursorsByDepth = createDepthCursors(plan, searchVariables, patternCursors); this.initializedDepths = new boolean[searchVariables.size()]; this.advanceDepths = new boolean[searchVariables.size()]; @@ -105,7 +107,7 @@ protected BindingSet getNextElement() { @Override protected void handleClose() { - for (LmdbTrieCursor cursor : patternCursors) { + for (LmdbLftjCursor cursor : patternCursors) { cursor.close(); } state.close(); @@ -114,7 +116,7 @@ protected void handleClose() { private BindingSet computeNextElement() { while (depth >= 0) { if (depth == searchVariables.size()) { - long multiplicity = witnessMultiplicity(plan, state, queryAccess, metrics, searchVariables); + long multiplicity = witnessMultiplicity(plan, metrics, frontierProvider); backtrackAfterLeaf(); if (multiplicity > 0) { BindingSet result = state.materialize(context); @@ -154,7 +156,7 @@ private BindingSet computeNextElement() { private boolean positionDepth(int depth, boolean advanceExisting) { String variableName = searchVariables.get(depth); - List cursors = cursorsByDepth.get(depth); + List cursors = cursorsByDepth.get(depth); state.clear(variableName); if (cursors.isEmpty()) { @@ -162,7 +164,7 @@ private boolean positionDepth(int depth, boolean advanceExisting) { } if (!advanceExisting) { - for (LmdbTrieCursor cursor : 
cursors) { + for (LmdbLftjCursor cursor : cursors) { metrics.recordCandidateScan(); if (!cursor.open(variableName)) { return false; @@ -173,7 +175,7 @@ private boolean positionDepth(int depth, boolean advanceExisting) { } long current = Long.MIN_VALUE; - for (LmdbTrieCursor cursor : cursors) { + for (LmdbLftjCursor cursor : cursors) { current = Math.max(current, cursor.value()); } current = align(cursors, current); @@ -208,7 +210,7 @@ private void releaseDepth(int depth) { state.clear(variableName); initializedDepths[depth] = false; advanceDepths[depth] = false; - for (LmdbTrieCursor cursor : cursorsByDepth.get(depth)) { + for (LmdbLftjCursor cursor : cursorsByDepth.get(depth)) { cursor.release(variableName); } } @@ -224,23 +226,20 @@ private List collectSearchVariables(LmdbLftjPlan plan, LmdbLftjBindingSt return searchVariables; } - private List createPatternCursors(LmdbLftjPlan plan, LmdbQueryAccess queryAccess, - LmdbLftjBindingState state) { - List cursors = new ArrayList<>(plan.patternPlans().size()); + private List createPatternCursors(LmdbLftjPlan plan, LmdbPrefixFrontierProvider frontierProvider) { + List cursors = new ArrayList<>(plan.patternPlans().size()); for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { - cursors.add(queryAccess.includeInferred() - ? 
new LmdbUnionTrieCursor(patternPlan, queryAccess, state) - : new LmdbTrieCursor(patternPlan, queryAccess, state, true)); + cursors.add(new LmdbCachedTrieCursor(patternPlan, frontierProvider)); } return cursors; } - private List> createDepthCursors(LmdbLftjPlan plan, List searchVariables, - List patternCursors) { - List> cursorsByDepth = new ArrayList<>(searchVariables.size()); + private List> createDepthCursors(LmdbLftjPlan plan, List searchVariables, + List patternCursors) { + List> cursorsByDepth = new ArrayList<>(searchVariables.size()); List patternPlans = plan.patternPlans(); for (String variableName : searchVariables) { - List cursors = new ArrayList<>(); + List cursors = new ArrayList<>(); for (int i = 0; i < patternPlans.size(); i++) { LmdbLftjPatternPlan patternPlan = patternPlans.get(i); if (!patternPlan.containsVariable(variableName)) { @@ -253,12 +252,12 @@ private List> createDepthCursors(LmdbLftjPlan plan, List cursors, long target) { + private long align(List cursors, long target) { long current = target; while (true) { boolean allMatch = true; long max = current; - for (LmdbTrieCursor cursor : cursors) { + for (LmdbLftjCursor cursor : cursors) { if (!cursor.seek(current)) { return -1; } @@ -274,11 +273,11 @@ private long align(List cursors, long target) { } } - private long witnessMultiplicity(LmdbLftjPlan plan, LmdbLftjBindingState state, LmdbQueryAccess queryAccess, - LmdbLftjMetrics metrics, List searchVariables) { + private long witnessMultiplicity(LmdbLftjPlan plan, LmdbLftjMetrics metrics, + LmdbPrefixFrontierProvider frontierProvider) { long multiplicity = 1; for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { - long witnesses = countMatches(patternPlan, state, queryAccess, metrics, searchVariables); + long witnesses = countMatches(patternPlan, metrics, frontierProvider); if (witnesses == 0) { return 0; } @@ -287,76 +286,10 @@ private long witnessMultiplicity(LmdbLftjPlan plan, LmdbLftjBindingState state, return multiplicity; } - 
private long countMatches(LmdbLftjPatternPlan patternPlan, LmdbLftjBindingState state, LmdbQueryAccess queryAccess, - LmdbLftjMetrics metrics, List searchVariables) { - if (!patternPlan.hasHiddenTerms() && containsSearchVariable(patternPlan, searchVariables)) { - return 1; - } - - long[] scanKey = patternPlan.scanKey(state); + private long countMatches(LmdbLftjPatternPlan patternPlan, LmdbLftjMetrics metrics, + LmdbPrefixFrontierProvider frontierProvider) { metrics.recordWitnessScan(); - if (!queryAccess.includeInferred()) { - return countMatches(queryAccess, state, patternPlan, scanKey, true); - } - - return countUnionMatches(queryAccess, state, patternPlan, scanKey); - } - - private boolean containsSearchVariable(LmdbLftjPatternPlan patternPlan, List searchVariables) { - for (String searchVariable : searchVariables) { - if (patternPlan.containsVariable(searchVariable)) { - return true; - } - } - return false; - } - - private long countMatches(LmdbQueryAccess queryAccess, LmdbLftjBindingState state, LmdbLftjPatternPlan patternPlan, - long[] scanKey, boolean explicit) { - long count = 0; - try (RecordIterator records = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], scanKey[1], - scanKey[2], scanKey[3], explicit)) { - while (records.next() != null) { - count++; - } - } - return count; - } - - private long countUnionMatches(LmdbQueryAccess queryAccess, LmdbLftjBindingState state, - LmdbLftjPatternPlan patternPlan, long[] scanKey) { - long count = 0; - try (RecordIterator explicitRecords = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], - scanKey[1], scanKey[2], scanKey[3], true); - RecordIterator inferredRecords = queryAccess.openScan(state.txn(), patternPlan.indexName(), scanKey[0], - scanKey[1], scanKey[2], scanKey[3], false)) { - long[] explicitQuad = explicitRecords.next(); - long[] inferredQuad = inferredRecords.next(); - while (explicitQuad != null || inferredQuad != null) { - if (inferredQuad == null || 
explicitQuad != null && compareQuads(explicitQuad, inferredQuad) <= 0) { - count++; - long[] previous = explicitQuad; - explicitQuad = explicitRecords.next(); - if (inferredQuad != null && compareQuads(previous, inferredQuad) == 0) { - inferredQuad = inferredRecords.next(); - } - } else { - count++; - inferredQuad = inferredRecords.next(); - } - } - } - return count; - } - - private int compareQuads(long[] left, long[] right) { - for (int i = 0; i < 4; i++) { - int comparison = Long.compare(left[i], right[i]); - if (comparison != 0) { - return comparison; - } - } - return 0; + return frontierProvider.countMatches(patternPlan); } private final class LazyFallbackStep { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java index 16b8d40c2e..700d5c004c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java @@ -16,6 +16,13 @@ final class LmdbLftjMetrics { private long candidateScans; private long witnessScans; private long emittedBindings; + private long frontierLoads; + private long frontierHits; + private long countLoads; + private long countHits; + private long relationLoads; + private long relationHits; + private long relationUses; void recordCandidateScan() { candidateScans++; @@ -29,6 +36,34 @@ void recordEmitted(long count) { emittedBindings += count; } + void recordFrontierLoad() { + frontierLoads++; + } + + void recordFrontierHit() { + frontierHits++; + } + + void recordCountLoad() { + countLoads++; + } + + void recordCountHit() { + countHits++; + } + + void recordRelationLoad() { + relationLoads++; + } + + void recordRelationHit() { + relationHits++; + } + + void recordRelationUse() { + relationUses++; + } + long candidateScans() { return candidateScans; } @@ -40,4 +75,32 @@ long witnessScans() { long emittedBindings() { return 
emittedBindings; } + + long frontierLoads() { + return frontierLoads; + } + + long frontierHits() { + return frontierHits; + } + + long countLoads() { + return countLoads; + } + + long countHits() { + return countHits; + } + + long relationLoads() { + return relationLoads; + } + + long relationHits() { + return relationHits; + } + + long relationUses() { + return relationUses; + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java index ad64a10ff8..0f70939439 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java @@ -13,6 +13,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Set; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.Dataset; @@ -70,8 +71,14 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { return false; } + Set configuredIndexes = queryAccess.configuredIndexes(); TupleExpr fallbackExpr = rebuildJoin(patterns.stream().map(TupleExpr::clone).toList()); - LmdbLftjPlanner.PlanningResult plan = planner.plan(fallbackExpr, patterns, queryAccess.configuredIndexes()); + String cacheKey = LmdbLftjPreparedPlanCache.normalizedKey(patterns, configuredIndexes); + LmdbLftjPlanner.PlanningResult plan = queryAccess.cachedPlanningResult(cacheKey); + if (plan == null) { + plan = planner.plan(fallbackExpr, patterns, configuredIndexes); + queryAccess.cachePlanningResult(cacheKey, plan); + } if (!plan.planned()) { logger.debug("Skipping LMDB LFTJ for {}: {}", node.getSignature(), plan.rejectionReason()); return false; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java index 4de14bb58c..dc8dd9cc5f 100644 --- 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java @@ -54,6 +54,10 @@ String indexName() { return indexName; } + LmdbLftjPatternPlan copy() { + return new LmdbLftjPatternPlan(pattern, indexName); + } + List terms() { return List.of(terms); } @@ -95,6 +99,78 @@ int keyFieldIndex(String variableName) { throw new IllegalArgumentException("Pattern does not bind variable " + variableName); } + int keyFieldIndex(int component) { + for (int i = 0; i < keyTerms.length; i++) { + if (keyTerms[i].component() == component) { + return i; + } + } + throw new IllegalArgumentException("Pattern does not bind LMDB component " + component); + } + + TermRef keyTerm(int keyFieldIndex) { + return keyTerms[keyFieldIndex]; + } + + TermRef termForComponent(int component) { + switch (component) { + case TripleStore.SUBJ_IDX: + return subject; + case TripleStore.PRED_IDX: + return predicate; + case TripleStore.OBJ_IDX: + return object; + case TripleStore.CONTEXT_IDX: + return context; + default: + throw new IllegalArgumentException("Unknown LMDB component: " + component); + } + } + + TermRef subjectTerm() { + return subject; + } + + TermRef predicateTerm() { + return predicate; + } + + TermRef objectTerm() { + return object; + } + + int fixedPrefixLength(LmdbLftjBindingState state) { + for (int i = 0; i < keyTerms.length; i++) { + if (state.fixedId(keyTerms[i]) < 0) { + return i; + } + } + return keyTerms.length; + } + + void fillMatchRange(LmdbLftjBindingState state, long[] minKey, long[] maxKey) { + Arrays.fill(maxKey, Long.MAX_VALUE); + for (TermRef term : keyTerms) { + long fixedId = state.fixedId(term); + if (fixedId >= 0) { + minKey[term.component()] = fixedId; + maxKey[term.component()] = fixedId; + } else if (!term.isHidden()) { + throw new IllegalStateException("LMDB LFTJ requires all visible terms to be fixed for match counting"); + } + } + } + + boolean 
canUseDerivedBinaryRelation() { + return predicate.isConstant() + && subject.isVisible() + && object.isVisible() + && !context.isVisible() + && !subject.isHidden() + && !object.isHidden() + && ("psoc".equals(indexName) || "posc".equals(indexName)); + } + void fillRangeBounds(LmdbLftjBindingState state, String variableName, long lowerBound, long[] minKey, long[] maxKey) { Arrays.fill(maxKey, Long.MAX_VALUE); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java index 2f44a496d1..1924924285 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java @@ -65,7 +65,8 @@ int patternCount() { } LmdbLftjPlan copy() { - return new LmdbLftjPlan(fallbackExpr.clone(), bindingNames, assuredBindingNames, variableOrder, patternPlans); + return new LmdbLftjPlan(fallbackExpr.clone(), bindingNames, assuredBindingNames, variableOrder, + patternPlans.stream().map(LmdbLftjPatternPlan::copy).collect(Collectors.toList())); } @Override diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java new file mode 100644 index 0000000000..28fd6ed478 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java @@ -0,0 +1,94 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.Var; + +final class LmdbLftjPreparedPlanCache { + + private static final int MAX_ENTRIES = 256; + + private final Map entries = new LinkedHashMap<>(32, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > MAX_ENTRIES; + } + }; + + synchronized LmdbLftjPlanner.PlanningResult get(String cacheKey) { + return copy(entries.get(cacheKey)); + } + + synchronized void put(String cacheKey, LmdbLftjPlanner.PlanningResult result) { + entries.put(cacheKey, copy(result)); + } + + synchronized void clear() { + entries.clear(); + } + + static String normalizedKey(List patterns, Set configuredIndexes) { + Map aliases = new LinkedHashMap<>(); + StringBuilder builder = new StringBuilder(configuredIndexes.size() * 6 + patterns.size() * 32); + builder.append("indexes="); + configuredIndexes.stream().sorted().forEach(indexName -> builder.append(indexName).append(',')); + builder.append(";patterns="); + int[] nextAlias = { 0 }; + for (StatementPattern pattern : patterns) { + builder.append('[').append(pattern.getScope().name()).append(';'); + appendTerm(builder, pattern.getSubjectVar(), aliases, nextAlias); + appendTerm(builder, pattern.getPredicateVar(), aliases, nextAlias); + appendTerm(builder, pattern.getObjectVar(), aliases, nextAlias); + appendTerm(builder, pattern.getContextVar(), aliases, nextAlias); + builder.append(']'); + } + return builder.toString(); + } + + private static void appendTerm(StringBuilder builder, Var var, Map aliases, int[] nextAlias) { + if (var == null) { + 
builder.append("null;"); + return; + } + if (var.hasValue()) { + builder.append("const=").append(valueKey(var.getValue())).append(';'); + return; + } + if (var.isAnonymous() || var.getName() == null) { + builder.append("hidden;"); + return; + } + String alias = aliases.computeIfAbsent(var.getName(), ignored -> "v" + nextAlias[0]++); + builder.append(alias).append(';'); + } + + private static String valueKey(Value value) { + return value.getClass().getSimpleName() + ':' + value; + } + + private LmdbLftjPlanner.PlanningResult copy(LmdbLftjPlanner.PlanningResult result) { + if (result == null) { + return null; + } + if (!result.planned()) { + return LmdbLftjPlanner.PlanningResult.rejected(result.rejectionReason()); + } + return LmdbLftjPlanner.PlanningResult.planned(result.plan().copy()); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java new file mode 100644 index 0000000000..fb52dfe952 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java @@ -0,0 +1,329 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +final class LmdbPrefixFrontierProvider { + + private final LmdbQueryAccess queryAccess; + private final LmdbLftjBindingState state; + private final LmdbLftjMetrics metrics; + private final Map frontierCache = new HashMap<>(); + private final Map countCache = new HashMap<>(); + private final Map relationCache = new HashMap<>(); + + LmdbPrefixFrontierProvider(LmdbQueryAccess queryAccess, LmdbLftjBindingState state, LmdbLftjMetrics metrics) { + this.queryAccess = queryAccess; + this.state = state; + this.metrics = metrics; + } + + LmdbCachedFrontier frontier(LmdbLftjPatternPlan patternPlan, String variableName) { + FrontierKey key = FrontierKey.create(patternPlan, variableName, state); + LmdbCachedFrontier frontier = frontierCache.get(key); + if (frontier != null) { + metrics.recordFrontierHit(); + return frontier; + } + + metrics.recordFrontierLoad(); + frontier = loadFrontier(patternPlan, variableName); + frontierCache.put(key, frontier); + return frontier; + } + + long countMatches(LmdbLftjPatternPlan patternPlan) { + CountKey key = CountKey.create(patternPlan, state); + Long count = countCache.get(key); + if (count != null) { + metrics.recordCountHit(); + return count; + } + + metrics.recordCountLoad(); + long loaded = loadCount(patternPlan); + countCache.put(key, loaded); + return loaded; + } + + private LmdbCachedFrontier loadFrontier(LmdbLftjPatternPlan patternPlan, String variableName) { + LmdbCachedFrontier derived = derivedFrontier(patternPlan, variableName); + if (derived != null) { + metrics.recordRelationUse(); + return derived; + } + + int keyFieldIndex = patternPlan.keyFieldIndex(variableName); + long[] lowerBound = new long[4]; + long[] 
upperBound = new long[4]; + patternPlan.fillRangeBounds(state, variableName, 0L, lowerBound, upperBound); + + LmdbDerivedBinaryRelation.LongArrayBuilder values = new LmdbDerivedBinaryRelation.LongArrayBuilder(); + long[] last = { Long.MIN_VALUE }; + forEachUniqueRow(patternPlan, lowerBound, upperBound, keyFieldIndex + 1, row -> { + long value = row[keyFieldIndex]; + if (value != last[0]) { + values.add(value); + last[0] = value; + } + }); + return new LmdbCachedFrontier(values.toArray(), null); + } + + private long loadCount(LmdbLftjPatternPlan patternPlan) { + Long derived = derivedCount(patternPlan); + if (derived != null) { + metrics.recordRelationUse(); + return derived; + } + + long[] lowerBound = new long[4]; + long[] upperBound = new long[4]; + patternPlan.fillMatchRange(state, lowerBound, upperBound); + + long[] count = { 0L }; + forEachUniqueRow(patternPlan, lowerBound, upperBound, patternPlan.fixedPrefixLength(state), row -> count[0]++); + return count[0]; + } + + private LmdbCachedFrontier derivedFrontier(LmdbLftjPatternPlan patternPlan, String variableName) { + if (!patternPlan.canUseDerivedBinaryRelation()) { + return null; + } + + LmdbDerivedBinaryRelation relation = relation(patternPlan); + LmdbLftjPatternPlan.TermRef sourceTerm = patternPlan.termForComponent(relation.sourceComponent()); + LmdbLftjPatternPlan.TermRef targetTerm = patternPlan.termForComponent(relation.targetComponent()); + if (sourceTerm.matchesName(variableName)) { + return relation.rootFrontier(); + } + + if (!targetTerm.matchesName(variableName)) { + return null; + } + + long sourceValue = state.fixedId(sourceTerm); + return sourceValue < 0 ? 
null : relation.frontier(sourceValue); + } + + private Long derivedCount(LmdbLftjPatternPlan patternPlan) { + if (!patternPlan.canUseDerivedBinaryRelation()) { + return null; + } + + long subjectValue = state.fixedId(patternPlan.subjectTerm()); + long objectValue = state.fixedId(patternPlan.objectTerm()); + if (subjectValue < 0 || objectValue < 0) { + return null; + } + + LmdbDerivedBinaryRelation relation = relation(patternPlan); + long sourceValue = relation.sourceComponent() == TripleStore.SUBJ_IDX ? subjectValue : objectValue; + long targetValue = relation.targetComponent() == TripleStore.OBJ_IDX ? objectValue : subjectValue; + return relation.count(sourceValue, targetValue); + } + + private LmdbDerivedBinaryRelation relation(LmdbLftjPatternPlan patternPlan) { + long predicateId = state.fixedId(patternPlan.predicateTerm()); + LmdbDerivedBinaryRelation.RelationKey key = new LmdbDerivedBinaryRelation.RelationKey(patternPlan.indexName(), + queryAccess.includeInferred(), predicateId); + LmdbDerivedBinaryRelation relation = relationCache.get(key); + if (relation != null) { + metrics.recordRelationHit(); + return relation; + } + + metrics.recordRelationLoad(); + int sourceComponent = patternPlan.keyTerm(1).component(); + int targetComponent = patternPlan.keyTerm(2).component(); + LmdbDerivedBinaryRelation.Builder builder = new LmdbDerivedBinaryRelation.Builder(sourceComponent, + targetComponent); + long[] lowerBound = new long[4]; + long[] upperBound = new long[4]; + Arrays.fill(upperBound, Long.MAX_VALUE); + lowerBound[TripleStore.PRED_IDX] = predicateId; + upperBound[TripleStore.PRED_IDX] = predicateId; + int sourceKeyField = patternPlan.keyFieldIndex(sourceComponent); + int targetKeyField = patternPlan.keyFieldIndex(targetComponent); + forEachUniqueRow(patternPlan, lowerBound, upperBound, 1, + row -> builder.add(row[sourceKeyField], row[targetKeyField])); + relation = builder.build(); + relationCache.put(key, relation); + return relation; + } + + private void 
forEachUniqueRow(LmdbLftjPatternPlan patternPlan, long[] lowerBound, long[] upperBound, + int prefixLength, + RowConsumer consumer) { + try (CursorReader explicit = new CursorReader( + queryAccess.openTrieCursor(state.txn(), patternPlan.indexName(), true), + lowerBound, upperBound, prefixLength); + CursorReader inferred = queryAccess.includeInferred() + ? new CursorReader(queryAccess.openTrieCursor(state.txn(), patternPlan.indexName(), false), + lowerBound, upperBound, prefixLength) + : CursorReader.empty()) { + while (explicit.available() || inferred.available()) { + if (!inferred.available() || explicit.available() && compareRows(explicit.row(), inferred.row()) <= 0) { + boolean duplicate = inferred.available() && compareRows(explicit.row(), inferred.row()) == 0; + consumer.accept(explicit.row()); + explicit.advance(); + if (duplicate) { + inferred.advance(); + } + } else { + consumer.accept(inferred.row()); + inferred.advance(); + } + } + } + } + + private int compareRows(long[] left, long[] right) { + for (int i = 0; i < 4; i++) { + int comparison = Long.compare(left[i], right[i]); + if (comparison != 0) { + return comparison; + } + } + return 0; + } + + private interface RowConsumer { + void accept(long[] row); + } + + private static final class CursorReader implements AutoCloseable { + + private final LmdbTrieKeyCursor cursor; + private final long[] row = new long[4]; + private boolean available; + + private CursorReader(LmdbTrieKeyCursor cursor, long[] lowerBound, long[] upperBound, int prefixLength) { + this.cursor = cursor; + available = cursor.position(lowerBound, upperBound, prefixLength); + if (available) { + readRow(); + } + } + + private CursorReader() { + cursor = null; + available = false; + } + + static CursorReader empty() { + return new CursorReader(); + } + + boolean available() { + return available; + } + + long[] row() { + return row; + } + + void advance() { + if (!available || !cursor.next()) { + available = false; + return; + } + readRow(); 
+ } + + @Override + public void close() { + if (cursor != null) { + cursor.close(); + } + } + + private void readRow() { + for (int i = 0; i < 4; i++) { + row[i] = cursor.valueAt(i); + } + } + } + + private static final class FrontierKey { + private final LmdbLftjPatternPlan patternPlan; + private final String variableName; + private final long[] prefix; + + private FrontierKey(LmdbLftjPatternPlan patternPlan, String variableName, long[] prefix) { + this.patternPlan = patternPlan; + this.variableName = variableName; + this.prefix = prefix; + } + + static FrontierKey create(LmdbLftjPatternPlan patternPlan, String variableName, LmdbLftjBindingState state) { + int keyFieldIndex = patternPlan.keyFieldIndex(variableName); + long[] prefix = new long[keyFieldIndex]; + for (int i = 0; i < keyFieldIndex; i++) { + prefix[i] = state.fixedId(patternPlan.keyTerm(i)); + } + return new FrontierKey(patternPlan, variableName, prefix); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof FrontierKey)) { + return false; + } + FrontierKey o = (FrontierKey) other; + return Objects.equals(patternPlan, o.patternPlan) + && Objects.equals(variableName, o.variableName) + && Arrays.equals(prefix, o.prefix); + } + + @Override + public int hashCode() { + return Objects.hash(patternPlan, variableName, Arrays.hashCode(prefix)); + } + } + + private static final class CountKey { + private final LmdbLftjPatternPlan patternPlan; + private final long[] fixedValues; + + private CountKey(LmdbLftjPatternPlan patternPlan, long[] fixedValues) { + this.patternPlan = patternPlan; + this.fixedValues = fixedValues; + } + + static CountKey create(LmdbLftjPatternPlan patternPlan, LmdbLftjBindingState state) { + long[] fixedValues = new long[4]; + for (int i = 0; i < 4; i++) { + fixedValues[i] = state.fixedId(patternPlan.termForComponent(i)); + } + return new CountKey(patternPlan, fixedValues); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof 
CountKey)) { + return false; + } + CountKey o = (CountKey) other; + return Objects.equals(patternPlan, o.patternPlan) && Arrays.equals(fixedValues, o.fixedValues); + } + + @Override + public int hashCode() { + return Objects.hash(patternPlan, Arrays.hashCode(fixedValues)); + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java index 17cce9ad2e..14f9639c56 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java @@ -35,4 +35,12 @@ RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pr boolean explicit); LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit); + + default LmdbLftjPlanner.PlanningResult cachedPlanningResult(String cacheKey) { + return null; + } + + default void cachePlanningResult(String cacheKey, LmdbLftjPlanner.PlanningResult result) { + // optional prepared-plan cache + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java index a8767fc5fb..42e2f88c7c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java @@ -101,6 +101,7 @@ public class LmdbStore extends AbstractNotifyingSail implements FederatedService * Lock manager used to prevent concurrent {@link #getTransactionLock(IsolationLevel)} calls. */ private final ReentrantLock txnLockManager = new ReentrantLock(); + private final LmdbLftjPreparedPlanCache preparedPlanCache = new LmdbLftjPreparedPlanCache(); /** * Holds locks for all isolated transactions. 
@@ -340,6 +341,7 @@ protected void shutDownInternal() throws SailException { } } } + preparedPlanCache.clear(); logger.debug("LmdbStore shut down"); } @@ -367,6 +369,10 @@ public ValueFactory getValueFactory() { return store.getValueFactory(); } + LmdbLftjPreparedPlanCache preparedPlanCache() { + return preparedPlanCache; + } + /** * This call will block when {@link IsolationLevels#NONE} is provided when there are active transactions with a * higher isolation and block when a higher isolation is provided when there are active transactions with diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index 2074abb3fd..cb35e08bb7 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -253,6 +253,16 @@ public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { return tripleStore.openTrieCursor(txn, indexName, explicit); } + + @Override + public LmdbLftjPlanner.PlanningResult cachedPlanningResult(String cacheKey) { + return lmdbStore.preparedPlanCache().get(cacheKey); + } + + @Override + public void cachePlanningResult(String cacheKey, LmdbLftjPlanner.PlanningResult result) { + lmdbStore.preparedPlanCache().put(cacheKey, result); + } }; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java index 812f733ff6..b1c53dc56c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java @@ -11,7 +11,7 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; -class 
LmdbTrieCursor implements AutoCloseable { +class LmdbTrieCursor implements LmdbLftjCursor { private final LmdbLftjPatternPlan patternPlan; private final LmdbQueryAccess queryAccess; @@ -35,7 +35,8 @@ class LmdbTrieCursor implements AutoCloseable { } } - boolean open(String variableName) { + @Override + public boolean open(String variableName) { ensureCursor(); Frame frame = stack[stackSize++]; frame.variableName = variableName; @@ -48,7 +49,8 @@ boolean open(String variableName) { return false; } - boolean seek(long target) { + @Override + public boolean seek(long target) { if (stackSize == 0) { return false; } @@ -69,7 +71,8 @@ boolean seek(long target) { return true; } - boolean next() { + @Override + public boolean next() { Frame frame = currentFrame(); if (!frame.currentAvailable || frame.currentValue == Long.MAX_VALUE) { frame.currentAvailable = false; @@ -89,7 +92,8 @@ boolean next() { return false; } - long value() { + @Override + public long value() { return currentFrame().currentValue; } @@ -97,7 +101,8 @@ protected boolean available() { return stackSize > 0 && currentFrame().currentAvailable; } - void release(String variableName) { + @Override + public void release(String variableName) { if (stackSize == 0) { return; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java index c08ac97715..27b5119a9c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java @@ -23,7 +23,7 @@ final class LmdbUnionTrieCursor extends LmdbTrieCursor { } @Override - boolean open(String variableName) { + public boolean open(String variableName) { boolean explicitOpened = explicitCursor.open(variableName); boolean inferredOpened = inferredCursor.open(variableName); if (!explicitOpened && !inferredOpened) { @@ -33,7 +33,7 @@ boolean 
open(String variableName) { } @Override - boolean seek(long target) { + public boolean seek(long target) { if (explicitCursor.available()) { explicitCursor.seek(target); } @@ -44,7 +44,7 @@ boolean seek(long target) { } @Override - boolean next() { + public boolean next() { if (!available()) { return false; } @@ -60,7 +60,7 @@ boolean next() { } @Override - long value() { + public long value() { if (explicitCursor.available() && inferredCursor.available()) { return Math.min(explicitCursor.value(), inferredCursor.value()); } @@ -76,7 +76,7 @@ protected boolean available() { } @Override - void release(String variableName) { + public void release(String variableName) { explicitCursor.release(variableName); inferredCursor.release(variableName); } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java index 48b90d6154..5ebc05ccbe 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java @@ -117,11 +117,51 @@ void evaluateShouldReusePatternScansAcrossBacktracking() { } assertTrue(count > 0, "sanity check: the synthetic clique should still enumerate matching cycles"); - assertEquals(3, queryAccess.openTrieCursorCalls, - "LFTJ should keep one shared scan per pattern instead of reopening scans while backtracking"); + assertTrue(queryAccess.openTrieCursorCalls <= 3, + "LFTJ should keep shared trie state instead of reopening scans while backtracking"); + } + + @Test + void evaluateShouldReuseDerivedRelationsAcrossEquivalentPatterns() { + TestQueryAccess queryAccess = new TestQueryAccess(); + QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + + long count = 0; + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + while (iteration.hasNext()) { + iteration.next(); 
+ count++; + } + } + + assertTrue(count > 0, "sanity check: the synthetic clique should still enumerate matching cycles"); + assertTrue(queryAccess.openTrieCursorCalls <= 2, + "equivalent foaf:knows patterns should share derived relations instead of rebuilding one trie per pattern"); + } + + @Test + void evaluateShouldAvoidRecordScansForHiddenContextMultiplicity() { + TestQueryAccess queryAccess = TestQueryAccess.withDuplicateContexts(); + QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess, createPlanWithHiddenContexts()); + + long count = 0; + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + while (iteration.hasNext()) { + iteration.next(); + count++; + } + } + + assertEquals(648, count, "hidden context multiplicity should still be preserved"); + assertEquals(0, queryAccess.recordScanCalls, + "hidden context multiplicity should come from cached frontier counts, not RecordIterator rescans"); } private QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess) { + return createEvaluationStep(queryAccess, createPlan()); + } + + private QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess, LmdbLftjPlan plan) { QueryEvaluationContext context = new QueryEvaluationContext.Minimal((Dataset) null); LmdbLftjEvaluationStrategy strategy = new LmdbLftjEvaluationStrategy( new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess), @@ -132,7 +172,7 @@ private QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess) { false, DefaultCollectionFactory::new); LmdbLftjExecutor executor = new LmdbLftjExecutor(strategy); - return executor.prepare(new LmdbLftjTupleExpr(createPlan()), context); + return executor.prepare(new LmdbLftjTupleExpr(plan), context); } private LmdbLftjPlan createPlan() { @@ -158,6 +198,30 @@ private StatementPattern statementPattern(String subjectName, String objectName) new Var(objectName)); } + private LmdbLftjPlan createPlanWithHiddenContexts() { + 
StatementPattern pattern1 = statementPattern("a", "b", "ctx1"); + StatementPattern pattern2 = statementPattern("b", "c", "ctx2"); + StatementPattern pattern3 = statementPattern("c", "a", "ctx3"); + TupleExpr fallbackExpr = new Join(new Join(pattern1.clone(), pattern2.clone()), pattern3.clone()); + return new LmdbLftjPlan( + fallbackExpr, + fallbackExpr.getBindingNames(), + fallbackExpr.getAssuredBindingNames(), + List.of("a", "b", "c"), + List.of( + new LmdbLftjPatternPlan(pattern1, "psoc"), + new LmdbLftjPatternPlan(pattern2, "psoc"), + new LmdbLftjPatternPlan(pattern3, "posc"))); + } + + private StatementPattern statementPattern(String subjectName, String objectName, String hiddenContextName) { + return new StatementPattern( + new Var(subjectName), + new Var("pred", FOAF.KNOWS), + new Var(objectName), + Var.of(hiddenContextName, true)); + } + private static final class EmptyTripleSource implements TripleSource { @Override @@ -182,26 +246,41 @@ private static final class TestQueryAccess implements LmdbQueryAccess { private int resolveValueCalls; private int releaseReadTxnCalls; private int openScanCalls; + private int recordScanCalls; private int openTrieCursorCalls; private int closedScanCalls; private TestQueryAccess() { + this(false); + } + + private TestQueryAccess(boolean duplicateContexts) { valuesById.add(null); valuesById.add(VF.createIRI("urn:person:1")); valuesById.add(VF.createIRI("urn:person:2")); valuesById.add(VF.createIRI("urn:person:3")); valuesById.add(VF.createIRI("urn:person:4")); valuesById.add(FOAF.KNOWS); + valuesById.add(VF.createIRI("urn:ctx:1")); + valuesById.add(VF.createIRI("urn:ctx:2")); for (long subject = 1; subject <= 4; subject++) { for (long object = 1; object <= 4; object++) { if (subject != object) { quads.add(new long[] { subject, 5L, object, 0L }); + if (duplicateContexts) { + quads.add(new long[] { subject, 5L, object, 6L }); + quads.add(new long[] { subject, 5L, object, 7L }); + } } } } } + private static TestQueryAccess 
withDuplicateContexts() { + return new TestQueryAccess(true); + } + @Override public TripleStore tripleStore() { return null; @@ -253,6 +332,7 @@ public Set configuredIndexes() { public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, long context, boolean explicit) { openScanCalls++; + recordScanCalls++; return new TestRecordIterator(quads, subj, pred, obj, context, this::recordClosedScan); } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java new file mode 100644 index 0000000000..4bb5eb70d0 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -0,0 +1,153 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.eclipse.rdf4j.common.iteration.EmptyIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.FOAF; +import org.eclipse.rdf4j.query.Dataset; +import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.QueryRoot; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.Var; +import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; +import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.junit.jupiter.api.Test; + +class LmdbLftjOptimizerTest { + + @Test + void optimizeShouldReusePreparedPlanAcrossEquivalentVariableRenames() { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr first = new QueryRoot(cycle("a", "b", "c")); + TupleExpr second = new QueryRoot(cycle("x", "y", "z")); + + optimizer.optimize(first, (Dataset) null, EmptyBindingSet.getInstance()); + optimizer.optimize(second, (Dataset) null, EmptyBindingSet.getInstance()); + + assertEquals(1, queryAccess.cachedPlanPuts, + "first cyclic plan should populate the prepared-plan cache once"); + assertEquals(1, queryAccess.cachedPlanHits, + "equivalent cyclic shape 
should reuse the cached prepared plan on the second optimize"); + assertInstanceOf(LmdbLftjTupleExpr.class, assertInstanceOf(QueryRoot.class, first).getArg()); + assertInstanceOf(LmdbLftjTupleExpr.class, assertInstanceOf(QueryRoot.class, second).getArg()); + } + + private TupleExpr cycle(String a, String b, String c) { + StatementPattern pattern1 = statementPattern(a, b); + StatementPattern pattern2 = statementPattern(b, c); + StatementPattern pattern3 = statementPattern(c, a); + return new Join(new Join(pattern1, pattern2), pattern3); + } + + private StatementPattern statementPattern(String subjectName, String objectName) { + return new StatementPattern(new Var(subjectName), new Var("pred", FOAF.KNOWS), new Var(objectName)); + } + + private static final class EmptyTripleSource implements TripleSource { + + @Override + public org.eclipse.rdf4j.common.iteration.CloseableIteration getStatements(Resource subj, + IRI pred, Value obj, Resource... contexts) throws QueryEvaluationException { + return new EmptyIteration<>(); + } + + @Override + public org.eclipse.rdf4j.model.ValueFactory getValueFactory() { + return SimpleValueFactory.getInstance(); + } + } + + private static final class TestQueryAccess implements LmdbQueryAccess { + + private final Map cachedPlans = new HashMap<>(); + private int cachedPlanHits; + private int cachedPlanPuts; + + @Override + public TripleStore tripleStore() { + return null; + } + + @Override + public TxnManager.Txn acquireReadTxn() { + throw new UnsupportedOperationException(); + } + + @Override + public void releaseReadTxn(TxnManager.Txn txn) { + throw new UnsupportedOperationException(); + } + + @Override + public long resolveId(Value value) { + throw new UnsupportedOperationException(); + } + + @Override + public Value resolveValue(long id) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean includeInferred() { + return false; + } + + @Override + public Set configuredIndexes() { + return Set.of("spoc", 
"sopc", "psoc", "posc", "ospc", "opsc"); + } + + @Override + public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, + long context, boolean explicit) { + throw new UnsupportedOperationException(); + } + + @Override + public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { + throw new UnsupportedOperationException(); + } + + @Override + public LmdbLftjPlanner.PlanningResult cachedPlanningResult(String cacheKey) { + LmdbLftjPlanner.PlanningResult result = cachedPlans.get(cacheKey); + if (result != null) { + cachedPlanHits++; + } + return result; + } + + @Override + public void cachePlanningResult(String cacheKey, LmdbLftjPlanner.PlanningResult result) { + cachedPlanPuts++; + cachedPlans.put(cacheKey, result); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index 79745fe631..5a4b28f188 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -113,7 +113,7 @@ public long cycle4() { return executeCount(QUERY_CYCLE_4); } - //@Benchmark + @Benchmark public long cycle5() { return executeCount(QUERY_CYCLE_5); } From d4b9849cf99357055f21043eac834d3867c4379f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 15:14:26 +0200 Subject: [PATCH 12/32] lftj is faster --- .../rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index 5a4b28f188..7c01bcfec9 100644 --- 
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -39,7 +39,7 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) -@Warmup(iterations = 30, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" }) @Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) From d57102fb2065f381345b5a36e26da6a5856c4510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 15:16:53 +0200 Subject: [PATCH 13/32] lftj is faster --- .../benchmark/FoafCliqueQueryBenchmarkResults.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md index ed2045bd4e..885b80bb0e 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -1,8 +1,7 @@ -# Develop branch -```text -Benchmark (cliquePercentage) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units -FoafCliqueQueryBenchmark.cycle3 30 8 3 5000 15000 12345 avgt 3 92.508 ± 22.625 ms/op -FoafCliqueQueryBenchmark.cycle4 30 8 3 5000 15000 12345 avgt 3 644.258 ± 310.206 ms/op -FoafCliqueQueryBenchmark.cycle5 30 8 3 5000 15000 12345 avgt 3 3891.994 ± 1215.676 ms/op -``` - +Benchmark (cliquePercentage) (lftjEnabled) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units +FoafCliqueQueryBenchmark.cycle3 30 
true 8 3 5000 15000 12345 avgt 3 36.818 ± 43.052 ms/op +FoafCliqueQueryBenchmark.cycle3 30 false 8 3 5000 15000 12345 avgt 3 90.331 ± 3.032 ms/op +FoafCliqueQueryBenchmark.cycle4 30 true 8 3 5000 15000 12345 avgt 3 180.250 ± 30.860 ms/op +FoafCliqueQueryBenchmark.cycle4 30 false 8 3 5000 15000 12345 avgt 3 618.881 ± 51.191 ms/op +FoafCliqueQueryBenchmark.cycle5 30 true 8 3 5000 15000 12345 avgt 3 1260.939 ± 263.311 ms/op +FoafCliqueQueryBenchmark.cycle5 30 false 8 3 5000 15000 12345 avgt 3 3751.511 ± 141.681 ms/op From e831cc8f5f390958e014224abde5039e162f3021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 15:38:48 +0200 Subject: [PATCH 14/32] fixes --- .../sail/lmdb/LmdbLftjPreparedPlanCache.java | 30 ++++++----- .../sail/lmdb/LmdbLftjOptimizerTest.java | 50 +++++++++++++++++-- 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java index 28fd6ed478..b983bff540 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java @@ -44,24 +44,29 @@ synchronized void clear() { } static String normalizedKey(List patterns, Set configuredIndexes) { - Map aliases = new LinkedHashMap<>(); StringBuilder builder = new StringBuilder(configuredIndexes.size() * 6 + patterns.size() * 32); builder.append("indexes="); configuredIndexes.stream().sorted().forEach(indexName -> builder.append(indexName).append(',')); builder.append(";patterns="); - int[] nextAlias = { 0 }; - for (StatementPattern pattern : patterns) { - builder.append('[').append(pattern.getScope().name()).append(';'); - appendTerm(builder, pattern.getSubjectVar(), aliases, nextAlias); - appendTerm(builder, pattern.getPredicateVar(), aliases, nextAlias); - 
appendTerm(builder, pattern.getObjectVar(), aliases, nextAlias); - appendTerm(builder, pattern.getContextVar(), aliases, nextAlias); - builder.append(']'); - } + patterns.stream() + .map(LmdbLftjPreparedPlanCache::patternKey) + .sorted() + .forEach(builder::append); + return builder.toString(); + } + + private static String patternKey(StatementPattern pattern) { + StringBuilder builder = new StringBuilder(48); + builder.append('[').append(pattern.getScope().name()).append(';'); + appendTerm(builder, pattern.getSubjectVar()); + appendTerm(builder, pattern.getPredicateVar()); + appendTerm(builder, pattern.getObjectVar()); + appendTerm(builder, pattern.getContextVar()); + builder.append(']'); return builder.toString(); } - private static void appendTerm(StringBuilder builder, Var var, Map aliases, int[] nextAlias) { + private static void appendTerm(StringBuilder builder, Var var) { if (var == null) { builder.append("null;"); return; @@ -74,8 +79,7 @@ private static void appendTerm(StringBuilder builder, Var var, Map "v" + nextAlias[0]++); - builder.append(alias).append(';'); + builder.append("var=").append(var.getName()).append(';'); } private static String valueKey(Value value) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java index 4bb5eb70d0..af5156f0f6 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -15,6 +15,7 @@ import static org.junit.jupiter.api.Assertions.assertInstanceOf; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; @@ -38,24 +39,45 @@ class LmdbLftjOptimizerTest { + private static final SimpleValueFactory VF = SimpleValueFactory.getInstance(); + @Test - void optimizeShouldReusePreparedPlanAcrossEquivalentVariableRenames() { + void 
optimizeShouldNotReusePreparedPlanAcrossEquivalentVariableRenames() { TestQueryAccess queryAccess = new TestQueryAccess(); LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); TupleExpr first = new QueryRoot(cycle("a", "b", "c")); TupleExpr second = new QueryRoot(cycle("x", "y", "z")); + Set expectedBindingNames = second.getBindingNames(); + + optimizer.optimize(first, (Dataset) null, EmptyBindingSet.getInstance()); + optimizer.optimize(second, (Dataset) null, EmptyBindingSet.getInstance()); + + assertEquals(2, queryAccess.cachedPlanPuts, + "renamed visible variables must produce a fresh prepared plan"); + assertEquals(0, queryAccess.cachedPlanHits, + "prepared-plan reuse across renamed variables leaks stale binding names"); + assertEquals(List.of("x", "y", "z"), lftjNode(second).plan().variableOrder()); + assertEquals(expectedBindingNames, lftjNode(second).plan().bindingNames()); + } + + @Test + void optimizeShouldReusePreparedPlanAcrossEquivalentJoinReorders() { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr first = new QueryRoot(distinctPredicateCycle("a", "b", "c")); + TupleExpr second = new QueryRoot(reorderedDistinctPredicateCycle("a", "b", "c")); optimizer.optimize(first, (Dataset) null, EmptyBindingSet.getInstance()); optimizer.optimize(second, (Dataset) null, EmptyBindingSet.getInstance()); assertEquals(1, queryAccess.cachedPlanPuts, - "first cyclic plan should populate the prepared-plan cache once"); + "equivalent cyclic shapes should share one prepared plan entry"); assertEquals(1, queryAccess.cachedPlanHits, - "equivalent cyclic shape should reuse the cached prepared plan on the second optimize"); - assertInstanceOf(LmdbLftjTupleExpr.class, assertInstanceOf(QueryRoot.class, first).getArg()); - assertInstanceOf(LmdbLftjTupleExpr.class, 
assertInstanceOf(QueryRoot.class, second).getArg()); + "commuted join order should still hit the normalized prepared-plan cache"); } private TupleExpr cycle(String a, String b, String c) { @@ -69,6 +91,24 @@ private StatementPattern statementPattern(String subjectName, String objectName) return new StatementPattern(new Var(subjectName), new Var("pred", FOAF.KNOWS), new Var(objectName)); } + private TupleExpr distinctPredicateCycle(String a, String b, String c) { + StatementPattern pattern1 = new StatementPattern(new Var(a), new Var("p1", VF.createIRI("urn:p1")), new Var(b)); + StatementPattern pattern2 = new StatementPattern(new Var(b), new Var("p2", VF.createIRI("urn:p2")), new Var(c)); + StatementPattern pattern3 = new StatementPattern(new Var(c), new Var("p3", VF.createIRI("urn:p3")), new Var(a)); + return new Join(new Join(pattern1, pattern2), pattern3); + } + + private TupleExpr reorderedDistinctPredicateCycle(String a, String b, String c) { + StatementPattern pattern1 = new StatementPattern(new Var(a), new Var("p1", VF.createIRI("urn:p1")), new Var(b)); + StatementPattern pattern2 = new StatementPattern(new Var(b), new Var("p2", VF.createIRI("urn:p2")), new Var(c)); + StatementPattern pattern3 = new StatementPattern(new Var(c), new Var("p3", VF.createIRI("urn:p3")), new Var(a)); + return new Join(new Join(pattern2, pattern3), pattern1); + } + + private LmdbLftjTupleExpr lftjNode(TupleExpr tupleExpr) { + return assertInstanceOf(LmdbLftjTupleExpr.class, assertInstanceOf(QueryRoot.class, tupleExpr).getArg()); + } + private static final class EmptyTripleSource implements TripleSource { @Override From 71ab6b7e72329e3d6c8752cb277be52ad0e79db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 16:53:30 +0200 Subject: [PATCH 15/32] new best --- .../rdf4j/sail/lmdb/LmdbCachedTrieCursor.java | 18 +- .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 44 +- .../rdf4j/sail/lmdb/LmdbLftjCursor.java | 4 +- 
.../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 94 ++-- .../rdf4j/sail/lmdb/LmdbLftjPatternPlan.java | 53 ++- .../sail/lmdb/LmdbPrefixFrontierProvider.java | 54 ++- .../rdf4j/sail/lmdb/LmdbTrieCursor.java | 22 +- .../rdf4j/sail/lmdb/LmdbUnionTrieCursor.java | 12 +- .../sail/lmdb/LmdbLftjExecutorBenchmark.java | 129 ++++++ .../rdf4j/sail/lmdb/LmdbLftjExecutorTest.java | 427 +----------------- .../sail/lmdb/LmdbLftjSpecializationTest.java | 89 ++++ .../sail/lmdb/LmdbLftjSyntheticScenario.java | 413 +++++++++++++++++ .../FoafCliqueLftjCorrectnessTest.java | 20 +- .../FoafCliqueQueryBenchmarkResults.md | 12 +- 14 files changed, 872 insertions(+), 519 deletions(-) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSpecializationTest.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java index f923202eea..05c3d0ef47 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java @@ -28,14 +28,14 @@ final class LmdbCachedTrieCursor implements LmdbLftjCursor { } @Override - public boolean open(String variableName) { - LmdbCachedFrontier frontier = provider.frontier(patternPlan, variableName); + public boolean open(int bindingSlot) { + LmdbCachedFrontier frontier = provider.frontier(patternPlan, bindingSlot); if (frontier.isEmpty()) { return false; } Frame frame = stack[stackSize++]; - frame.variableName = variableName; + frame.bindingSlot = bindingSlot; frame.frontier = frontier; frame.position = 0; return true; @@ -85,12 +85,12 @@ public long value() { } @Override - public void 
release(String variableName) { + public void release(int bindingSlot) { if (stackSize == 0) { return; } Frame frame = stack[stackSize - 1]; - if (!frame.matches(variableName)) { + if (!frame.matches(bindingSlot)) { return; } stack[--stackSize].reset(); @@ -111,18 +111,18 @@ private Frame currentFrame() { } private static final class Frame { - private String variableName; + private int bindingSlot = -1; private LmdbCachedFrontier frontier; private int position; private void reset() { - variableName = null; + bindingSlot = -1; frontier = null; position = -1; } - private boolean matches(String variableName) { - return this.variableName != null && this.variableName.equals(variableName); + private boolean matches(int bindingSlot) { + return this.bindingSlot == bindingSlot; } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java index 997ffc1291..93adb8dac3 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -25,6 +25,7 @@ final class LmdbLftjBindingState { private final LmdbLftjPlan plan; private final BindingSet inputBindings; private final LmdbQueryAccess queryAccess; + private final String[] variableNames; private final Map variableSlots = new HashMap<>(); private final long[] fixedValues; private final boolean[] fixedPresent; @@ -38,13 +39,14 @@ final class LmdbLftjBindingState { this.plan = plan; this.inputBindings = inputBindings; this.queryAccess = queryAccess; - int variableCount = plan.variableOrder().size(); + this.variableNames = plan.variableOrder().toArray(new String[0]); + int variableCount = variableNames.length; this.fixedValues = new long[variableCount]; this.fixedPresent = new boolean[variableCount]; this.assignedValues = new long[variableCount]; this.assignedPresent = new 
boolean[variableCount]; for (int i = 0; i < variableCount; i++) { - variableSlots.put(plan.variableOrder().get(i), i); + variableSlots.put(variableNames[i], i); } } @@ -63,7 +65,7 @@ boolean initialize() { } } } - for (String variableName : plan.variableOrder()) { + for (String variableName : variableNames) { if (inputBindings.hasBinding(variableName)) { long id = queryAccess.resolveId(inputBindings.getValue(variableName)); if (id == LmdbValue.UNKNOWN_ID) { @@ -86,12 +88,18 @@ TxnManager.Txn txn() { } boolean isBound(String variableName) { - int slot = slot(variableName); + return isBound(slot(variableName)); + } + + boolean isBound(int slot) { return assignedPresent[slot] || fixedPresent[slot]; } long value(String variableName) { - int slot = slot(variableName); + return value(slot(variableName)); + } + + long value(int slot) { if (assignedPresent[slot]) { return assignedValues[slot]; } @@ -99,13 +107,20 @@ long value(String variableName) { } void assign(String variableName, long value) { - int slot = slot(variableName); + assign(slot(variableName), value); + } + + void assign(int slot, long value) { assignedValues[slot] = value; assignedPresent[slot] = true; } void clear(String variableName) { - assignedPresent[slot(variableName)] = false; + clear(slot(variableName)); + } + + void clear(int slot) { + assignedPresent[slot] = false; } long fixedId(LmdbLftjPatternPlan.TermRef term) { @@ -126,14 +141,23 @@ long fixedId(LmdbLftjPatternPlan.TermRef term) { BindingSet materialize(QueryEvaluationContext context) { MutableBindingSet result = context.createBindingSet(inputBindings); - for (String variableName : plan.variableOrder()) { - if (!result.hasBinding(variableName) && isBound(variableName)) { - context.setBinding(variableName).accept(queryAccess.resolveValue(value(variableName)), result); + for (int slot = 0; slot < variableNames.length; slot++) { + String variableName = variableNames[slot]; + if (!result.hasBinding(variableName) && isBound(slot)) { + 
context.setBinding(variableName).accept(queryAccess.resolveValue(value(slot)), result); } } return result; } + int variableCount() { + return variableNames.length; + } + + String variableName(int slot) { + return variableNames[slot]; + } + void close() { if (txn != null) { queryAccess.releaseReadTxn(txn); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java index dd04b6ebb5..6d3156661c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java @@ -13,7 +13,7 @@ interface LmdbLftjCursor extends AutoCloseable { - boolean open(String variableName); + boolean open(int bindingSlot); boolean seek(long target); @@ -21,7 +21,7 @@ interface LmdbLftjCursor extends AutoCloseable { long value(); - void release(String variableName); + void release(int bindingSlot); @Override void close(); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index fe12c21ce2..fdab68a114 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -12,6 +12,7 @@ package org.eclipse.rdf4j.sail.lmdb; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.eclipse.rdf4j.common.iteration.CloseableIteration; @@ -64,9 +65,9 @@ private final class LmdbLftjIteration extends LookAheadIteration { private final LmdbQueryAccess queryAccess; private final LmdbLftjMetrics metrics; private final LmdbPrefixFrontierProvider frontierProvider; - private final List searchVariables; - private final List patternCursors; - private final List> cursorsByDepth; + private final int[] searchSlots; + private final LmdbLftjCursor[] 
patternCursors; + private final DepthRuntime[] depthRuntimes; private final boolean[] initializedDepths; private final boolean[] advanceDepths; @@ -82,11 +83,11 @@ private LmdbLftjIteration(LmdbLftjPlan plan, LmdbLftjBindingState state, QueryEv this.queryAccess = queryAccess; this.metrics = metrics; this.frontierProvider = new LmdbPrefixFrontierProvider(queryAccess, state, metrics); - this.searchVariables = collectSearchVariables(plan, state); + this.searchSlots = collectSearchSlots(plan, state); this.patternCursors = createPatternCursors(plan, frontierProvider); - this.cursorsByDepth = createDepthCursors(plan, searchVariables, patternCursors); - this.initializedDepths = new boolean[searchVariables.size()]; - this.advanceDepths = new boolean[searchVariables.size()]; + this.depthRuntimes = createDepthRuntimes(plan, searchSlots, patternCursors); + this.initializedDepths = new boolean[searchSlots.length]; + this.advanceDepths = new boolean[searchSlots.length]; this.depth = 0; } @@ -115,7 +116,7 @@ protected void handleClose() { private BindingSet computeNextElement() { while (depth >= 0) { - if (depth == searchVariables.size()) { + if (depth == searchSlots.length) { long multiplicity = witnessMultiplicity(plan, metrics, frontierProvider); backtrackAfterLeaf(); if (multiplicity > 0) { @@ -155,22 +156,23 @@ private BindingSet computeNextElement() { } private boolean positionDepth(int depth, boolean advanceExisting) { - String variableName = searchVariables.get(depth); - List cursors = cursorsByDepth.get(depth); - state.clear(variableName); + DepthRuntime depthRuntime = depthRuntimes[depth]; + int bindingSlot = depthRuntime.bindingSlot; + LmdbLftjCursor[] cursors = depthRuntime.cursors; + state.clear(bindingSlot); - if (cursors.isEmpty()) { + if (cursors.length == 0) { return false; } if (!advanceExisting) { for (LmdbLftjCursor cursor : cursors) { metrics.recordCandidateScan(); - if (!cursor.open(variableName)) { + if (!cursor.open(bindingSlot)) { return false; } } - } 
else if (!cursors.get(0).next()) { + } else if (!cursors[0].next()) { return false; } @@ -183,12 +185,12 @@ private boolean positionDepth(int depth, boolean advanceExisting) { return false; } - state.assign(variableName, current); + state.assign(bindingSlot, current); return true; } private void backtrackAfterLeaf() { - depth = searchVariables.size() - 1; + depth = searchSlots.length - 1; if (depth >= 0) { advanceDepths[depth] = true; } @@ -203,56 +205,58 @@ private void backtrackFromDepth(int failedDepth) { } private void releaseDepth(int depth) { - if (depth < 0 || depth >= searchVariables.size()) { + if (depth < 0 || depth >= searchSlots.length) { return; } - String variableName = searchVariables.get(depth); - state.clear(variableName); + DepthRuntime depthRuntime = depthRuntimes[depth]; + state.clear(depthRuntime.bindingSlot); initializedDepths[depth] = false; advanceDepths[depth] = false; - for (LmdbLftjCursor cursor : cursorsByDepth.get(depth)) { - cursor.release(variableName); + for (LmdbLftjCursor cursor : depthRuntime.cursors) { + cursor.release(depthRuntime.bindingSlot); } } } - private List collectSearchVariables(LmdbLftjPlan plan, LmdbLftjBindingState state) { - List searchVariables = new ArrayList<>(plan.variableOrder().size()); - for (String variableName : plan.variableOrder()) { - if (!state.isBound(variableName)) { - searchVariables.add(variableName); + private int[] collectSearchSlots(LmdbLftjPlan plan, LmdbLftjBindingState state) { + int[] searchSlots = new int[plan.variableOrder().size()]; + int count = 0; + for (int slot = 0; slot < state.variableCount(); slot++) { + if (!state.isBound(slot)) { + searchSlots[count++] = slot; } } - return searchVariables; + return Arrays.copyOf(searchSlots, count); } - private List createPatternCursors(LmdbLftjPlan plan, LmdbPrefixFrontierProvider frontierProvider) { - List cursors = new ArrayList<>(plan.patternPlans().size()); - for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { - cursors.add(new 
LmdbCachedTrieCursor(patternPlan, frontierProvider)); + private LmdbLftjCursor[] createPatternCursors(LmdbLftjPlan plan, LmdbPrefixFrontierProvider frontierProvider) { + LmdbLftjCursor[] cursors = new LmdbLftjCursor[plan.patternPlans().size()]; + List patternPlans = plan.patternPlans(); + for (int i = 0; i < patternPlans.size(); i++) { + cursors[i] = new LmdbCachedTrieCursor(patternPlans.get(i), frontierProvider); } return cursors; } - private List> createDepthCursors(LmdbLftjPlan plan, List searchVariables, - List patternCursors) { - List> cursorsByDepth = new ArrayList<>(searchVariables.size()); + private DepthRuntime[] createDepthRuntimes(LmdbLftjPlan plan, int[] searchSlots, LmdbLftjCursor[] patternCursors) { + DepthRuntime[] depthRuntimes = new DepthRuntime[searchSlots.length]; List patternPlans = plan.patternPlans(); - for (String variableName : searchVariables) { + for (int depth = 0; depth < searchSlots.length; depth++) { + int bindingSlot = searchSlots[depth]; List cursors = new ArrayList<>(); for (int i = 0; i < patternPlans.size(); i++) { LmdbLftjPatternPlan patternPlan = patternPlans.get(i); - if (!patternPlan.containsVariable(variableName)) { + if (!patternPlan.containsBindingSlot(bindingSlot)) { continue; } - cursors.add(patternCursors.get(i)); + cursors.add(patternCursors[i]); } - cursorsByDepth.add(cursors); + depthRuntimes[depth] = new DepthRuntime(bindingSlot, cursors.toArray(new LmdbLftjCursor[0])); } - return cursorsByDepth; + return depthRuntimes; } - private long align(List cursors, long target) { + private long align(LmdbLftjCursor[] cursors, long target) { long current = target; while (true) { boolean allMatch = true; @@ -292,6 +296,16 @@ private long countMatches(LmdbLftjPatternPlan patternPlan, LmdbLftjMetrics metri return frontierProvider.countMatches(patternPlan); } + private static final class DepthRuntime { + private final int bindingSlot; + private final LmdbLftjCursor[] cursors; + + private DepthRuntime(int bindingSlot, 
LmdbLftjCursor[] cursors) { + this.bindingSlot = bindingSlot; + this.cursors = cursors; + } + } + private final class LazyFallbackStep { private final TupleExpr fallbackExpr; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java index dc8dd9cc5f..a488e70e7b 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPatternPlan.java @@ -81,6 +81,15 @@ boolean containsVariable(String name) { return false; } + boolean containsBindingSlot(int bindingSlot) { + for (TermRef term : terms) { + if (term.bindingSlot() == bindingSlot) { + return true; + } + } + return false; + } + int componentFor(String variableName) { for (TermRef term : terms) { if (term.matchesName(variableName)) { @@ -90,6 +99,15 @@ int componentFor(String variableName) { throw new IllegalArgumentException("Pattern does not bind variable " + variableName); } + int componentForBindingSlot(int bindingSlot) { + for (TermRef term : terms) { + if (term.bindingSlot() == bindingSlot) { + return term.component(); + } + } + throw new IllegalArgumentException("Pattern does not bind slot " + bindingSlot); + } + int keyFieldIndex(String variableName) { for (int i = 0; i < keyTerms.length; i++) { if (keyTerms[i].matchesName(variableName)) { @@ -99,7 +117,16 @@ int keyFieldIndex(String variableName) { throw new IllegalArgumentException("Pattern does not bind variable " + variableName); } - int keyFieldIndex(int component) { + int keyFieldIndexForBindingSlot(int bindingSlot) { + for (int i = 0; i < keyTerms.length; i++) { + if (keyTerms[i].bindingSlot() == bindingSlot) { + return i; + } + } + throw new IllegalArgumentException("Pattern does not bind slot " + bindingSlot); + } + + int keyFieldIndexForComponent(int component) { for (int i = 0; i < keyTerms.length; i++) { if 
(keyTerms[i].component() == component) { return i; @@ -194,6 +221,30 @@ void fillRangeBounds(LmdbLftjBindingState state, String variableName, long lower maxKey[currentTerm.component()] = Long.MAX_VALUE; } + void fillRangeBounds(LmdbLftjBindingState state, int bindingSlot, long lowerBound, long[] minKey, + long[] maxKey) { + Arrays.fill(maxKey, Long.MAX_VALUE); + + int keyFieldIndex = keyFieldIndexForBindingSlot(bindingSlot); + for (int i = 0; i < keyTerms.length; i++) { + TermRef term = keyTerms[i]; + long fixedId = state.fixedId(term); + if (i < keyFieldIndex && fixedId < 0) { + throw new IllegalStateException( + "LMDB LFTJ requires a fully fixed prefix before slot " + bindingSlot + " in index " + + indexName); + } + if (fixedId >= 0) { + minKey[term.component()] = fixedId; + maxKey[term.component()] = fixedId; + } + } + + TermRef currentTerm = keyTerms[keyFieldIndex]; + minKey[currentTerm.component()] = lowerBound; + maxKey[currentTerm.component()] = Long.MAX_VALUE; + } + boolean matchesPrefix(LmdbTrieKeyCursor cursor, LmdbLftjBindingState state, int keyFieldIndex) { for (int i = 0; i < keyFieldIndex; i++) { long fixedId = state.fixedId(keyTerms[i]); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java index fb52dfe952..eabe03ac1f 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java @@ -31,8 +31,8 @@ final class LmdbPrefixFrontierProvider { this.metrics = metrics; } - LmdbCachedFrontier frontier(LmdbLftjPatternPlan patternPlan, String variableName) { - FrontierKey key = FrontierKey.create(patternPlan, variableName, state); + LmdbCachedFrontier frontier(LmdbLftjPatternPlan patternPlan, int bindingSlot) { + FrontierKey key = FrontierKey.create(patternPlan, bindingSlot, state); 
LmdbCachedFrontier frontier = frontierCache.get(key); if (frontier != null) { metrics.recordFrontierHit(); @@ -40,7 +40,7 @@ LmdbCachedFrontier frontier(LmdbLftjPatternPlan patternPlan, String variableName } metrics.recordFrontierLoad(); - frontier = loadFrontier(patternPlan, variableName); + frontier = loadFrontier(patternPlan, bindingSlot); frontierCache.put(key, frontier); return frontier; } @@ -59,17 +59,17 @@ long countMatches(LmdbLftjPatternPlan patternPlan) { return loaded; } - private LmdbCachedFrontier loadFrontier(LmdbLftjPatternPlan patternPlan, String variableName) { - LmdbCachedFrontier derived = derivedFrontier(patternPlan, variableName); + private LmdbCachedFrontier loadFrontier(LmdbLftjPatternPlan patternPlan, int bindingSlot) { + LmdbCachedFrontier derived = derivedFrontier(patternPlan, bindingSlot); if (derived != null) { metrics.recordRelationUse(); return derived; } - int keyFieldIndex = patternPlan.keyFieldIndex(variableName); + int keyFieldIndex = patternPlan.keyFieldIndexForBindingSlot(bindingSlot); long[] lowerBound = new long[4]; long[] upperBound = new long[4]; - patternPlan.fillRangeBounds(state, variableName, 0L, lowerBound, upperBound); + patternPlan.fillRangeBounds(state, bindingSlot, 0L, lowerBound, upperBound); LmdbDerivedBinaryRelation.LongArrayBuilder values = new LmdbDerivedBinaryRelation.LongArrayBuilder(); long[] last = { Long.MIN_VALUE }; @@ -99,19 +99,19 @@ private long loadCount(LmdbLftjPatternPlan patternPlan) { return count[0]; } - private LmdbCachedFrontier derivedFrontier(LmdbLftjPatternPlan patternPlan, String variableName) { - if (!patternPlan.canUseDerivedBinaryRelation()) { + private LmdbCachedFrontier derivedFrontier(LmdbLftjPatternPlan patternPlan, int bindingSlot) { + if (!canUseDerivedRelation(patternPlan)) { return null; } LmdbDerivedBinaryRelation relation = relation(patternPlan); LmdbLftjPatternPlan.TermRef sourceTerm = patternPlan.termForComponent(relation.sourceComponent()); LmdbLftjPatternPlan.TermRef 
targetTerm = patternPlan.termForComponent(relation.targetComponent()); - if (sourceTerm.matchesName(variableName)) { + if (sourceTerm.bindingSlot() == bindingSlot) { return relation.rootFrontier(); } - if (!targetTerm.matchesName(variableName)) { + if (targetTerm.bindingSlot() != bindingSlot) { return null; } @@ -120,7 +120,7 @@ private LmdbCachedFrontier derivedFrontier(LmdbLftjPatternPlan patternPlan, Stri } private Long derivedCount(LmdbLftjPatternPlan patternPlan) { - if (!patternPlan.canUseDerivedBinaryRelation()) { + if (!canUseDerivedRelation(patternPlan)) { return null; } @@ -138,8 +138,10 @@ private Long derivedCount(LmdbLftjPatternPlan patternPlan) { private LmdbDerivedBinaryRelation relation(LmdbLftjPatternPlan patternPlan) { long predicateId = state.fixedId(patternPlan.predicateTerm()); - LmdbDerivedBinaryRelation.RelationKey key = new LmdbDerivedBinaryRelation.RelationKey(patternPlan.indexName(), - queryAccess.includeInferred(), predicateId); + LmdbDerivedBinaryRelation.RelationKey key = new LmdbDerivedBinaryRelation.RelationKey( + patternPlan.indexName(), + queryAccess.includeInferred(), + predicateId); LmdbDerivedBinaryRelation relation = relationCache.get(key); if (relation != null) { metrics.recordRelationHit(); @@ -156,8 +158,8 @@ private LmdbDerivedBinaryRelation relation(LmdbLftjPatternPlan patternPlan) { Arrays.fill(upperBound, Long.MAX_VALUE); lowerBound[TripleStore.PRED_IDX] = predicateId; upperBound[TripleStore.PRED_IDX] = predicateId; - int sourceKeyField = patternPlan.keyFieldIndex(sourceComponent); - int targetKeyField = patternPlan.keyFieldIndex(targetComponent); + int sourceKeyField = patternPlan.keyFieldIndexForComponent(sourceComponent); + int targetKeyField = patternPlan.keyFieldIndexForComponent(targetComponent); forEachUniqueRow(patternPlan, lowerBound, upperBound, 1, row -> builder.add(row[sourceKeyField], row[targetKeyField])); relation = builder.build(); @@ -165,6 +167,10 @@ private LmdbDerivedBinaryRelation 
relation(LmdbLftjPatternPlan patternPlan) { return relation; } + private boolean canUseDerivedRelation(LmdbLftjPatternPlan patternPlan) { + return patternPlan.canUseDerivedBinaryRelation(); + } + private void forEachUniqueRow(LmdbLftjPatternPlan patternPlan, long[] lowerBound, long[] upperBound, int prefixLength, RowConsumer consumer) { @@ -260,22 +266,22 @@ private void readRow() { private static final class FrontierKey { private final LmdbLftjPatternPlan patternPlan; - private final String variableName; + private final int bindingSlot; private final long[] prefix; - private FrontierKey(LmdbLftjPatternPlan patternPlan, String variableName, long[] prefix) { + private FrontierKey(LmdbLftjPatternPlan patternPlan, int bindingSlot, long[] prefix) { this.patternPlan = patternPlan; - this.variableName = variableName; + this.bindingSlot = bindingSlot; this.prefix = prefix; } - static FrontierKey create(LmdbLftjPatternPlan patternPlan, String variableName, LmdbLftjBindingState state) { - int keyFieldIndex = patternPlan.keyFieldIndex(variableName); + static FrontierKey create(LmdbLftjPatternPlan patternPlan, int bindingSlot, LmdbLftjBindingState state) { + int keyFieldIndex = patternPlan.keyFieldIndexForBindingSlot(bindingSlot); long[] prefix = new long[keyFieldIndex]; for (int i = 0; i < keyFieldIndex; i++) { prefix[i] = state.fixedId(patternPlan.keyTerm(i)); } - return new FrontierKey(patternPlan, variableName, prefix); + return new FrontierKey(patternPlan, bindingSlot, prefix); } @Override @@ -285,13 +291,13 @@ public boolean equals(Object other) { } FrontierKey o = (FrontierKey) other; return Objects.equals(patternPlan, o.patternPlan) - && Objects.equals(variableName, o.variableName) + && bindingSlot == o.bindingSlot && Arrays.equals(prefix, o.prefix); } @Override public int hashCode() { - return Objects.hash(patternPlan, variableName, Arrays.hashCode(prefix)); + return Objects.hash(patternPlan, bindingSlot, Arrays.hashCode(prefix)); } } diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java index b1c53dc56c..788872b3ed 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbTrieCursor.java @@ -36,11 +36,11 @@ class LmdbTrieCursor implements LmdbLftjCursor { } @Override - public boolean open(String variableName) { + public boolean open(int bindingSlot) { ensureCursor(); Frame frame = stack[stackSize++]; - frame.variableName = variableName; - frame.keyFieldIndex = patternPlan.keyFieldIndex(variableName); + frame.bindingSlot = bindingSlot; + frame.keyFieldIndex = patternPlan.keyFieldIndexForBindingSlot(bindingSlot); if (seek(0L)) { return true; } @@ -60,7 +60,7 @@ public boolean seek(long target) { return true; } - patternPlan.fillRangeBounds(state, frame.variableName, target, lowerBound, upperBound); + patternPlan.fillRangeBounds(state, frame.bindingSlot, target, lowerBound, upperBound); if (!cursor.position(lowerBound, upperBound, frame.keyFieldIndex + 1)) { frame.currentAvailable = false; return false; @@ -102,17 +102,17 @@ protected boolean available() { } @Override - public void release(String variableName) { + public void release(int bindingSlot) { if (stackSize == 0) { return; } Frame frame = stack[stackSize - 1]; - if (!frame.matches(variableName)) { + if (!frame.matches(bindingSlot)) { return; } stack[--stackSize].reset(); if (stackSize > 0 && currentFrame().currentAvailable && !restoreCurrentPosition()) { - throw new IllegalStateException("LMDB trie cursor failed to restore parent frame for " + variableName); + throw new IllegalStateException("LMDB trie cursor failed to restore parent frame for slot " + bindingSlot); } } @@ -150,20 +150,20 @@ private Frame currentFrame() { } private static final class Frame { - private String variableName; + private int bindingSlot = -1; private int 
keyFieldIndex; private long currentValue; private boolean currentAvailable; private void reset() { - variableName = null; + bindingSlot = -1; keyFieldIndex = -1; currentValue = 0L; currentAvailable = false; } - private boolean matches(String variableName) { - return this.variableName != null && this.variableName.equals(variableName); + private boolean matches(int bindingSlot) { + return this.bindingSlot == bindingSlot; } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java index 27b5119a9c..fc8df786ed 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUnionTrieCursor.java @@ -23,9 +23,9 @@ final class LmdbUnionTrieCursor extends LmdbTrieCursor { } @Override - public boolean open(String variableName) { - boolean explicitOpened = explicitCursor.open(variableName); - boolean inferredOpened = inferredCursor.open(variableName); + public boolean open(int bindingSlot) { + boolean explicitOpened = explicitCursor.open(bindingSlot); + boolean inferredOpened = inferredCursor.open(bindingSlot); if (!explicitOpened && !inferredOpened) { return false; } @@ -76,9 +76,9 @@ protected boolean available() { } @Override - public void release(String variableName) { - explicitCursor.release(variableName); - inferredCursor.release(variableName); + public void release(int bindingSlot) { + explicitCursor.release(bindingSlot); + inferredCursor.release(bindingSlot); } @Override diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java new file mode 100644 index 0000000000..5dc63bde08 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java @@ -0,0 +1,129 @@ 
+/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.openjdk.jmh.Main; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@BenchmarkMode(Mode.AverageTime) +@Fork(value = 1, jvmArgs = { "-Xms1G", "-Xmx1G", "-XX:+UseG1GC" }) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +public class LmdbLftjExecutorBenchmark { + + @State(Scope.Thread) + public static 
class CycleState { + + @Param({ "true", "false" }) + public boolean derivedRelationEnabled; + + private QueryEvaluationStep evaluationStep; + + @Setup(Level.Trial) + public void setup() { + evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep( + new BenchmarkQueryAccess(false, derivedRelationEnabled), + LmdbLftjSyntheticScenario.createPlan()); + } + } + + @State(Scope.Thread) + public static class HiddenContextState { + + @Param({ "true", "false" }) + public boolean derivedRelationEnabled; + + private QueryEvaluationStep evaluationStep; + + @Setup(Level.Trial) + public void setup() { + evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep( + new BenchmarkQueryAccess(true, derivedRelationEnabled), + LmdbLftjSyntheticScenario.createPlanWithHiddenContexts()); + } + } + + @Benchmark + public long cycle3(CycleState state, Blackhole blackhole) throws Exception { + return consume(state.evaluationStep, blackhole); + } + + @Benchmark + public long cycle3HiddenContexts(HiddenContextState state, Blackhole blackhole) throws Exception { + return consume(state.evaluationStep, blackhole); + } + + public static void main(String[] args) throws Exception { + if (args != null && args.length > 0) { + Main.main(args); + return; + } + + new Runner(new OptionsBuilder() + .include(LmdbLftjExecutorBenchmark.class.getSimpleName()) + .forks(1) + .build()).run(); + } + + private static long consume(QueryEvaluationStep evaluationStep, Blackhole blackhole) throws Exception { + long count = 0; + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + while (iteration.hasNext()) { + BindingSet bindingSet = iteration.next(); + blackhole.consume(bindingSet); + count++; + } + } + return count; + } + + private static final class BenchmarkQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { + + private static final Set DERIVED_RELATION_INDEXES = Set.of("psoc", "posc"); + private static final Set GENERIC_INDEXES = Set.of("psoc"); + 
+ private final Set configuredIndexes; + + private BenchmarkQueryAccess(boolean duplicateContexts, boolean derivedRelationEnabled) { + super(duplicateContexts); + this.configuredIndexes = derivedRelationEnabled ? DERIVED_RELATION_INDEXES : GENERIC_INDEXES; + } + + @Override + public Set configuredIndexes() { + return configuredIndexes; + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java index 5ebc05ccbe..f98f0719ba 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java @@ -14,43 +14,19 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Set; - -import org.eclipse.rdf4j.collection.factory.impl.DefaultCollectionFactory; import org.eclipse.rdf4j.common.iteration.CloseableIteration; -import org.eclipse.rdf4j.common.iteration.EmptyIteration; -import org.eclipse.rdf4j.model.IRI; -import org.eclipse.rdf4j.model.Resource; -import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.Value; -import org.eclipse.rdf4j.model.ValueFactory; -import org.eclipse.rdf4j.model.impl.SimpleValueFactory; -import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.query.BindingSet; -import org.eclipse.rdf4j.query.Dataset; -import org.eclipse.rdf4j.query.algebra.Join; -import org.eclipse.rdf4j.query.algebra.StatementPattern; -import org.eclipse.rdf4j.query.algebra.TupleExpr; -import org.eclipse.rdf4j.query.algebra.Var; import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; -import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; -import 
org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; -import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; import org.junit.jupiter.api.Test; class LmdbLftjExecutorTest { - private static final ValueFactory VF = SimpleValueFactory.getInstance(); - @Test void evaluateShouldStayLazyUntilConsumerReadsResults() { - TestQueryAccess queryAccess = new TestQueryAccess(); - QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { assertEquals(0, queryAccess.resolveValueCalls, @@ -65,22 +41,22 @@ void evaluateShouldStayLazyUntilConsumerReadsResults() { @Test void evaluateShouldRespectFullyBoundInputBindings() { - TestQueryAccess queryAccess = new TestQueryAccess(); - QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); QueryBindingSet matchingBindings = new QueryBindingSet(); - matchingBindings.setBinding("a", VF.createIRI("urn:person:1")); - matchingBindings.setBinding("b", VF.createIRI("urn:person:2")); - matchingBindings.setBinding("c", VF.createIRI("urn:person:3")); + matchingBindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + matchingBindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + matchingBindings.setBinding("c", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:3")); try (CloseableIteration iteration = evaluationStep.evaluate(matchingBindings)) { 
assertTrue(iteration.hasNext(), "fully bound matching cycles should still produce a result"); } QueryBindingSet nonMatchingBindings = new QueryBindingSet(); - nonMatchingBindings.setBinding("a", VF.createIRI("urn:person:1")); - nonMatchingBindings.setBinding("b", VF.createIRI("urn:person:1")); - nonMatchingBindings.setBinding("c", VF.createIRI("urn:person:2")); + nonMatchingBindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + nonMatchingBindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + nonMatchingBindings.setBinding("c", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); try (CloseableIteration iteration = evaluationStep.evaluate(nonMatchingBindings)) { assertTrue(!iteration.hasNext(), "fully bound non-matching cycles must not be reported"); @@ -89,8 +65,8 @@ void evaluateShouldRespectFullyBoundInputBindings() { @Test void evaluateShouldCloseLiveScansOnEarlyClose() { - TestQueryAccess queryAccess = new TestQueryAccess(); - QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance()); assertTrue(iteration.hasNext(), "expected a lazy result row"); @@ -105,8 +81,8 @@ void evaluateShouldCloseLiveScansOnEarlyClose() { @Test void evaluateShouldReusePatternScansAcrossBacktracking() { - TestQueryAccess queryAccess = new TestQueryAccess(); - QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); long count = 0; try (CloseableIteration iteration = 
evaluationStep.evaluate(EmptyBindingSet.getInstance())) { @@ -123,8 +99,8 @@ void evaluateShouldReusePatternScansAcrossBacktracking() { @Test void evaluateShouldReuseDerivedRelationsAcrossEquivalentPatterns() { - TestQueryAccess queryAccess = new TestQueryAccess(); - QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess); + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); long count = 0; try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { @@ -141,8 +117,10 @@ void evaluateShouldReuseDerivedRelationsAcrossEquivalentPatterns() { @Test void evaluateShouldAvoidRecordScansForHiddenContextMultiplicity() { - TestQueryAccess queryAccess = TestQueryAccess.withDuplicateContexts(); - QueryEvaluationStep evaluationStep = createEvaluationStep(queryAccess, createPlanWithHiddenContexts()); + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = LmdbLftjSyntheticScenario.TestQueryAccess + .withDuplicateContexts(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, + LmdbLftjSyntheticScenario.createPlanWithHiddenContexts()); long count = 0; try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { @@ -156,369 +134,4 @@ void evaluateShouldAvoidRecordScansForHiddenContextMultiplicity() { assertEquals(0, queryAccess.recordScanCalls, "hidden context multiplicity should come from cached frontier counts, not RecordIterator rescans"); } - - private QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess) { - return createEvaluationStep(queryAccess, createPlan()); - } - - private QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess, LmdbLftjPlan plan) { - QueryEvaluationContext context = new QueryEvaluationContext.Minimal((Dataset) null); - LmdbLftjEvaluationStrategy 
strategy = new LmdbLftjEvaluationStrategy( - new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess), - null, - null, - 0L, - new EvaluationStatistics(), - false, - DefaultCollectionFactory::new); - LmdbLftjExecutor executor = new LmdbLftjExecutor(strategy); - return executor.prepare(new LmdbLftjTupleExpr(plan), context); - } - - private LmdbLftjPlan createPlan() { - StatementPattern pattern1 = statementPattern("a", "b"); - StatementPattern pattern2 = statementPattern("b", "c"); - StatementPattern pattern3 = statementPattern("c", "a"); - TupleExpr fallbackExpr = new Join(new Join(pattern1.clone(), pattern2.clone()), pattern3.clone()); - return new LmdbLftjPlan( - fallbackExpr, - fallbackExpr.getBindingNames(), - fallbackExpr.getAssuredBindingNames(), - List.of("a", "b", "c"), - List.of( - new LmdbLftjPatternPlan(pattern1, "psoc"), - new LmdbLftjPatternPlan(pattern2, "psoc"), - new LmdbLftjPatternPlan(pattern3, "posc"))); - } - - private StatementPattern statementPattern(String subjectName, String objectName) { - return new StatementPattern( - new Var(subjectName), - new Var("pred", FOAF.KNOWS), - new Var(objectName)); - } - - private LmdbLftjPlan createPlanWithHiddenContexts() { - StatementPattern pattern1 = statementPattern("a", "b", "ctx1"); - StatementPattern pattern2 = statementPattern("b", "c", "ctx2"); - StatementPattern pattern3 = statementPattern("c", "a", "ctx3"); - TupleExpr fallbackExpr = new Join(new Join(pattern1.clone(), pattern2.clone()), pattern3.clone()); - return new LmdbLftjPlan( - fallbackExpr, - fallbackExpr.getBindingNames(), - fallbackExpr.getAssuredBindingNames(), - List.of("a", "b", "c"), - List.of( - new LmdbLftjPatternPlan(pattern1, "psoc"), - new LmdbLftjPatternPlan(pattern2, "psoc"), - new LmdbLftjPatternPlan(pattern3, "posc"))); - } - - private StatementPattern statementPattern(String subjectName, String objectName, String hiddenContextName) { - return new StatementPattern( - new Var(subjectName), - new Var("pred", FOAF.KNOWS), 
- new Var(objectName), - Var.of(hiddenContextName, true)); - } - - private static final class EmptyTripleSource implements TripleSource { - - @Override - public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, - Resource... contexts) { - return new EmptyIteration<>(); - } - - @Override - public ValueFactory getValueFactory() { - return VF; - } - } - - private static final class TestQueryAccess implements LmdbQueryAccess { - - private final TxnManager txnManager = new TxnManager(0L, TxnManager.Mode.NONE); - private final TxnManager.Txn txn = txnManager.createTxn(1L); - private final List quads = new ArrayList<>(); - private final List valuesById = new ArrayList<>(); - - private int resolveValueCalls; - private int releaseReadTxnCalls; - private int openScanCalls; - private int recordScanCalls; - private int openTrieCursorCalls; - private int closedScanCalls; - - private TestQueryAccess() { - this(false); - } - - private TestQueryAccess(boolean duplicateContexts) { - valuesById.add(null); - valuesById.add(VF.createIRI("urn:person:1")); - valuesById.add(VF.createIRI("urn:person:2")); - valuesById.add(VF.createIRI("urn:person:3")); - valuesById.add(VF.createIRI("urn:person:4")); - valuesById.add(FOAF.KNOWS); - valuesById.add(VF.createIRI("urn:ctx:1")); - valuesById.add(VF.createIRI("urn:ctx:2")); - - for (long subject = 1; subject <= 4; subject++) { - for (long object = 1; object <= 4; object++) { - if (subject != object) { - quads.add(new long[] { subject, 5L, object, 0L }); - if (duplicateContexts) { - quads.add(new long[] { subject, 5L, object, 6L }); - quads.add(new long[] { subject, 5L, object, 7L }); - } - } - } - } - } - - private static TestQueryAccess withDuplicateContexts() { - return new TestQueryAccess(true); - } - - @Override - public TripleStore tripleStore() { - return null; - } - - @Override - public TxnManager.Txn acquireReadTxn() { - return txn; - } - - @Override - public void releaseReadTxn(TxnManager.Txn txn) { - assertTrue(txn 
== this.txn); - releaseReadTxnCalls++; - } - - @Override - public long resolveId(Value value) { - if (FOAF.KNOWS.equals(value)) { - return 5L; - } - - for (int i = 1; i < valuesById.size(); i++) { - if (value.equals(valuesById.get(i))) { - return i; - } - } - - return -1L; - } - - @Override - public Value resolveValue(long id) { - resolveValueCalls++; - return valuesById.get((int) id); - } - - @Override - public boolean includeInferred() { - return false; - } - - @Override - public Set configuredIndexes() { - return Set.of("psoc", "posc"); - } - - @Override - public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, - long context, boolean explicit) { - openScanCalls++; - recordScanCalls++; - return new TestRecordIterator(quads, subj, pred, obj, context, this::recordClosedScan); - } - - @Override - public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { - openScanCalls++; - openTrieCursorCalls++; - return new TestTrieKeyCursor(quads, indexName, this::recordClosedScan); - } - - private void recordClosedScan() { - closedScanCalls++; - } - } - - private static final class TestRecordIterator implements RecordIterator { - - private final List quads; - private final long subj; - private final long pred; - private final long obj; - private final long context; - private final Runnable closeCallback; - - private int position; - private boolean closed; - - private TestRecordIterator(List quads, long subj, long pred, long obj, long context, - Runnable closeCallback) { - this.quads = quads; - this.subj = subj; - this.pred = pred; - this.obj = obj; - this.context = context; - this.closeCallback = closeCallback; - } - - @Override - public long[] next() { - while (position < quads.size()) { - long[] quad = quads.get(position++); - if (matches(subj, quad[0]) && matches(pred, quad[1]) && matches(obj, quad[2]) - && matches(context, quad[3])) { - return quad.clone(); - } - } - return null; - } - - 
private boolean matches(long expected, long actual) { - return expected < 0 || expected == actual; - } - - @Override - public void close() { - if (!closed) { - closed = true; - closeCallback.run(); - } - } - } - - private static final class TestTrieKeyCursor implements LmdbTrieKeyCursor { - - private final List quads; - private final int[] order; - private final Runnable closeCallback; - - private long[] lowerBound; - private long[] upperBound; - private int prefixLength; - private int position; - private long[] current; - private boolean closed; - - private TestTrieKeyCursor(List quads, String indexName, Runnable closeCallback) { - this.quads = quads.stream() - .map(long[]::clone) - .sorted(Comparator.comparingLong((long[] quad) -> quad[componentIndex(indexName, 0)]) - .thenComparingLong(quad -> quad[componentIndex(indexName, 1)]) - .thenComparingLong(quad -> quad[componentIndex(indexName, 2)]) - .thenComparingLong(quad -> quad[componentIndex(indexName, 3)])) - .toList(); - this.order = new int[] { - componentIndex(indexName, 0), - componentIndex(indexName, 1), - componentIndex(indexName, 2), - componentIndex(indexName, 3) - }; - this.closeCallback = closeCallback; - } - - @Override - public boolean position(long[] lowerBound, long[] upperBound, int prefixLength) { - this.lowerBound = lowerBound.clone(); - this.upperBound = upperBound.clone(); - this.prefixLength = prefixLength; - this.position = 0; - this.current = null; - - while (position < quads.size()) { - long[] quad = quads.get(position); - if (comparePrefix(quad, this.lowerBound, this.prefixLength) < 0) { - position++; - continue; - } - if (compare(quad, this.upperBound) > 0) { - return false; - } - current = quad; - return true; - } - return false; - } - - @Override - public boolean next() { - if (current == null) { - return false; - } - - while (++position < quads.size()) { - long[] quad = quads.get(position); - if (compare(quad, upperBound) > 0) { - current = null; - return false; - } - current = quad; 
- return true; - } - - current = null; - return false; - } - - @Override - public boolean isPositioned() { - return current != null; - } - - @Override - public long valueAt(int keyFieldIndex) { - return current[order[keyFieldIndex]]; - } - - @Override - public void close() { - if (!closed) { - closed = true; - closeCallback.run(); - } - } - - private int compare(long[] quad, long[] bounds) { - for (int component : order) { - int comparison = Long.compare(quad[component], bounds[component]); - if (comparison != 0) { - return comparison; - } - } - return 0; - } - - private int comparePrefix(long[] quad, long[] bounds, int prefixLength) { - for (int i = 0; i < prefixLength; i++) { - int comparison = Long.compare(quad[order[i]], bounds[order[i]]); - if (comparison != 0) { - return comparison; - } - } - return 0; - } - - private static int componentIndex(String indexName, int keyFieldIndex) { - switch (indexName.charAt(keyFieldIndex)) { - case 's': - return 0; - case 'p': - return 1; - case 'o': - return 2; - case 'c': - return 3; - default: - throw new IllegalArgumentException("Unsupported LMDB index field: " + indexName.charAt(keyFieldIndex)); - } - } - } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSpecializationTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSpecializationTest.java new file mode 100644 index 0000000000..0e772a0cdf --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSpecializationTest.java @@ -0,0 +1,89 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.Dataset; +import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.QueryRoot; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.Var; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.junit.jupiter.api.Test; + +class LmdbLftjSpecializationTest { + + @Test + void evaluateShouldBuildOneDerivedRelationForSamePredicateCycle() { + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); + + long count = 0; + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + while (iteration.hasNext()) { + iteration.next(); + count++; + } + } + + assertTrue(count > 0, "sanity check: the synthetic cycle must still produce rows"); + assertEquals(2, queryAccess.openTrieCursorCalls, + "same-predicate cycle specialization should build one reusable relation from one forward and one reverse scan"); + } + + @Test + void optimizerShouldFallbackForRepeatedVariablePattern() { + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer(new LmdbLftjTripleSource( + new LmdbLftjSyntheticScenario.EmptyTripleSource(), + new OptimizerQueryAccess())); + + StatementPattern repeated = new 
StatementPattern( + new Var("a"), + new Var("pred", LmdbLftjSyntheticScenario.VF.createIRI("urn:p")), + new Var("a")); + StatementPattern second = new StatementPattern( + new Var("a"), + new Var("pred2", LmdbLftjSyntheticScenario.VF.createIRI("urn:q")), + new Var("b")); + StatementPattern third = new StatementPattern( + new Var("b"), + new Var("pred3", LmdbLftjSyntheticScenario.VF.createIRI("urn:r")), + new Var("c")); + TupleExpr tupleExpr = new QueryRoot(new Join(new Join(repeated, second), third)); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + assertFalse(((QueryRoot) tupleExpr).getArg() instanceof LmdbLftjTupleExpr, + "repeated visible variables must stay on the generic evaluator path"); + } + + private static final class OptimizerQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { + + @Override + public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, + long context, boolean explicit) { + throw new UnsupportedOperationException(); + } + + @Override + public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { + throw new UnsupportedOperationException(); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java new file mode 100644 index 0000000000..9d89596ce1 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java @@ -0,0 +1,413 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Set; + +import org.eclipse.rdf4j.collection.factory.impl.DefaultCollectionFactory; +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.EmptyIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.FOAF; +import org.eclipse.rdf4j.query.Dataset; +import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.Var; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; + +final class LmdbLftjSyntheticScenario { + + static final ValueFactory VF = SimpleValueFactory.getInstance(); + + private LmdbLftjSyntheticScenario() { + } + + static QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess) { + return createEvaluationStep(queryAccess, createPlan()); + } + + static QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess, LmdbLftjPlan plan) { + QueryEvaluationContext context = new QueryEvaluationContext.Minimal((Dataset) null); + LmdbLftjEvaluationStrategy strategy = new LmdbLftjEvaluationStrategy( + new 
LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess), + null, + null, + 0L, + new EvaluationStatistics(), + false, + DefaultCollectionFactory::new); + LmdbLftjExecutor executor = new LmdbLftjExecutor(strategy); + return executor.prepare(new LmdbLftjTupleExpr(plan), context); + } + + static LmdbLftjPlan createPlan() { + return createPlan("psoc", "psoc", "posc"); + } + + static LmdbLftjPlan createPlanWithHiddenContexts() { + StatementPattern pattern1 = statementPattern("a", "b", "ctx1"); + StatementPattern pattern2 = statementPattern("b", "c", "ctx2"); + StatementPattern pattern3 = statementPattern("c", "a", "ctx3"); + TupleExpr fallbackExpr = new Join(new Join(pattern1.clone(), pattern2.clone()), pattern3.clone()); + return new LmdbLftjPlan( + fallbackExpr, + fallbackExpr.getBindingNames(), + fallbackExpr.getAssuredBindingNames(), + List.of("a", "b", "c"), + List.of( + new LmdbLftjPatternPlan(pattern1, "psoc"), + new LmdbLftjPatternPlan(pattern2, "psoc"), + new LmdbLftjPatternPlan(pattern3, "posc"))); + } + + static LmdbLftjPlan createPlan(String firstIndex, String secondIndex, String thirdIndex) { + StatementPattern pattern1 = statementPattern("a", "b"); + StatementPattern pattern2 = statementPattern("b", "c"); + StatementPattern pattern3 = statementPattern("c", "a"); + TupleExpr fallbackExpr = new Join(new Join(pattern1.clone(), pattern2.clone()), pattern3.clone()); + return new LmdbLftjPlan( + fallbackExpr, + fallbackExpr.getBindingNames(), + fallbackExpr.getAssuredBindingNames(), + List.of("a", "b", "c"), + List.of( + new LmdbLftjPatternPlan(pattern1, firstIndex), + new LmdbLftjPatternPlan(pattern2, secondIndex), + new LmdbLftjPatternPlan(pattern3, thirdIndex))); + } + + static StatementPattern statementPattern(String subjectName, String objectName) { + return new StatementPattern(new Var(subjectName), new Var("pred", FOAF.KNOWS), new Var(objectName)); + } + + static StatementPattern statementPattern(String subjectName, String objectName, String 
hiddenContextName) { + return new StatementPattern( + new Var(subjectName), + new Var("pred", FOAF.KNOWS), + new Var(objectName), + Var.of(hiddenContextName, true)); + } + + static final class EmptyTripleSource implements TripleSource { + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) { + return new EmptyIteration<>(); + } + + @Override + public ValueFactory getValueFactory() { + return VF; + } + } + + static class TestQueryAccess implements LmdbQueryAccess { + + private final TxnManager txnManager = new TxnManager(0L, TxnManager.Mode.NONE); + private final TxnManager.Txn txn = txnManager.createTxn(1L); + private final List quads = new ArrayList<>(); + private final List valuesById = new ArrayList<>(); + + int resolveValueCalls; + int releaseReadTxnCalls; + int openScanCalls; + int recordScanCalls; + int openTrieCursorCalls; + int closedScanCalls; + + TestQueryAccess() { + this(false); + } + + TestQueryAccess(boolean duplicateContexts) { + valuesById.add(null); + valuesById.add(VF.createIRI("urn:person:1")); + valuesById.add(VF.createIRI("urn:person:2")); + valuesById.add(VF.createIRI("urn:person:3")); + valuesById.add(VF.createIRI("urn:person:4")); + valuesById.add(FOAF.KNOWS); + valuesById.add(VF.createIRI("urn:ctx:1")); + valuesById.add(VF.createIRI("urn:ctx:2")); + + for (long subject = 1; subject <= 4; subject++) { + for (long object = 1; object <= 4; object++) { + if (subject != object) { + quads.add(new long[] { subject, 5L, object, 0L }); + if (duplicateContexts) { + quads.add(new long[] { subject, 5L, object, 6L }); + quads.add(new long[] { subject, 5L, object, 7L }); + } + } + } + } + } + + static TestQueryAccess withDuplicateContexts() { + return new TestQueryAccess(true); + } + + @Override + public TripleStore tripleStore() { + return null; + } + + @Override + public TxnManager.Txn acquireReadTxn() { + return txn; + } + + @Override + public void releaseReadTxn(TxnManager.Txn txn) { + 
assertTrue(txn == this.txn); + releaseReadTxnCalls++; + } + + @Override + public long resolveId(Value value) { + if (FOAF.KNOWS.equals(value)) { + return 5L; + } + + for (int i = 1; i < valuesById.size(); i++) { + if (value.equals(valuesById.get(i))) { + return i; + } + } + + return -1L; + } + + @Override + public Value resolveValue(long id) { + resolveValueCalls++; + return valuesById.get((int) id); + } + + @Override + public boolean includeInferred() { + return false; + } + + @Override + public Set configuredIndexes() { + return Set.of("psoc", "posc"); + } + + @Override + public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, + long context, boolean explicit) { + openScanCalls++; + recordScanCalls++; + return new TestRecordIterator(quads, subj, pred, obj, context, this::recordClosedScan); + } + + @Override + public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { + openScanCalls++; + openTrieCursorCalls++; + return new TestTrieKeyCursor(quads, indexName, this::recordClosedScan); + } + + private void recordClosedScan() { + closedScanCalls++; + } + } + + private static final class TestRecordIterator implements RecordIterator { + + private final List quads; + private final long subj; + private final long pred; + private final long obj; + private final long context; + private final Runnable closeCallback; + + private int position; + private boolean closed; + + private TestRecordIterator(List quads, long subj, long pred, long obj, long context, + Runnable closeCallback) { + this.quads = quads; + this.subj = subj; + this.pred = pred; + this.obj = obj; + this.context = context; + this.closeCallback = closeCallback; + } + + @Override + public long[] next() { + while (position < quads.size()) { + long[] quad = quads.get(position++); + if (matches(subj, quad[0]) && matches(pred, quad[1]) && matches(obj, quad[2]) + && matches(context, quad[3])) { + return quad.clone(); + } + } + return 
null; + } + + private boolean matches(long expected, long actual) { + return expected < 0 || expected == actual; + } + + @Override + public void close() { + if (!closed) { + closed = true; + closeCallback.run(); + } + } + } + + private static final class TestTrieKeyCursor implements LmdbTrieKeyCursor { + + private final List quads; + private final int[] order; + private final Runnable closeCallback; + + private long[] lowerBound; + private long[] upperBound; + private int prefixLength; + private int position; + private long[] current; + private boolean closed; + + private TestTrieKeyCursor(List quads, String indexName, Runnable closeCallback) { + this.quads = quads.stream() + .map(long[]::clone) + .sorted(Comparator.comparingLong((long[] quad) -> quad[componentIndex(indexName, 0)]) + .thenComparingLong(quad -> quad[componentIndex(indexName, 1)]) + .thenComparingLong(quad -> quad[componentIndex(indexName, 2)]) + .thenComparingLong(quad -> quad[componentIndex(indexName, 3)])) + .toList(); + this.order = new int[] { + componentIndex(indexName, 0), + componentIndex(indexName, 1), + componentIndex(indexName, 2), + componentIndex(indexName, 3) + }; + this.closeCallback = closeCallback; + } + + @Override + public boolean position(long[] lowerBound, long[] upperBound, int prefixLength) { + this.lowerBound = lowerBound.clone(); + this.upperBound = upperBound.clone(); + this.prefixLength = prefixLength; + this.position = 0; + this.current = null; + + while (position < quads.size()) { + long[] quad = quads.get(position); + if (comparePrefix(quad, this.lowerBound, this.prefixLength) < 0) { + position++; + continue; + } + if (compare(quad, this.upperBound) > 0) { + return false; + } + current = quad; + return true; + } + return false; + } + + @Override + public boolean next() { + if (current == null) { + return false; + } + + while (++position < quads.size()) { + long[] quad = quads.get(position); + if (compare(quad, upperBound) > 0) { + current = null; + return false; + } + 
current = quad; + return true; + } + + current = null; + return false; + } + + @Override + public boolean isPositioned() { + return current != null; + } + + @Override + public long valueAt(int keyFieldIndex) { + return current[order[keyFieldIndex]]; + } + + @Override + public void close() { + if (!closed) { + closed = true; + closeCallback.run(); + } + } + + private int compare(long[] quad, long[] bounds) { + for (int component : order) { + int comparison = Long.compare(quad[component], bounds[component]); + if (comparison != 0) { + return comparison; + } + } + return 0; + } + + private int comparePrefix(long[] quad, long[] bounds, int prefixLength) { + for (int i = 0; i < prefixLength; i++) { + int comparison = Long.compare(quad[order[i]], bounds[order[i]]); + if (comparison != 0) { + return comparison; + } + } + return 0; + } + + private static int componentIndex(String indexName, int keyFieldIndex) { + switch (indexName.charAt(keyFieldIndex)) { + case 's': + return 0; + case 'p': + return 1; + case 'o': + return 2; + case 'c': + return 3; + default: + throw new IllegalArgumentException("Unsupported LMDB index field: " + indexName.charAt(keyFieldIndex)); + } + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java index ba7e74cde4..15a5a1102c 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java @@ -28,6 +28,20 @@ class FoafCliqueLftjCorrectnessTest { @Test void cycle3ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File enabledDir) { + assertCycleCountMatches(disabledDir, enabledDir, 3); + } + + @Test + void cycle4ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File enabledDir) { + 
assertCycleCountMatches(disabledDir, enabledDir, 4); + } + + @Test + void cycle5ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File enabledDir) { + assertCycleCountMatches(disabledDir, enabledDir, 5); + } + + private void assertCycleCountMatches(File disabledDir, File enabledDir, int cycleSize) { Repository disabledRepository = createRepository(disabledDir, false); Repository enabledRepository = createRepository(enabledDir, true); @@ -35,10 +49,10 @@ void cycle3ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File populate(disabledRepository); populate(enabledRepository); - long expected = executeCount(disabledRepository, cycleQuery(3)); - long actual = executeCount(enabledRepository, cycleQuery(3)); + long expected = executeCount(disabledRepository, cycleQuery(cycleSize)); + long actual = executeCount(enabledRepository, cycleQuery(cycleSize)); - assertEquals(expected, actual, "LFTJ must preserve the cycle3 result count"); + assertEquals(expected, actual, "LFTJ must preserve the cycle" + cycleSize + " result count"); } finally { disabledRepository.shutDown(); enabledRepository.shutDown(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md index 885b80bb0e..5aa9ab2f70 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -1,7 +1,7 @@ Benchmark (cliquePercentage) (lftjEnabled) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units -FoafCliqueQueryBenchmark.cycle3 30 true 8 3 5000 15000 12345 avgt 3 36.818 ± 43.052 ms/op -FoafCliqueQueryBenchmark.cycle3 30 false 8 3 5000 15000 12345 avgt 3 90.331 ± 3.032 ms/op -FoafCliqueQueryBenchmark.cycle4 30 true 8 3 5000 
15000 12345 avgt 3 180.250 ± 30.860 ms/op -FoafCliqueQueryBenchmark.cycle4 30 false 8 3 5000 15000 12345 avgt 3 618.881 ± 51.191 ms/op -FoafCliqueQueryBenchmark.cycle5 30 true 8 3 5000 15000 12345 avgt 3 1260.939 ± 263.311 ms/op -FoafCliqueQueryBenchmark.cycle5 30 false 8 3 5000 15000 12345 avgt 3 3751.511 ± 141.681 ms/op +FoafCliqueQueryBenchmark.cycle3 30 true 8 3 5000 15000 12345 avgt 3 30.297 ± 7.495 ms/op +FoafCliqueQueryBenchmark.cycle3 30 false 8 3 5000 15000 12345 avgt 3 88.044 ± 24.824 ms/op +FoafCliqueQueryBenchmark.cycle4 30 true 8 3 5000 15000 12345 avgt 3 173.018 ± 254.982 ms/op +FoafCliqueQueryBenchmark.cycle4 30 false 8 3 5000 15000 12345 avgt 3 569.874 ± 159.548 ms/op +FoafCliqueQueryBenchmark.cycle5 30 true 8 3 5000 15000 12345 avgt 3 1093.809 ± 240.403 ms/op +FoafCliqueQueryBenchmark.cycle5 30 false 8 3 5000 15000 12345 avgt 3 3815.727 ± 256.018 ms/op From 08892f53cd4a035417384fb3398808aa70b0102d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 17:41:24 +0200 Subject: [PATCH 16/32] new best --- core/sail/lmdb/pom.xml | 5 + .../AbstractLmdbCompiledLftjIteration.java | 103 ++++++ .../sail/lmdb/LmdbCompiledLftjFactory.java | 23 ++ .../sail/lmdb/LmdbDerivedBinaryRelation.java | 12 + .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 10 +- .../rdf4j/sail/lmdb/LmdbLftjCodegenCache.java | 73 +++++ .../sail/lmdb/LmdbLftjCodegenCompiler.java | 306 ++++++++++++++++++ .../rdf4j/sail/lmdb/LmdbLftjCursor.java | 2 +- .../sail/lmdb/LmdbLftjExecutionShape.java | 52 +++ .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 31 +- .../rdf4j/sail/lmdb/LmdbLftjMetrics.java | 2 +- .../eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java | 52 ++- .../sail/lmdb/LmdbPrefixFrontierProvider.java | 306 ++++++++++++++---- .../rdf4j/sail/lmdb/LmdbQueryAccess.java | 22 +- .../eclipse/rdf4j/sail/lmdb/LmdbStore.java | 6 + .../rdf4j/sail/lmdb/LmdbStoreConnection.java | 20 ++ .../sail/lmdb/config/LmdbStoreConfig.java | 25 ++ .../sail/lmdb/config/LmdbStoreSchema.java 
| 6 + .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 247 ++++++++++++++ .../sail/lmdb/LmdbLftjExecutorBenchmark.java | 36 ++- .../FoafCliqueLftjCorrectnessTest.java | 42 ++- .../benchmark/FoafCliqueQueryBenchmark.java | 9 +- .../FoafCliqueQueryBenchmarkResults.md | 20 +- .../sail/lmdb/config/LmdbStoreConfigTest.java | 19 ++ 24 files changed, 1326 insertions(+), 103 deletions(-) create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCache.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java diff --git a/core/sail/lmdb/pom.xml b/core/sail/lmdb/pom.xml index 702da7d065..be4786316b 100644 --- a/core/sail/lmdb/pom.xml +++ b/core/sail/lmdb/pom.xml @@ -151,6 +151,11 @@ com.google.guava guava + + org.codehaus.janino + janino + 3.1.12 + ${project.groupId} rdf4j-sail-testsuite diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java new file mode 100644 index 0000000000..ecfd434ecc --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java @@ -0,0 +1,103 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import org.eclipse.rdf4j.common.iteration.LookAheadIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; + +public abstract class AbstractLmdbCompiledLftjIteration extends LookAheadIteration { + + private final LmdbLftjPlan plan; + private final LmdbLftjExecutionShape shape; + private final LmdbLftjBindingState state; + private final QueryEvaluationContext context; + private final LmdbLftjMetrics metrics; + private final LmdbPrefixFrontierProvider frontierProvider; + + private BindingSet repeatedBinding; + private long repeatedCount; + + protected AbstractLmdbCompiledLftjIteration(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + LmdbLftjBindingState state, + QueryEvaluationContext context, LmdbQueryAccess queryAccess, LmdbLftjMetrics metrics) { + this.plan = plan; + this.shape = shape; + this.state = state; + this.context = context; + this.metrics = metrics; + this.frontierProvider = new LmdbPrefixFrontierProvider(queryAccess, state, metrics); + } + + @Override + protected final BindingSet getNextElement() { + if (repeatedCount > 0) { + repeatedCount--; + metrics.recordEmitted(1); + return repeatedBinding; + } + + try { + return computeNextElement(); + } catch (RuntimeException e) { + throw new QueryEvaluationException("LMDB LFTJ compiled iteration failed", e); + } + } + + @Override + protected final void handleClose() { + closeCursors(); + state.close(); + } + + protected abstract 
BindingSet computeNextElement(); + + protected abstract void closeCursors(); + + protected final LmdbLftjBindingState state() { + return state; + } + + protected final LmdbLftjMetrics metrics() { + return metrics; + } + + protected final void recordCandidateScan() { + metrics.recordCandidateScan(); + } + + protected final boolean isFixed(int slot) { + return state.isFixed(slot); + } + + protected final LmdbLftjCursor createCursor(int patternOrdinal) { + return new LmdbCachedTrieCursor(plan.patternPlans().get(patternOrdinal), frontierProvider); + } + + protected final long countMatches(int patternOrdinal) { + metrics.recordWitnessScan(); + return frontierProvider.countMatches(plan.patternPlans().get(patternOrdinal)); + } + + protected final BindingSet emitCurrent(long multiplicity) { + BindingSet result = state.materialize(context); + repeatedBinding = result; + repeatedCount = multiplicity - 1; + metrics.recordEmitted(1); + return result; + } + + protected final LmdbLftjExecutionShape shape() { + return shape; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java new file mode 100644 index 0000000000..07c3085bc7 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; + +public interface LmdbCompiledLftjFactory { + + CloseableIteration create(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + LmdbLftjBindingState state, QueryEvaluationContext context, LmdbQueryAccess queryAccess, + LmdbLftjMetrics metrics); +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java index 1a800c2744..4a3e10ffbc 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java @@ -131,6 +131,18 @@ static final class RelationKey { this.predicateId = predicateId; } + String indexName() { + return indexName; + } + + boolean includeInferred() { + return includeInferred; + } + + long predicateId() { + return predicateId; + } + @Override public boolean equals(Object other) { if (!(other instanceof RelationKey)) { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java index 93adb8dac3..a4f9fdff10 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -20,7 +20,7 @@ import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; -final class LmdbLftjBindingState { +public final class 
LmdbLftjBindingState { private final LmdbLftjPlan plan; private final BindingSet inputBindings; @@ -95,6 +95,10 @@ boolean isBound(int slot) { return assignedPresent[slot] || fixedPresent[slot]; } + boolean isFixed(int slot) { + return fixedPresent[slot]; + } + long value(String variableName) { return value(slot(variableName)); } @@ -110,7 +114,7 @@ void assign(String variableName, long value) { assign(slot(variableName), value); } - void assign(int slot, long value) { + public void assign(int slot, long value) { assignedValues[slot] = value; assignedPresent[slot] = true; } @@ -119,7 +123,7 @@ void clear(String variableName) { clear(slot(variableName)); } - void clear(int slot) { + public void clear(int slot) { assignedPresent[slot] = false; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCache.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCache.java new file mode 100644 index 0000000000..f40b4b7aa3 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCache.java @@ -0,0 +1,73 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.LinkedHashMap; +import java.util.Map; + +final class LmdbLftjCodegenCache { + + private static final int MAX_ENTRIES = 256; + + private final Map entries = new LinkedHashMap<>(32, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > MAX_ENTRIES; + } + }; + + synchronized CacheEntry get(String executionKey) { + return entries.get(executionKey); + } + + synchronized void putSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + entries.put(executionKey, CacheEntry.success(factory)); + } + + synchronized void putFailure(String executionKey, String message) { + entries.put(executionKey, CacheEntry.failure(message)); + } + + synchronized void clear() { + entries.clear(); + } + + static final class CacheEntry { + private final LmdbCompiledLftjFactory factory; + private final String failureMessage; + + private CacheEntry(LmdbCompiledLftjFactory factory, String failureMessage) { + this.factory = factory; + this.failureMessage = failureMessage; + } + + static CacheEntry success(LmdbCompiledLftjFactory factory) { + return new CacheEntry(factory, null); + } + + static CacheEntry failure(String failureMessage) { + return new CacheEntry(null, failureMessage); + } + + boolean compiled() { + return factory != null; + } + + LmdbCompiledLftjFactory factory() { + return factory; + } + + String failureMessage() { + return failureMessage; + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java new file mode 100644 index 0000000000..019ba74635 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java @@ -0,0 +1,306 @@ 
+/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.concurrent.atomic.AtomicLong; + +import org.codehaus.janino.SimpleCompiler; + +class LmdbLftjCodegenCompiler { + + static final LmdbLftjCodegenCompiler INSTANCE = new LmdbLftjCodegenCompiler(); + + private static final AtomicLong CLASS_COUNTER = new AtomicLong(); + private static final String PACKAGE_NAME = LmdbLftjCodegenCompiler.class.getPackageName(); + + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape) { + String simpleClassName = "GeneratedLmdbLftjFactory" + CLASS_COUNTER.incrementAndGet(); + String source = new SourceBuilder(simpleClassName, shape).build(); + try { + SimpleCompiler compiler = new SimpleCompiler(); + compiler.setParentClassLoader(LmdbLftjCodegenCompiler.class.getClassLoader()); + compiler.cook(source); + Class compiledClass = compiler.getClassLoader().loadClass(PACKAGE_NAME + "." 
+ simpleClassName); + var constructor = compiledClass.getDeclaredConstructor(); + constructor.setAccessible(true); + return (LmdbCompiledLftjFactory) constructor.newInstance(); + } catch (Exception e) { + throw new IllegalArgumentException("Unable to compile LMDB LFTJ codegen for " + plan.executionKey(), e); + } + } + + private static final class SourceBuilder { + + private final String simpleClassName; + private final LmdbLftjExecutionShape shape; + + private SourceBuilder(String simpleClassName, LmdbLftjExecutionShape shape) { + this.simpleClassName = simpleClassName; + this.shape = shape; + } + + private String build() { + StringBuilder source = new StringBuilder(); + source.append("package ").append(PACKAGE_NAME).append(";\n\n"); + source.append("import org.eclipse.rdf4j.common.iteration.CloseableIteration;\n"); + source.append("import org.eclipse.rdf4j.query.BindingSet;\n"); + source.append("import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext;\n\n"); + source.append("final class ").append(simpleClassName).append(" implements LmdbCompiledLftjFactory {\n"); + appendFactoryMethod(source); + appendIterationClass(source); + source.append("}\n"); + return source.toString(); + } + + private void appendFactoryMethod(StringBuilder source) { + source.append(" @Override\n"); + source.append( + " public CloseableIteration create(LmdbLftjPlan plan, LmdbLftjExecutionShape shape,\n"); + source.append( + " LmdbLftjBindingState state, QueryEvaluationContext context, LmdbQueryAccess queryAccess,\n"); + source.append(" LmdbLftjMetrics metrics) {\n"); + source.append(" return new Iteration(plan, shape, state, context, queryAccess, metrics);\n"); + source.append(" }\n\n"); + } + + private void appendIterationClass(StringBuilder source) { + int variableCount = shape.variableCount(); + source.append(" private static final class Iteration extends AbstractLmdbCompiledLftjIteration {\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); 
patternOrdinal++) { + source.append(" private final LmdbLftjCursor cursor").append(patternOrdinal).append(";\n"); + } + for (int slot = 0; slot < variableCount; slot++) { + source.append(" private boolean depth").append(slot).append("Initialized;\n"); + source.append(" private boolean depth").append(slot).append("Advance;\n"); + } + source.append(" private int depth;\n\n"); + + source.append( + " private Iteration(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, LmdbLftjBindingState state,\n"); + source.append( + " QueryEvaluationContext context, LmdbQueryAccess queryAccess, LmdbLftjMetrics metrics) {\n"); + source.append(" super(plan, shape, state, context, queryAccess, metrics);\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + source.append(" this.cursor") + .append(patternOrdinal) + .append(" = createCursor(") + .append(patternOrdinal) + .append(");\n"); + } + source.append(" this.depth = firstDepth();\n"); + source.append(" }\n\n"); + + source.append(" @Override\n"); + source.append(" protected BindingSet computeNextElement() {\n"); + source.append(" while (depth >= 0) {\n"); + source.append(" if (depth == ").append(variableCount).append(") {\n"); + source.append(" long multiplicity = 1L;\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + source.append(" long witnesses") + .append(patternOrdinal) + .append(" = countMatches(") + .append(patternOrdinal) + .append(");\n"); + source.append(" if (witnesses").append(patternOrdinal).append(" == 0L) {\n"); + source.append(" backtrackAfterLeaf();\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" multiplicity = Math.multiplyExact(multiplicity, witnesses") + .append(patternOrdinal) + .append(");\n"); + } + source.append(" backtrackAfterLeaf();\n"); + source.append(" if (multiplicity > 0L) {\n"); + source.append(" return emitCurrent(multiplicity);\n"); + source.append(" }\n"); + source.append(" 
continue;\n"); + source.append(" }\n"); + source.append(" switch (depth) {\n"); + for (int slot = 0; slot < variableCount; slot++) { + appendDepthCase(source, slot); + } + source.append(" default:\n"); + source.append(" return null;\n"); + source.append(" }\n"); + source.append(" }\n"); + source.append(" return null;\n"); + source.append(" }\n\n"); + + appendFirstDepth(source, variableCount); + appendBacktrackAfterLeaf(source, variableCount); + appendBacktrackFromDepth(source, variableCount); + for (int slot = 0; slot < variableCount; slot++) { + appendReleaseDepth(source, slot); + appendPositionDepth(source, slot, shape.cursorOrdinals(slot)); + } + + source.append(" @Override\n"); + source.append(" protected void closeCursors() {\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + source.append(" cursor").append(patternOrdinal).append(".close();\n"); + } + source.append(" }\n"); + source.append(" }\n"); + } + + private void appendDepthCase(StringBuilder source, int slot) { + source.append(" case ").append(slot).append(":\n"); + source.append(" if (isFixed(").append(slot).append(")) {\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" if (!depth").append(slot).append("Initialized) {\n"); + source.append(" if (!positionDepth").append(slot).append("(false)) {\n"); + source.append(" backtrackFromDepth(").append(slot).append(");\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" depth").append(slot).append("Initialized = true;\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" if (depth").append(slot).append("Advance) {\n"); + source.append(" if (!positionDepth").append(slot).append("(true)) {\n"); + source.append(" backtrackFromDepth(").append(slot).append(");\n"); + source.append(" continue;\n"); + source.append(" 
}\n"); + source.append(" depth").append(slot).append("Advance = false;\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + } + + private void appendFirstDepth(StringBuilder source, int variableCount) { + source.append(" private int firstDepth() {\n"); + for (int slot = 0; slot < variableCount; slot++) { + source.append(" if (!isFixed(").append(slot).append(")) {\n"); + source.append(" return ").append(slot).append(";\n"); + source.append(" }\n"); + } + source.append(" return ").append(variableCount).append(";\n"); + source.append(" }\n\n"); + } + + private void appendBacktrackAfterLeaf(StringBuilder source, int variableCount) { + source.append(" private void backtrackAfterLeaf() {\n"); + for (int slot = variableCount - 1; slot >= 0; slot--) { + source.append(" if (!isFixed(").append(slot).append(")) {\n"); + source.append(" depth").append(slot).append("Advance = true;\n"); + source.append(" depth = ").append(slot).append(";\n"); + source.append(" return;\n"); + source.append(" }\n"); + } + source.append(" depth = -1;\n"); + source.append(" }\n\n"); + } + + private void appendBacktrackFromDepth(StringBuilder source, int variableCount) { + source.append(" private void backtrackFromDepth(int failedDepth) {\n"); + source.append(" switch (failedDepth) {\n"); + for (int failedDepth = 0; failedDepth < variableCount; failedDepth++) { + source.append(" case ").append(failedDepth).append(":\n"); + source.append(" releaseDepth").append(failedDepth).append("();\n"); + for (int slot = failedDepth - 1; slot >= 0; slot--) { + source.append(" if (!isFixed(").append(slot).append(")) {\n"); + source.append(" depth").append(slot).append("Advance = true;\n"); + source.append(" depth = ").append(slot).append(";\n"); + source.append(" return;\n"); + source.append(" }\n"); + } + source.append(" depth = -1;\n"); + 
source.append(" return;\n"); + } + source.append(" default:\n"); + source.append(" depth = -1;\n"); + source.append(" }\n"); + source.append(" }\n\n"); + } + + private void appendReleaseDepth(StringBuilder source, int slot) { + source.append(" private void releaseDepth").append(slot).append("() {\n"); + source.append(" state().clear(").append(slot).append(");\n"); + source.append(" depth").append(slot).append("Initialized = false;\n"); + source.append(" depth").append(slot).append("Advance = false;\n"); + for (int cursorOrdinal : shape.cursorOrdinals(slot)) { + source.append(" cursor").append(cursorOrdinal).append(".release(").append(slot).append(");\n"); + } + source.append(" }\n\n"); + } + + private void appendPositionDepth(StringBuilder source, int slot, int[] cursorOrdinals) { + source.append(" private boolean positionDepth").append(slot).append("(boolean advanceExisting) {\n"); + source.append(" state().clear(").append(slot).append(");\n"); + if (cursorOrdinals.length == 0) { + source.append(" return false;\n"); + source.append(" }\n\n"); + return; + } + source.append(" if (!advanceExisting) {\n"); + for (int cursorOrdinal : cursorOrdinals) { + source.append(" recordCandidateScan();\n"); + source.append(" if (!cursor") + .append(cursorOrdinal) + .append(".open(") + .append(slot) + .append(")) {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + } + source.append(" } else if (!cursor").append(cursorOrdinals[0]).append(".next()) {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + if (cursorOrdinals.length == 1) { + source.append(" state().assign(") + .append(slot) + .append(", cursor") + .append(cursorOrdinals[0]) + .append(".value());\n"); + source.append(" return true;\n"); + source.append(" }\n\n"); + return; + } + source.append(" long current = cursor").append(cursorOrdinals[0]).append(".value();\n"); + for (int i = 1; i < cursorOrdinals.length; i++) { + source.append(" if 
(cursor").append(cursorOrdinals[i]).append(".value() > current) {\n"); + source.append(" current = cursor").append(cursorOrdinals[i]).append(".value();\n"); + source.append(" }\n"); + } + source.append(" while (true) {\n"); + source.append(" boolean allMatch = true;\n"); + source.append(" long max = current;\n"); + for (int cursorOrdinal : cursorOrdinals) { + source.append(" if (!cursor").append(cursorOrdinal).append(".seek(current)) {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" long value") + .append(cursorOrdinal) + .append(" = cursor") + .append(cursorOrdinal) + .append(".value();\n"); + source.append(" if (value").append(cursorOrdinal).append(" != current) {\n"); + source.append(" if (value").append(cursorOrdinal).append(" > max) {\n"); + source.append(" max = value").append(cursorOrdinal).append(";\n"); + source.append(" }\n"); + source.append(" allMatch = false;\n"); + source.append(" }\n"); + } + source.append(" if (allMatch) {\n"); + source.append(" state().assign(").append(slot).append(", current);\n"); + source.append(" return true;\n"); + source.append(" }\n"); + source.append(" current = max;\n"); + source.append(" }\n"); + source.append(" }\n\n"); + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java index 6d3156661c..7d1ce00391 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java @@ -11,7 +11,7 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; -interface LmdbLftjCursor extends AutoCloseable { +public interface LmdbLftjCursor extends AutoCloseable { boolean open(int bindingSlot); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java 
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java new file mode 100644 index 0000000000..7c0d8e6b17 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java @@ -0,0 +1,52 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.ArrayList; +import java.util.List; + +public final class LmdbLftjExecutionShape { + + private final int variableCount; + private final int[][] cursorOrdinalsBySlot; + private final int patternCount; + + LmdbLftjExecutionShape(LmdbLftjPlan plan) { + this.variableCount = plan.variableOrder().size(); + this.patternCount = plan.patternCount(); + this.cursorOrdinalsBySlot = new int[variableCount][]; + List variableOrder = plan.variableOrder(); + List patternPlans = plan.patternPlans(); + for (int slot = 0; slot < variableCount; slot++) { + String variableName = variableOrder.get(slot); + List cursorOrdinals = new ArrayList<>(); + for (int patternOrdinal = 0; patternOrdinal < patternPlans.size(); patternOrdinal++) { + if (patternPlans.get(patternOrdinal).containsVariable(variableName)) { + cursorOrdinals.add(patternOrdinal); + } + } + this.cursorOrdinalsBySlot[slot] = cursorOrdinals.stream().mapToInt(Integer::intValue).toArray(); + } + } + + int variableCount() { + return variableCount; + } + + int[] cursorOrdinals(int slot) { + return cursorOrdinalsBySlot[slot]; + } + + int patternCount() { + return 
patternCount; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index fdab68a114..b6b7427941 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -33,10 +33,12 @@ final class LmdbLftjExecutor { QueryEvaluationStep prepare(LmdbLftjTupleExpr node, QueryEvaluationContext context) { LazyFallbackStep fallback = new LazyFallbackStep(node.plan().fallbackExpr().clone(), context); - return bindings -> evaluate(node.plan(), context, fallback, bindings); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(node.plan()); + return bindings -> evaluate(node.plan(), shape, context, fallback, bindings); } - private CloseableIteration evaluate(LmdbLftjPlan plan, QueryEvaluationContext context, + private CloseableIteration evaluate(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + QueryEvaluationContext context, LazyFallbackStep fallback, BindingSet bindings) { LmdbQueryAccess queryAccess = strategy.queryAccess(); if (queryAccess == null) { @@ -48,8 +50,12 @@ private CloseableIteration evaluate(LmdbLftjPlan plan, QueryEvaluati return fallback.evaluate(bindings); } + LmdbCompiledLftjFactory compiledFactory = compiledFactory(queryAccess, plan, shape); try { state.attachTxn(queryAccess.acquireReadTxn()); + if (compiledFactory != null) { + return compiledFactory.create(plan, shape, state, context, queryAccess, new LmdbLftjMetrics()); + } return new LmdbLftjIteration(plan, state, context, queryAccess, new LmdbLftjMetrics()); } catch (RuntimeException e) { state.close(); @@ -57,6 +63,27 @@ private CloseableIteration evaluate(LmdbLftjPlan plan, QueryEvaluati } } + private LmdbCompiledLftjFactory compiledFactory(LmdbQueryAccess queryAccess, LmdbLftjPlan plan, + LmdbLftjExecutionShape shape) { + if 
(!queryAccess.lftjCodegenEnabled()) { + return null; + } + + LmdbLftjCodegenCache.CacheEntry cached = queryAccess.cachedCompiledPlan(plan.executionKey()); + if (cached != null) { + return cached.compiled() ? cached.factory() : null; + } + + try { + LmdbCompiledLftjFactory factory = queryAccess.codegenCompiler().compile(plan, shape); + queryAccess.cacheCompiledPlanSuccess(plan.executionKey(), factory); + return factory; + } catch (RuntimeException e) { + queryAccess.cacheCompiledPlanFailure(plan.executionKey(), e.getMessage()); + return null; + } + } + private final class LmdbLftjIteration extends LookAheadIteration { private final LmdbLftjPlan plan; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java index 700d5c004c..d0777bd52a 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java @@ -11,7 +11,7 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; -final class LmdbLftjMetrics { +public final class LmdbLftjMetrics { private long candidateScans; private long witnessScans; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java index 1924924285..424300eb62 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java @@ -19,13 +19,14 @@ import org.eclipse.rdf4j.query.algebra.TupleExpr; -final class LmdbLftjPlan { +public final class LmdbLftjPlan { private final TupleExpr fallbackExpr; private final Set bindingNames; private final Set assuredBindingNames; private final List variableOrder; private final List patternPlans; + private final String executionKey; LmdbLftjPlan(TupleExpr fallbackExpr, Set 
bindingNames, Set assuredBindingNames, List variableOrder, List patternPlans) { @@ -34,6 +35,7 @@ final class LmdbLftjPlan { this.assuredBindingNames = Set.copyOf(new LinkedHashSet<>(assuredBindingNames)); this.variableOrder = List.copyOf(variableOrder); this.patternPlans = List.copyOf(patternPlans); + this.executionKey = executionKey(this.variableOrder, this.patternPlans); } TupleExpr fallbackExpr() { @@ -56,6 +58,10 @@ List patternPlans() { return patternPlans; } + public String executionKey() { + return executionKey; + } + List indexNames() { return patternPlans.stream().map(LmdbLftjPatternPlan::indexName).collect(Collectors.toList()); } @@ -86,4 +92,48 @@ public boolean equals(Object other) { public int hashCode() { return Objects.hash(fallbackExpr, bindingNames, assuredBindingNames, variableOrder, patternPlans); } + + private static String executionKey(List variableOrder, List patternPlans) { + StringBuilder builder = new StringBuilder(variableOrder.size() * 16 + patternPlans.size() * 48); + builder.append("varOrder="); + for (String variable : variableOrder) { + builder.append(variable).append(','); + } + builder.append(";patterns="); + for (LmdbLftjPatternPlan patternPlan : patternPlans) { + builder.append(patternPlan.indexName()).append(':').append(patternKey(patternPlan.pattern())).append(';'); + } + return builder.toString(); + } + + private static String patternKey(org.eclipse.rdf4j.query.algebra.StatementPattern pattern) { + StringBuilder builder = new StringBuilder(48); + builder.append('[').append(pattern.getScope().name()).append(';'); + appendTerm(builder, pattern.getSubjectVar()); + appendTerm(builder, pattern.getPredicateVar()); + appendTerm(builder, pattern.getObjectVar()); + appendTerm(builder, pattern.getContextVar()); + builder.append(']'); + return builder.toString(); + } + + private static void appendTerm(StringBuilder builder, org.eclipse.rdf4j.query.algebra.Var var) { + if (var == null) { + builder.append("null;"); + return; + } + if 
(var.hasValue()) { + builder.append("const=") + .append(var.getValue().getClass().getSimpleName()) + .append(':') + .append(var.getValue()) + .append(';'); + return; + } + if (var.isAnonymous() || var.getName() == null) { + builder.append("hidden;"); + return; + } + builder.append("var=").append(var.getName()).append(';'); + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java index eabe03ac1f..daea3d0fa5 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java @@ -21,9 +21,21 @@ final class LmdbPrefixFrontierProvider { private final LmdbQueryAccess queryAccess; private final LmdbLftjBindingState state; private final LmdbLftjMetrics metrics; - private final Map frontierCache = new HashMap<>(); - private final Map countCache = new HashMap<>(); + private final Map frontierCache = new HashMap<>(); + private final Map countCache = new HashMap<>(); private final Map relationCache = new HashMap<>(); + private final FrontierLookupKey frontierLookup = new FrontierLookupKey(); + private final CountLookupKey countLookup = new CountLookupKey(); + private final RelationLookupKey relationLookup = new RelationLookupKey(); + private final long[] frontierLowerBound = new long[4]; + private final long[] frontierUpperBound = new long[4]; + private final long[] countLowerBound = new long[4]; + private final long[] countUpperBound = new long[4]; + private final long[] relationLowerBound = new long[4]; + private final long[] relationUpperBound = new long[4]; + private final LmdbDerivedBinaryRelation.LongArrayBuilder frontierValues = new LmdbDerivedBinaryRelation.LongArrayBuilder(); + private final FrontierCollector frontierCollector = new FrontierCollector(); + private final CountCollector countCollector = 
new CountCollector(); LmdbPrefixFrontierProvider(LmdbQueryAccess queryAccess, LmdbLftjBindingState state, LmdbLftjMetrics metrics) { this.queryAccess = queryAccess; @@ -32,7 +44,7 @@ final class LmdbPrefixFrontierProvider { } LmdbCachedFrontier frontier(LmdbLftjPatternPlan patternPlan, int bindingSlot) { - FrontierKey key = FrontierKey.create(patternPlan, bindingSlot, state); + FrontierLookupKey key = frontierLookup.init(patternPlan, bindingSlot, state); LmdbCachedFrontier frontier = frontierCache.get(key); if (frontier != null) { metrics.recordFrontierHit(); @@ -41,12 +53,12 @@ LmdbCachedFrontier frontier(LmdbLftjPatternPlan patternPlan, int bindingSlot) { metrics.recordFrontierLoad(); frontier = loadFrontier(patternPlan, bindingSlot); - frontierCache.put(key, frontier); + frontierCache.put(key.freeze(), frontier); return frontier; } long countMatches(LmdbLftjPatternPlan patternPlan) { - CountKey key = CountKey.create(patternPlan, state); + CountLookupKey key = countLookup.init(patternPlan, state); Long count = countCache.get(key); if (count != null) { metrics.recordCountHit(); @@ -55,7 +67,7 @@ long countMatches(LmdbLftjPatternPlan patternPlan) { metrics.recordCountLoad(); long loaded = loadCount(patternPlan); - countCache.put(key, loaded); + countCache.put(key.freeze(), loaded); return loaded; } @@ -67,20 +79,11 @@ private LmdbCachedFrontier loadFrontier(LmdbLftjPatternPlan patternPlan, int bin } int keyFieldIndex = patternPlan.keyFieldIndexForBindingSlot(bindingSlot); - long[] lowerBound = new long[4]; - long[] upperBound = new long[4]; - patternPlan.fillRangeBounds(state, bindingSlot, 0L, lowerBound, upperBound); - - LmdbDerivedBinaryRelation.LongArrayBuilder values = new LmdbDerivedBinaryRelation.LongArrayBuilder(); - long[] last = { Long.MIN_VALUE }; - forEachUniqueRow(patternPlan, lowerBound, upperBound, keyFieldIndex + 1, row -> { - long value = row[keyFieldIndex]; - if (value != last[0]) { - values.add(value); - last[0] = value; - } - }); - return new 
LmdbCachedFrontier(values.toArray(), null); + Arrays.fill(frontierLowerBound, 0L); Arrays.fill(frontierUpperBound, 0L); /* both bound arrays are reused across calls; zero the upper bound too so every call starts from the same all-zero state a fresh array would have */ + patternPlan.fillRangeBounds(state, bindingSlot, 0L, frontierLowerBound, frontierUpperBound); + forEachUniqueRow(patternPlan, frontierLowerBound, frontierUpperBound, keyFieldIndex + 1, + frontierCollector.reset(keyFieldIndex)); + return new LmdbCachedFrontier(frontierValues.toArray(), null); } private long loadCount(LmdbLftjPatternPlan patternPlan) { @@ -90,13 +93,11 @@ private long loadCount(LmdbLftjPatternPlan patternPlan) { return derived; } - long[] lowerBound = new long[4]; - long[] upperBound = new long[4]; - patternPlan.fillMatchRange(state, lowerBound, upperBound); - - long[] count = { 0L }; - forEachUniqueRow(patternPlan, lowerBound, upperBound, patternPlan.fixedPrefixLength(state), row -> count[0]++); - return count[0]; + Arrays.fill(countLowerBound, 0L); Arrays.fill(countUpperBound, 0L); /* same reset as for the frontier bounds: the reused upper bound must not carry ids over from a previous count */ + patternPlan.fillMatchRange(state, countLowerBound, countUpperBound); + forEachUniqueRow(patternPlan, countLowerBound, countUpperBound, patternPlan.fixedPrefixLength(state), + countCollector.reset()); + return countCollector.count(); } private LmdbCachedFrontier derivedFrontier(LmdbLftjPatternPlan patternPlan, int bindingSlot) { @@ -138,11 +139,9 @@ private Long derivedCount(LmdbLftjPatternPlan patternPlan) { private LmdbDerivedBinaryRelation relation(LmdbLftjPatternPlan patternPlan) { long predicateId = state.fixedId(patternPlan.predicateTerm()); - LmdbDerivedBinaryRelation.RelationKey key = new LmdbDerivedBinaryRelation.RelationKey( - patternPlan.indexName(), - queryAccess.includeInferred(), + RelationLookupKey lookup = relationLookup.init(patternPlan.indexName(), queryAccess.includeInferred(), predicateId); - LmdbDerivedBinaryRelation relation = relationCache.get(key); + LmdbDerivedBinaryRelation relation = relationCache.get(lookup); if (relation != null) { metrics.recordRelationHit(); return relation; @@ -153,17 +152,16 @@ private LmdbDerivedBinaryRelation relation(LmdbLftjPatternPlan patternPlan) { int
targetComponent = patternPlan.keyTerm(2).component(); LmdbDerivedBinaryRelation.Builder builder = new LmdbDerivedBinaryRelation.Builder(sourceComponent, targetComponent); - long[] lowerBound = new long[4]; - long[] upperBound = new long[4]; - Arrays.fill(upperBound, Long.MAX_VALUE); - lowerBound[TripleStore.PRED_IDX] = predicateId; - upperBound[TripleStore.PRED_IDX] = predicateId; + Arrays.fill(relationLowerBound, 0L); + Arrays.fill(relationUpperBound, Long.MAX_VALUE); + relationLowerBound[TripleStore.PRED_IDX] = predicateId; + relationUpperBound[TripleStore.PRED_IDX] = predicateId; int sourceKeyField = patternPlan.keyFieldIndexForComponent(sourceComponent); int targetKeyField = patternPlan.keyFieldIndexForComponent(targetComponent); - forEachUniqueRow(patternPlan, lowerBound, upperBound, 1, + forEachUniqueRow(patternPlan, relationLowerBound, relationUpperBound, 1, row -> builder.add(row[sourceKeyField], row[targetKeyField])); relation = builder.build(); - relationCache.put(key, relation); + relationCache.put(lookup.freeze(), relation); return relation; } @@ -211,6 +209,45 @@ private interface RowConsumer { void accept(long[] row); } + private final class FrontierCollector implements RowConsumer { + private int keyFieldIndex; + private long last; + + private FrontierCollector reset(int keyFieldIndex) { + this.keyFieldIndex = keyFieldIndex; + this.last = Long.MIN_VALUE; + frontierValues.clear(); + return this; + } + + @Override + public void accept(long[] row) { + long value = row[keyFieldIndex]; + if (value != last) { + frontierValues.add(value); + last = value; + } + } + } + + private static final class CountCollector implements RowConsumer { + private long count; + + private CountCollector reset() { + count = 0L; + return this; + } + + @Override + public void accept(long[] row) { + count++; + } + + private long count() { + return count; + } + } + private static final class CursorReader implements AutoCloseable { private final LmdbTrieKeyCursor cursor; @@ -264,72 
+301,211 @@ private void readRow() { } } - private static final class FrontierKey { + private static final class FrontierCacheKey { private final LmdbLftjPatternPlan patternPlan; private final int bindingSlot; - private final long[] prefix; + private final int prefixLength; + private final long prefix0; + private final long prefix1; + private final long prefix2; - private FrontierKey(LmdbLftjPatternPlan patternPlan, int bindingSlot, long[] prefix) { + private FrontierCacheKey(LmdbLftjPatternPlan patternPlan, int bindingSlot, int prefixLength, long prefix0, + long prefix1, long prefix2) { this.patternPlan = patternPlan; this.bindingSlot = bindingSlot; - this.prefix = prefix; + this.prefixLength = prefixLength; + this.prefix0 = prefix0; + this.prefix1 = prefix1; + this.prefix2 = prefix2; } - static FrontierKey create(LmdbLftjPatternPlan patternPlan, int bindingSlot, LmdbLftjBindingState state) { - int keyFieldIndex = patternPlan.keyFieldIndexForBindingSlot(bindingSlot); - long[] prefix = new long[keyFieldIndex]; - for (int i = 0; i < keyFieldIndex; i++) { - prefix[i] = state.fixedId(patternPlan.keyTerm(i)); + @Override + public boolean equals(Object other) { + if (!(other instanceof FrontierCacheKey)) { + return false; } - return new FrontierKey(patternPlan, bindingSlot, prefix); + FrontierCacheKey o = (FrontierCacheKey) other; + return Objects.equals(patternPlan, o.patternPlan) + && bindingSlot == o.bindingSlot + && prefixLength == o.prefixLength + && prefix0 == o.prefix0 + && prefix1 == o.prefix1 + && prefix2 == o.prefix2; + } + + @Override + public int hashCode() { + return frontierHash(patternPlan, bindingSlot, prefixLength, prefix0, prefix1, prefix2); + } + } + + private static final class FrontierLookupKey { + private LmdbLftjPatternPlan patternPlan; + private int bindingSlot; + private int prefixLength; + private long prefix0; + private long prefix1; + private long prefix2; + + private FrontierLookupKey init(LmdbLftjPatternPlan patternPlan, int bindingSlot, 
LmdbLftjBindingState state) { + this.patternPlan = patternPlan; + this.bindingSlot = bindingSlot; + this.prefixLength = patternPlan.keyFieldIndexForBindingSlot(bindingSlot); + this.prefix0 = prefixLength > 0 ? state.fixedId(patternPlan.keyTerm(0)) : 0L; + this.prefix1 = prefixLength > 1 ? state.fixedId(patternPlan.keyTerm(1)) : 0L; + this.prefix2 = prefixLength > 2 ? state.fixedId(patternPlan.keyTerm(2)) : 0L; + return this; + } + + private FrontierCacheKey freeze() { + return new FrontierCacheKey(patternPlan, bindingSlot, prefixLength, prefix0, prefix1, prefix2); } @Override public boolean equals(Object other) { - if (!(other instanceof FrontierKey)) { + if (!(other instanceof FrontierCacheKey)) { return false; } - FrontierKey o = (FrontierKey) other; + FrontierCacheKey o = (FrontierCacheKey) other; return Objects.equals(patternPlan, o.patternPlan) && bindingSlot == o.bindingSlot - && Arrays.equals(prefix, o.prefix); + && prefixLength == o.prefixLength + && prefix0 == o.prefix0 + && prefix1 == o.prefix1 + && prefix2 == o.prefix2; } @Override public int hashCode() { - return Objects.hash(patternPlan, bindingSlot, Arrays.hashCode(prefix)); + return frontierHash(patternPlan, bindingSlot, prefixLength, prefix0, prefix1, prefix2); } } - private static final class CountKey { + private static int frontierHash(LmdbLftjPatternPlan patternPlan, int bindingSlot, int prefixLength, long prefix0, + long prefix1, long prefix2) { + int result = Objects.hashCode(patternPlan); + result = 31 * result + bindingSlot; + result = 31 * result + prefixLength; + result = 31 * result + Long.hashCode(prefix0); + result = 31 * result + Long.hashCode(prefix1); + result = 31 * result + Long.hashCode(prefix2); + return result; + } + + private static final class CountCacheKey { private final LmdbLftjPatternPlan patternPlan; - private final long[] fixedValues; + private final long subjectValue; + private final long predicateValue; + private final long objectValue; + private final long 
contextValue; - private CountKey(LmdbLftjPatternPlan patternPlan, long[] fixedValues) { + private CountCacheKey(LmdbLftjPatternPlan patternPlan, long subjectValue, long predicateValue, long objectValue, + long contextValue) { this.patternPlan = patternPlan; - this.fixedValues = fixedValues; + this.subjectValue = subjectValue; + this.predicateValue = predicateValue; + this.objectValue = objectValue; + this.contextValue = contextValue; } - static CountKey create(LmdbLftjPatternPlan patternPlan, LmdbLftjBindingState state) { - long[] fixedValues = new long[4]; - for (int i = 0; i < 4; i++) { - fixedValues[i] = state.fixedId(patternPlan.termForComponent(i)); + @Override + public boolean equals(Object other) { + if (!(other instanceof CountCacheKey)) { + return false; } - return new CountKey(patternPlan, fixedValues); + CountCacheKey o = (CountCacheKey) other; + return Objects.equals(patternPlan, o.patternPlan) + && subjectValue == o.subjectValue + && predicateValue == o.predicateValue + && objectValue == o.objectValue + && contextValue == o.contextValue; + } + + @Override + public int hashCode() { + int result = Objects.hashCode(patternPlan); + result = 31 * result + Long.hashCode(subjectValue); + result = 31 * result + Long.hashCode(predicateValue); + result = 31 * result + Long.hashCode(objectValue); + result = 31 * result + Long.hashCode(contextValue); + return result; + } + } + + private static final class CountLookupKey { + private LmdbLftjPatternPlan patternPlan; + private long subjectValue; + private long predicateValue; + private long objectValue; + private long contextValue; + + private CountLookupKey init(LmdbLftjPatternPlan patternPlan, LmdbLftjBindingState state) { + this.patternPlan = patternPlan; + this.subjectValue = state.fixedId(patternPlan.termForComponent(TripleStore.SUBJ_IDX)); + this.predicateValue = state.fixedId(patternPlan.termForComponent(TripleStore.PRED_IDX)); + this.objectValue = 
state.fixedId(patternPlan.termForComponent(TripleStore.OBJ_IDX)); + this.contextValue = state.fixedId(patternPlan.termForComponent(TripleStore.CONTEXT_IDX)); + return this; + } + + private CountCacheKey freeze() { + return new CountCacheKey(patternPlan, subjectValue, predicateValue, objectValue, contextValue); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof CountCacheKey)) { + return false; + } + CountCacheKey o = (CountCacheKey) other; + return Objects.equals(patternPlan, o.patternPlan) + && subjectValue == o.subjectValue + && predicateValue == o.predicateValue + && objectValue == o.objectValue + && contextValue == o.contextValue; + } + + @Override + public int hashCode() { + int result = Objects.hashCode(patternPlan); + result = 31 * result + Long.hashCode(subjectValue); + result = 31 * result + Long.hashCode(predicateValue); + result = 31 * result + Long.hashCode(objectValue); + result = 31 * result + Long.hashCode(contextValue); + return result; + } + } + + private static final class RelationLookupKey { + private String indexName; + private boolean includeInferred; + private long predicateId; + + private RelationLookupKey init(String indexName, boolean includeInferred, long predicateId) { + this.indexName = indexName; + this.includeInferred = includeInferred; + this.predicateId = predicateId; + return this; + } + + private LmdbDerivedBinaryRelation.RelationKey freeze() { + return new LmdbDerivedBinaryRelation.RelationKey(indexName, includeInferred, predicateId); } @Override public boolean equals(Object other) { - if (!(other instanceof CountKey)) { + if (!(other instanceof LmdbDerivedBinaryRelation.RelationKey)) { return false; } - CountKey o = (CountKey) other; - return Objects.equals(patternPlan, o.patternPlan) && Arrays.equals(fixedValues, o.fixedValues); + LmdbDerivedBinaryRelation.RelationKey o = (LmdbDerivedBinaryRelation.RelationKey) other; + return includeInferred == o.includeInferred() + && predicateId == 
o.predicateId() + && Objects.equals(indexName, o.indexName()); } @Override public int hashCode() { - return Objects.hash(patternPlan, Arrays.hashCode(fixedValues)); + return Objects.hash(indexName, includeInferred, predicateId); } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java index 14f9639c56..e539b2d020 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java @@ -15,7 +15,7 @@ import org.eclipse.rdf4j.model.Value; -interface LmdbQueryAccess { +public interface LmdbQueryAccess { TripleStore tripleStore(); @@ -43,4 +43,24 @@ default LmdbLftjPlanner.PlanningResult cachedPlanningResult(String cacheKey) { default void cachePlanningResult(String cacheKey, LmdbLftjPlanner.PlanningResult result) { // optional prepared-plan cache } + + default boolean lftjCodegenEnabled() { + return true; + } + + default LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { + return null; + } + + default void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + // optional compiled-plan cache + } + + default void cacheCompiledPlanFailure(String executionKey, String message) { + // optional negative cache + } + + default LmdbLftjCodegenCompiler codegenCompiler() { + return LmdbLftjCodegenCompiler.INSTANCE; + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java index 42e2f88c7c..c2d29df2f0 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java @@ -102,6 +102,7 @@ public class LmdbStore extends AbstractNotifyingSail implements FederatedService */ private final ReentrantLock 
txnLockManager = new ReentrantLock(); private final LmdbLftjPreparedPlanCache preparedPlanCache = new LmdbLftjPreparedPlanCache(); + private final LmdbLftjCodegenCache codegenCache = new LmdbLftjCodegenCache(); /** * Holds locks for all isolated transactions. @@ -342,6 +343,7 @@ protected void shutDownInternal() throws SailException { } } preparedPlanCache.clear(); + codegenCache.clear(); logger.debug("LmdbStore shut down"); } @@ -373,6 +375,10 @@ LmdbLftjPreparedPlanCache preparedPlanCache() { return preparedPlanCache; } + LmdbLftjCodegenCache codegenCache() { + return codegenCache; + } + /** * This call will block when {@link IsolationLevels#NONE} is provided when there are active transactions with a * higher isolation and block when a higher isolation is provided when there are active transactions with diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index cb35e08bb7..a41df58b01 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -263,6 +263,26 @@ public LmdbLftjPlanner.PlanningResult cachedPlanningResult(String cacheKey) { public void cachePlanningResult(String cacheKey, LmdbLftjPlanner.PlanningResult result) { lmdbStore.preparedPlanCache().put(cacheKey, result); } + + @Override + public boolean lftjCodegenEnabled() { + return lmdbStore.getLmdbStoreConfig().isLftjCodegenEnabled(); + } + + @Override + public LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { + return lmdbStore.codegenCache().get(executionKey); + } + + @Override + public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + lmdbStore.codegenCache().putSuccess(executionKey, factory); + } + + @Override + public void cacheCompiledPlanFailure(String executionKey, String message) { + 
lmdbStore.codegenCache().putFailure(executionKey, message); + } }; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java index 4eab8483fc..5d9d467904 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java @@ -80,6 +80,8 @@ public class LmdbStoreConfig extends BaseSailConfig { private boolean lftjEnabled = true; + private boolean lftjCodegenEnabled = true; + private long valueEvictionInterval = Duration.ofSeconds(60).toMillis(); /*--------------* @@ -224,6 +226,15 @@ public LmdbStoreConfig setLftjEnabled(boolean lftjEnabled) { return this; } + public boolean isLftjCodegenEnabled() { + return lftjCodegenEnabled; + } + + public LmdbStoreConfig setLftjCodegenEnabled(boolean lftjCodegenEnabled) { + this.lftjCodegenEnabled = lftjCodegenEnabled; + return this; + } + @Override public Resource export(Model m) { Resource implNode = super.export(m); @@ -266,6 +277,9 @@ public Resource export(Model m) { if (!lftjEnabled) { m.add(implNode, LmdbStoreSchema.LFTJ_ENABLED, vf.createLiteral(false)); } + if (!lftjCodegenEnabled) { + m.add(implNode, LmdbStoreSchema.LFTJ_CODEGEN_ENABLED, vf.createLiteral(false)); + } if (valueEvictionInterval != Duration.ofSeconds(60).toMillis()) { m.add(implNode, LmdbStoreSchema.VALUE_EVICTION_INTERVAL, vf.createLiteral(valueEvictionInterval)); } @@ -393,6 +407,17 @@ public void parse(Model m, Resource implNode) throws SailConfigException { } }); + Models.objectLiteral(m.getStatements(implNode, LmdbStoreSchema.LFTJ_CODEGEN_ENABLED, null)) + .ifPresent(lit -> { + try { + setLftjCodegenEnabled(lit.booleanValue()); + } catch (IllegalArgumentException e) { + throw new SailConfigException( + "Boolean value required for " + LmdbStoreSchema.LFTJ_CODEGEN_ENABLED + + " property, found " + lit); + } 
+ }); + Models.objectLiteral(m.getStatements(implNode, LmdbStoreSchema.VALUE_EVICTION_INTERVAL, null)) .ifPresent(lit -> { try { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java index 41afe20500..50c08e3af9 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java @@ -87,6 +87,11 @@ public class LmdbStoreSchema { */ public final static IRI LFTJ_ENABLED; + /** + * http://rdf4j.org/config/sail/lmdb#lftjCodegenEnabled + */ + public final static IRI LFTJ_CODEGEN_ENABLED; + /** * http://rdf4j.org/config/sail/lmdb#valueEvictionInterval */ @@ -106,6 +111,7 @@ public class LmdbStoreSchema { AUTO_GROW = factory.createIRI(NAMESPACE, "autoGrow"); PAGE_CARDINALITY_ESTIMATOR = factory.createIRI(NAMESPACE, "pageCardinalityEstimator"); LFTJ_ENABLED = factory.createIRI(NAMESPACE, "lftjEnabled"); + LFTJ_CODEGEN_ENABLED = factory.createIRI(NAMESPACE, "lftjCodegenEnabled"); VALUE_EVICTION_INTERVAL = factory.createIRI(NAMESPACE, "valueEvictionInterval"); } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java new file mode 100644 index 0000000000..2df5f94df8 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -0,0 +1,247 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.junit.jupiter.api.Test; + +class LmdbLftjCodegenTest { + + @Test + void planShouldExposeStableExecutionKeyAcrossCopies() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + + assertThat(invokeStringGetter(plan, "executionKey")) + .isEqualTo(invokeStringGetter(plan.copy(), "executionKey")); + } + + @Test + void syntheticQueryAccessShouldEnableCodegenByDefault() { + assertThat(invokeBooleanGetter(new LmdbLftjSyntheticScenario.TestQueryAccess(), "lftjCodegenEnabled")) + .isTrue(); + } + + @Test + void compiledAndInterpretedShouldProduceSameRowsForSyntheticCycle() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + + List interpreted = drain( + LmdbLftjSyntheticScenario.createEvaluationStep(new InterpretedQueryAccess(), plan), + EmptyBindingSet.getInstance()); + List compiled = drain( + LmdbLftjSyntheticScenario.createEvaluationStep(new CachingQueryAccess(new CountingCompiler()), plan), + EmptyBindingSet.getInstance()); + + assertThat(compiled).isNotEmpty().containsExactlyElementsOf(interpreted); + } + + @Test + void compiledAndInterpretedShouldMatchForHiddenContextMultiplicity() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlanWithHiddenContexts(); + + List interpreted = drain( + 
LmdbLftjSyntheticScenario.createEvaluationStep(new InterpretedQueryAccess(true), plan), + EmptyBindingSet.getInstance()); + List compiled = drain( + LmdbLftjSyntheticScenario.createEvaluationStep( + new CachingQueryAccess(true, new CountingCompiler()), plan), + EmptyBindingSet.getInstance()); + + assertThat(compiled).hasSize(648).containsExactlyElementsOf(interpreted); + } + + @Test + void compiledAndInterpretedShouldMatchForFullyBoundInput() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + QueryBindingSet matchingBindings = new QueryBindingSet(); + matchingBindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + matchingBindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + matchingBindings.setBinding("c", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:3")); + + List interpreted = drain( + LmdbLftjSyntheticScenario.createEvaluationStep(new InterpretedQueryAccess(), plan), matchingBindings); + List compiled = drain( + LmdbLftjSyntheticScenario.createEvaluationStep(new CachingQueryAccess(new CountingCompiler()), plan), + matchingBindings); + + assertThat(compiled).containsExactlyElementsOf(interpreted); + } + + @Test + void codegenCacheShouldCompileOncePerExecutionKey() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + CountingCompiler compiler = new CountingCompiler(); + CachingQueryAccess queryAccess = new CachingQueryAccess(compiler); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); + + List first = drain(evaluationStep, EmptyBindingSet.getInstance()); + List second = drain(evaluationStep, EmptyBindingSet.getInstance()); + + assertThat(second).containsExactlyElementsOf(first); + assertThat(compiler.compileCalls).isEqualTo(1); + assertThat(queryAccess.cachedEntry(plan.executionKey())).isNotNull(); + assertThat(queryAccess.cachedEntry(plan.executionKey()).compiled()) + .as(compiler.failureDescription()) + .isTrue(); + 
} + + @Test + void codegenCacheShouldReuseNegativeResultAfterCompileFailure() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + FailingCompiler compiler = new FailingCompiler(); + CachingQueryAccess queryAccess = new CachingQueryAccess(compiler); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); + + List first = drain(evaluationStep, EmptyBindingSet.getInstance()); + List second = drain(evaluationStep, EmptyBindingSet.getInstance()); + + assertThat(second).containsExactlyElementsOf(first); + assertThat(compiler.compileCalls).isEqualTo(1); + assertThat(queryAccess.cachedEntry(plan.executionKey())).isNotNull(); + assertThat(queryAccess.cachedEntry(plan.executionKey()).compiled()).isFalse(); + assertThat(queryAccess.cachedEntry(plan.executionKey()).failureMessage()).contains("forced failure"); + } + + private boolean invokeBooleanGetter(Object target, String getterName) { + try { + Method getter = target.getClass().getMethod(getterName); + return (boolean) getter.invoke(target); + } catch (ReflectiveOperationException e) { + throw new AssertionError("Missing LMDB codegen boolean getter: " + getterName, e); + } + } + + private String invokeStringGetter(Object target, String getterName) { + try { + Method getter = target.getClass().getMethod(getterName); + return (String) getter.invoke(target); + } catch (ReflectiveOperationException e) { + throw new AssertionError("Missing LMDB codegen string getter: " + getterName, e); + } + } + + private List drain(QueryEvaluationStep evaluationStep, BindingSet bindings) { + List rows = new ArrayList<>(); + try (CloseableIteration iteration = evaluationStep.evaluate(bindings)) { + while (iteration.hasNext()) { + BindingSet row = iteration.next(); + rows.add(render(row)); + } + } + return rows; + } + + private String render(BindingSet row) { + return row.getValue("a").stringValue() + "|" + row.getValue("b").stringValue() + "|" + + row.getValue("c").stringValue(); 
+ } + + private static final class InterpretedQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { + + private InterpretedQueryAccess() { + super(); + } + + private InterpretedQueryAccess(boolean duplicateContexts) { + super(duplicateContexts); + } + + @Override + public boolean lftjCodegenEnabled() { + return false; + } + } + + private static final class CachingQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { + private final Map compiledPlans = new HashMap<>(); + private final LmdbLftjCodegenCompiler compiler; + + private CachingQueryAccess(LmdbLftjCodegenCompiler compiler) { + this(false, compiler); + } + + private CachingQueryAccess(boolean duplicateContexts, LmdbLftjCodegenCompiler compiler) { + super(duplicateContexts); + this.compiler = compiler; + } + + @Override + public LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { + return compiledPlans.get(executionKey); + } + + @Override + public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + compiledPlans.put(executionKey, LmdbLftjCodegenCache.CacheEntry.success(factory)); + } + + @Override + public void cacheCompiledPlanFailure(String executionKey, String message) { + compiledPlans.put(executionKey, LmdbLftjCodegenCache.CacheEntry.failure(message)); + } + + @Override + public LmdbLftjCodegenCompiler codegenCompiler() { + return compiler; + } + + private LmdbLftjCodegenCache.CacheEntry cachedEntry(String executionKey) { + return compiledPlans.get(executionKey); + } + } + + private static class CountingCompiler extends LmdbLftjCodegenCompiler { + private int compileCalls; + private RuntimeException failure; + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape) { + compileCalls++; + try { + return super.compile(plan, shape); + } catch (RuntimeException e) { + failure = e; + throw e; + } + } + + private String failureDescription() { + if (failure == null) { + return "expected compiled 
factory cache entry"; + } + Throwable cause = failure.getCause(); + return cause == null ? failure.toString() : failure + " | cause=" + cause; + } + } + + private static final class FailingCompiler extends LmdbLftjCodegenCompiler { + private int compileCalls; + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape) { + compileCalls++; + throw new IllegalArgumentException("forced failure"); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java index 5dc63bde08..4296cfc50f 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java @@ -49,12 +49,15 @@ public static class CycleState { @Param({ "true", "false" }) public boolean derivedRelationEnabled; + @Param({ "true", "false" }) + public boolean lftjCodegenEnabled; + private QueryEvaluationStep evaluationStep; @Setup(Level.Trial) public void setup() { evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep( - new BenchmarkQueryAccess(false, derivedRelationEnabled), + new BenchmarkQueryAccess(false, derivedRelationEnabled, lftjCodegenEnabled), LmdbLftjSyntheticScenario.createPlan()); } } @@ -65,12 +68,15 @@ public static class HiddenContextState { @Param({ "true", "false" }) public boolean derivedRelationEnabled; + @Param({ "true", "false" }) + public boolean lftjCodegenEnabled; + private QueryEvaluationStep evaluationStep; @Setup(Level.Trial) public void setup() { evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep( - new BenchmarkQueryAccess(true, derivedRelationEnabled), + new BenchmarkQueryAccess(true, derivedRelationEnabled, lftjCodegenEnabled), LmdbLftjSyntheticScenario.createPlanWithHiddenContexts()); } } @@ -114,16 +120,40 @@ private static final class BenchmarkQueryAccess 
extends LmdbLftjSyntheticScenari private static final Set DERIVED_RELATION_INDEXES = Set.of("psoc", "posc"); private static final Set GENERIC_INDEXES = Set.of("psoc"); + private final LmdbLftjCodegenCache codegenCache = new LmdbLftjCodegenCache(); private final Set configuredIndexes; + private final boolean lftjCodegenEnabled; - private BenchmarkQueryAccess(boolean duplicateContexts, boolean derivedRelationEnabled) { + private BenchmarkQueryAccess(boolean duplicateContexts, boolean derivedRelationEnabled, + boolean lftjCodegenEnabled) { super(duplicateContexts); this.configuredIndexes = derivedRelationEnabled ? DERIVED_RELATION_INDEXES : GENERIC_INDEXES; + this.lftjCodegenEnabled = lftjCodegenEnabled; } @Override public Set configuredIndexes() { return configuredIndexes; } + + @Override + public boolean lftjCodegenEnabled() { + return lftjCodegenEnabled; + } + + @Override + public LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { + return codegenCache.get(executionKey); + } + + @Override + public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + codegenCache.putSuccess(executionKey, factory); + } + + @Override + public void cacheCompiledPlanFailure(String executionKey, String message) { + codegenCache.putFailure(executionKey, message); + } } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java index 15a5a1102c..c17ee0b84f 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java @@ -14,6 +14,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; +import java.nio.file.Path; import org.eclipse.rdf4j.repository.Repository; import 
org.eclipse.rdf4j.repository.RepositoryConnection; @@ -27,41 +28,48 @@ class FoafCliqueLftjCorrectnessTest { @Test - void cycle3ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File enabledDir) { - assertCycleCountMatches(disabledDir, enabledDir, 3); + void cycle3ShouldMatchRegularJoinCount(@TempDir Path tempDir) { + assertCycleCountMatches(tempDir, 3); } @Test - void cycle4ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File enabledDir) { - assertCycleCountMatches(disabledDir, enabledDir, 4); + void cycle4ShouldMatchRegularJoinCount(@TempDir Path tempDir) { + assertCycleCountMatches(tempDir, 4); } @Test - void cycle5ShouldMatchRegularJoinCount(@TempDir File disabledDir, @TempDir File enabledDir) { - assertCycleCountMatches(disabledDir, enabledDir, 5); + void cycle5ShouldMatchRegularJoinCount(@TempDir Path tempDir) { + assertCycleCountMatches(tempDir, 5); } - private void assertCycleCountMatches(File disabledDir, File enabledDir, int cycleSize) { - Repository disabledRepository = createRepository(disabledDir, false); - Repository enabledRepository = createRepository(enabledDir, true); + private void assertCycleCountMatches(Path tempDir, int cycleSize) { + Repository fallbackRepository = createRepository(tempDir.resolve("fallback").toFile(), false, false); + Repository interpretedRepository = createRepository(tempDir.resolve("interpreted").toFile(), true, false); + Repository compiledRepository = createRepository(tempDir.resolve("compiled").toFile(), true, true); try { - populate(disabledRepository); - populate(enabledRepository); + populate(fallbackRepository); + populate(interpretedRepository); + populate(compiledRepository); - long expected = executeCount(disabledRepository, cycleQuery(cycleSize)); - long actual = executeCount(enabledRepository, cycleQuery(cycleSize)); + long expected = executeCount(fallbackRepository, cycleQuery(cycleSize)); + long interpreted = executeCount(interpretedRepository, cycleQuery(cycleSize)); + long 
compiled = executeCount(compiledRepository, cycleQuery(cycleSize)); - assertEquals(expected, actual, "LFTJ must preserve the cycle" + cycleSize + " result count"); + assertEquals(expected, interpreted, + "Interpreted LFTJ must preserve the cycle" + cycleSize + " result count"); + assertEquals(expected, compiled, "Compiled LFTJ must preserve the cycle" + cycleSize + " result count"); } finally { - disabledRepository.shutDown(); - enabledRepository.shutDown(); + fallbackRepository.shutDown(); + interpretedRepository.shutDown(); + compiledRepository.shutDown(); } } - private Repository createRepository(File dataDir, boolean lftjEnabled) { + private Repository createRepository(File dataDir, boolean lftjEnabled, boolean lftjCodegenEnabled) { LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); config.setLftjEnabled(lftjEnabled); + config.setLftjCodegenEnabled(lftjCodegenEnabled); config.setForceSync(false); config.setValueDBSize(1_073_741_824L); config.setTripleDBSize(config.getValueDBSize()); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index 7c01bcfec9..b61196400c 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -71,6 +71,9 @@ public class FoafCliqueQueryBenchmark { @Param({ "true", "false" }) public boolean lftjEnabled; + @Param({ "true", "false" }) + public boolean lftjCodegenEnabled; + private File dataDir; private SailRepository repository; @@ -84,7 +87,8 @@ public static void main(String[] args) throws RunnerException { @Setup(Level.Trial) public void setup() throws IOException { dataDir = Files.createTempDirectory("rdf4j-lmdb-foaf-cliques").toFile(); - repository = new SailRepository(new LmdbStore(dataDir, 
createLftjBenchmarkConfig(lftjEnabled))); + repository = new SailRepository(new LmdbStore(dataDir, createLftjBenchmarkConfig(lftjEnabled, + lftjCodegenEnabled))); repository.init(); try (SailRepositoryConnection connection = repository.getConnection()) { @@ -124,9 +128,10 @@ private long executeCount(String query) { } } - private static LmdbStoreConfig createLftjBenchmarkConfig(boolean lftjEnabled) { + private static LmdbStoreConfig createLftjBenchmarkConfig(boolean lftjEnabled, boolean lftjCodegenEnabled) { LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); config.setLftjEnabled(lftjEnabled); + config.setLftjCodegenEnabled(lftjCodegenEnabled); config.setForceSync(false); config.setValueDBSize(1_073_741_824L); config.setTripleDBSize(config.getValueDBSize()); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md index 5aa9ab2f70..a30d1299d4 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -1,7 +1,13 @@ -Benchmark (cliquePercentage) (lftjEnabled) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units -FoafCliqueQueryBenchmark.cycle3 30 true 8 3 5000 15000 12345 avgt 3 30.297 ± 7.495 ms/op -FoafCliqueQueryBenchmark.cycle3 30 false 8 3 5000 15000 12345 avgt 3 88.044 ± 24.824 ms/op -FoafCliqueQueryBenchmark.cycle4 30 true 8 3 5000 15000 12345 avgt 3 173.018 ± 254.982 ms/op -FoafCliqueQueryBenchmark.cycle4 30 false 8 3 5000 15000 12345 avgt 3 569.874 ± 159.548 ms/op -FoafCliqueQueryBenchmark.cycle5 30 true 8 3 5000 15000 12345 avgt 3 1093.809 ± 240.403 ms/op -FoafCliqueQueryBenchmark.cycle5 30 false 8 3 5000 15000 12345 avgt 3 3815.727 ± 256.018 ms/op +Benchmark 
(cliquePercentage) (lftjCodegenEnabled) (lftjEnabled) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units +FoafCliqueQueryBenchmark.cycle3 30 true true 8 3 5000 15000 12345 avgt 3 22.886 ± 3.922 ms/op +FoafCliqueQueryBenchmark.cycle3 30 true false 8 3 5000 15000 12345 avgt 3 90.273 ± 22.294 ms/op +FoafCliqueQueryBenchmark.cycle3 30 false true 8 3 5000 15000 12345 avgt 3 25.738 ± 5.604 ms/op +FoafCliqueQueryBenchmark.cycle3 30 false false 8 3 5000 15000 12345 avgt 3 89.718 ± 6.997 ms/op +FoafCliqueQueryBenchmark.cycle4 30 true true 8 3 5000 15000 12345 avgt 3 116.551 ± 35.375 ms/op +FoafCliqueQueryBenchmark.cycle4 30 true false 8 3 5000 15000 12345 avgt 3 590.218 ± 82.850 ms/op +FoafCliqueQueryBenchmark.cycle4 30 false true 8 3 5000 15000 12345 avgt 3 134.350 ± 28.883 ms/op +FoafCliqueQueryBenchmark.cycle4 30 false false 8 3 5000 15000 12345 avgt 3 569.446 ± 38.531 ms/op +FoafCliqueQueryBenchmark.cycle5 30 true true 8 3 5000 15000 12345 avgt 3 712.650 ± 127.855 ms/op +FoafCliqueQueryBenchmark.cycle5 30 true false 8 3 5000 15000 12345 avgt 3 3783.058 ± 345.017 ms/op +FoafCliqueQueryBenchmark.cycle5 30 false true 8 3 5000 15000 12345 avgt 3 852.463 ± 217.004 ms/op +FoafCliqueQueryBenchmark.cycle5 30 false false 8 3 5000 15000 12345 avgt 3 3814.985 ± 530.638 ms/op diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java index 1efdc989c9..02cc61f6cb 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java @@ -36,6 +36,8 @@ class LmdbStoreConfigTest { private static final IRI LFTJ_ENABLED = Values.iri(LmdbStoreSchema.NAMESPACE + "lftjEnabled"); + private static final IRI LFTJ_CODEGEN_ENABLED = Values.iri(LmdbStoreSchema.NAMESPACE + "lftjCodegenEnabled"); 
+ private static final IRI NO_READAHEAD = Values.iri(LmdbStoreSchema.NAMESPACE + "noReadahead"); @Test @@ -53,6 +55,11 @@ void lftjEnabledDefaultsToEnabled() { assertThat(invokeBooleanGetter(new LmdbStoreConfig(), "isLftjEnabled")).isTrue(); } + @Test + void lftjCodegenEnabledDefaultsToEnabled() { + assertThat(invokeBooleanGetter(new LmdbStoreConfig(), "isLftjCodegenEnabled")).isTrue(); + } + @ParameterizedTest @ValueSource(booleans = { true, false }) void testThatLmdbStoreConfigParseAndExportNoReadahead(final boolean noReadahead) { @@ -89,6 +96,18 @@ void testThatLmdbStoreConfigParseAndExportLftjEnabled(final boolean lftjEnabled) ); } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void testThatLmdbStoreConfigParseAndExportLftjCodegenEnabled(final boolean lftjCodegenEnabled) { + testParseAndExportReflective( + LFTJ_CODEGEN_ENABLED, + Values.literal(lftjCodegenEnabled), + "isLftjCodegenEnabled", + lftjCodegenEnabled, + !lftjCodegenEnabled + ); + } + @ParameterizedTest @ValueSource(longs = { 1, 205454, 0, -1231 }) void testThatLmdbStoreConfigParseAndExportValueEvictionInterval(final long valueEvictionInterval) { From e0e8b687b0a04a5f5a36af3aa32c2cbbf4153d9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 17:57:41 +0200 Subject: [PATCH 17/32] wip --- .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java index a4f9fdff10..16d8cb438d 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -14,7 +14,9 @@ import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Map; +import java.util.function.BiConsumer; +import 
org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.MutableBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; @@ -32,6 +34,8 @@ public final class LmdbLftjBindingState { private final long[] assignedValues; private final boolean[] assignedPresent; private final IdentityHashMap constantIds = new IdentityHashMap<>(); + private BiConsumer[] bindingSetters; + private QueryEvaluationContext bindingSettersContext; private TxnManager.Txn txn; @@ -145,10 +149,10 @@ long fixedId(LmdbLftjPatternPlan.TermRef term) { BindingSet materialize(QueryEvaluationContext context) { MutableBindingSet result = context.createBindingSet(inputBindings); + BiConsumer[] setters = bindingSetters(context); for (int slot = 0; slot < variableNames.length; slot++) { - String variableName = variableNames[slot]; - if (!result.hasBinding(variableName) && isBound(slot)) { - context.setBinding(variableName).accept(queryAccess.resolveValue(value(slot)), result); + if (assignedPresent[slot]) { + setters[slot].accept(queryAccess.resolveValue(assignedValues[slot]), result); } } return result; @@ -169,6 +173,20 @@ void close() { } } + @SuppressWarnings("unchecked") + private BiConsumer[] bindingSetters(QueryEvaluationContext context) { + if (bindingSetters != null && bindingSettersContext == context) { + return bindingSetters; + } + BiConsumer[] setters = new BiConsumer[variableNames.length]; + for (int slot = 0; slot < variableNames.length; slot++) { + setters[slot] = context.setBinding(variableNames[slot]); + } + bindingSetters = setters; + bindingSettersContext = context; + return setters; + } + private int slot(String variableName) { Integer slot = variableSlots.get(variableName); if (slot == null) { From 8cc530fb93d355cd4118473ee7d3f55473e79884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 20:22:03 +0200 Subject: [PATCH 18/32] fastest yet, with codegen --- 
...actLmdbFullStackCompiledLftjIteration.java | 106 ++ .../rdf4j/sail/lmdb/LmdbCachedFrontier.java | 7 +- .../sail/lmdb/LmdbDerivedBinaryRelation.java | 22 +- .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 24 +- .../sail/lmdb/LmdbLftjCodegenCompiler.java | 27 +- .../sail/lmdb/LmdbLftjExecutionShape.java | 169 ++ .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 27 +- .../lmdb/LmdbLftjFullCodegenCompiler.java | 1421 +++++++++++++++++ .../rdf4j/sail/lmdb/LmdbLftjMetrics.java | 20 +- .../lmdb/LmdbLftjTieredCodegenCompiler.java | 29 + .../rdf4j/sail/lmdb/LmdbStoreConnection.java | 11 +- .../org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java | 4 +- .../org/eclipse/rdf4j/sail/lmdb/Pool.java | 16 +- .../eclipse/rdf4j/sail/lmdb/TripleStore.java | 8 +- .../eclipse/rdf4j/sail/lmdb/TxnManager.java | 10 +- .../rdf4j/sail/lmdb/LmdbBenchmarkStore.java | 52 + .../sail/lmdb/LmdbLftjBenchmarkMode.java | 57 + .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 388 ++++- .../sail/lmdb/LmdbLftjExecutorBenchmark.java | 153 +- .../lmdb/LmdbLftjMetricsDiagnosticTest.java | 196 +++ .../sail/lmdb/LmdbLftjSyntheticScenario.java | 14 +- .../benchmark/FoafCliqueQueryBenchmark.java | 23 +- .../FoafCliqueQueryBenchmarkResults.md | 24 +- 23 files changed, 2681 insertions(+), 127 deletions(-) create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbBenchmarkStore.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBenchmarkMode.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetricsDiagnosticTest.java diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java new file mode 100644 index 0000000000..d64efafeab --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java @@ -0,0 +1,106 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import org.eclipse.rdf4j.common.iteration.LookAheadIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; + +public abstract class AbstractLmdbFullStackCompiledLftjIteration extends LookAheadIteration { + + private final LmdbLftjPlan plan; + private final LmdbLftjExecutionShape shape; + private final LmdbLftjBindingState state; + private final QueryEvaluationContext context; + private final LmdbQueryAccess queryAccess; + private final LmdbLftjMetrics metrics; + + private BindingSet repeatedBinding; + private long repeatedCount; + + protected AbstractLmdbFullStackCompiledLftjIteration(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + LmdbLftjBindingState state, QueryEvaluationContext context, LmdbQueryAccess queryAccess, + LmdbLftjMetrics metrics) { + this.plan = plan; + this.shape = shape; + this.state = state; + this.context = 
context; + this.queryAccess = queryAccess; + this.metrics = metrics; + } + + @Override + protected final BindingSet getNextElement() { + if (repeatedCount > 0) { + repeatedCount--; + metrics.recordEmitted(1); + return repeatedBinding; + } + + try { + return computeNextElement(); + } catch (RuntimeException e) { + throw new QueryEvaluationException("LMDB LFTJ full-stack compiled iteration failed", e); + } + } + + @Override + protected final void handleClose() { + closeResources(); + state.close(); + } + + protected abstract BindingSet computeNextElement(); + + protected abstract void closeResources(); + + protected final LmdbLftjPlan plan() { + return plan; + } + + protected final LmdbLftjExecutionShape shape() { + return shape; + } + + protected final LmdbLftjBindingState state() { + return state; + } + + protected final QueryEvaluationContext context() { + return context; + } + + protected final LmdbQueryAccess queryAccess() { + return queryAccess; + } + + protected final LmdbLftjMetrics metrics() { + return metrics; + } + + protected final void recordCandidateScan() { + metrics.recordCandidateScan(); + } + + protected final void recordWitnessScan() { + metrics.recordWitnessScan(); + } + + protected final BindingSet emitCurrent(long multiplicity) { + BindingSet result = state.materialize(context); + repeatedBinding = result; + repeatedCount = multiplicity - 1; + metrics.recordEmitted(1); + return result; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java index 9103aa68e0..229a124af1 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java @@ -15,7 +15,8 @@ final class LmdbCachedFrontier { - static final LmdbCachedFrontier EMPTY = new LmdbCachedFrontier(new long[0], null); + static final long[] EMPTY_VALUES = new 
long[0]; + static final LmdbCachedFrontier EMPTY = new LmdbCachedFrontier(EMPTY_VALUES, null); private final long[] values; private final long[] counts; @@ -33,6 +34,10 @@ int size() { return values.length; } + long[] values() { + return values; + } + long valueAt(int index) { return values[index]; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java index 4a3e10ffbc..838e22c7ef 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java @@ -15,14 +15,14 @@ import java.util.Map; import java.util.Objects; -final class LmdbDerivedBinaryRelation { +public final class LmdbDerivedBinaryRelation { private final int sourceComponent; private final int targetComponent; private final LmdbCachedFrontier rootFrontier; private final Map adjacency; - LmdbDerivedBinaryRelation(int sourceComponent, int targetComponent, LmdbCachedFrontier rootFrontier, + public LmdbDerivedBinaryRelation(int sourceComponent, int targetComponent, LmdbCachedFrontier rootFrontier, Map adjacency) { this.sourceComponent = sourceComponent; this.targetComponent = targetComponent; @@ -42,15 +42,23 @@ LmdbCachedFrontier rootFrontier() { return rootFrontier; } + public long[] rootFrontierValues() { + return rootFrontier.values(); + } + LmdbCachedFrontier frontier(long sourceValue) { return adjacency.getOrDefault(sourceValue, LmdbCachedFrontier.EMPTY); } - long count(long sourceValue, long targetValue) { + public long[] frontierValues(long sourceValue) { + return frontier(sourceValue).values(); + } + + public long count(long sourceValue, long targetValue) { return frontier(sourceValue).countFor(targetValue); } - static final class Builder { + public static final class Builder { private final int sourceComponent; private final int 
targetComponent; @@ -65,12 +73,12 @@ static final class Builder { private long currentTarget; private long currentCount; - Builder(int sourceComponent, int targetComponent) { + public Builder(int sourceComponent, int targetComponent) { this.sourceComponent = sourceComponent; this.targetComponent = targetComponent; } - void add(long sourceValue, long targetValue) { + public void add(long sourceValue, long targetValue) { if (!sourceOpen || sourceValue != currentSource) { finishPair(); finishSource(); @@ -90,7 +98,7 @@ void add(long sourceValue, long targetValue) { currentCount++; } - LmdbDerivedBinaryRelation build() { + public LmdbDerivedBinaryRelation build() { finishPair(); finishSource(); return new LmdbDerivedBinaryRelation(sourceComponent, targetComponent, diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java index 16d8cb438d..99c571cf4e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -54,7 +54,7 @@ public final class LmdbLftjBindingState { } } - boolean initialize() { + public boolean initialize() { for (LmdbLftjPatternPlan patternPlan : plan.patternPlans()) { for (LmdbLftjPatternPlan.TermRef term : patternPlan.terms()) { if (term.isConstant() && !constantIds.containsKey(term)) { @@ -83,11 +83,11 @@ boolean initialize() { return true; } - void attachTxn(TxnManager.Txn txn) { + public void attachTxn(TxnManager.Txn txn) { this.txn = txn; } - TxnManager.Txn txn() { + public TxnManager.Txn txn() { return txn; } @@ -95,11 +95,11 @@ boolean isBound(String variableName) { return isBound(slot(variableName)); } - boolean isBound(int slot) { + public boolean isBound(int slot) { return assignedPresent[slot] || fixedPresent[slot]; } - boolean isFixed(int slot) { + public boolean isFixed(int slot) { return 
fixedPresent[slot]; } @@ -107,7 +107,7 @@ long value(String variableName) { return value(slot(variableName)); } - long value(int slot) { + public long value(int slot) { if (assignedPresent[slot]) { return assignedValues[slot]; } @@ -147,7 +147,7 @@ long fixedId(LmdbLftjPatternPlan.TermRef term) { return -1; } - BindingSet materialize(QueryEvaluationContext context) { + public BindingSet materialize(QueryEvaluationContext context) { MutableBindingSet result = context.createBindingSet(inputBindings); BiConsumer[] setters = bindingSetters(context); for (int slot = 0; slot < variableNames.length; slot++) { @@ -158,21 +158,25 @@ BindingSet materialize(QueryEvaluationContext context) { return result; } - int variableCount() { + public int variableCount() { return variableNames.length; } - String variableName(int slot) { + public String variableName(int slot) { return variableNames[slot]; } - void close() { + public void close() { if (txn != null) { queryAccess.releaseReadTxn(txn); txn = null; } } + public long fixedIdForComponent(int patternOrdinal, int component) { + return fixedId(plan.patternPlans().get(patternOrdinal).termForComponent(component)); + } + @SuppressWarnings("unchecked") private BiConsumer[] bindingSetters(QueryEvaluationContext context) { if (bindingSetters != null && bindingSettersContext == context) { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java index 019ba74635..2a6d0cfbe5 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java @@ -22,9 +22,21 @@ class LmdbLftjCodegenCompiler { private static final AtomicLong CLASS_COUNTER = new AtomicLong(); private static final String PACKAGE_NAME = LmdbLftjCodegenCompiler.class.getPackageName(); + String cacheKey(LmdbLftjPlan plan, 
LmdbLftjExecutionShape shape, boolean includeInferred) { + return plan.executionKey(); + } + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape) { String simpleClassName = "GeneratedLmdbLftjFactory" + CLASS_COUNTER.incrementAndGet(); - String source = new SourceBuilder(simpleClassName, shape).build(); + String source = sourceFor(simpleClassName, plan, shape, false); + return compileSource(plan.executionKey(), simpleClassName, source); + } + + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return compile(plan, shape); + } + + protected final LmdbCompiledLftjFactory compileSource(String executionKey, String simpleClassName, String source) { try { SimpleCompiler compiler = new SimpleCompiler(); compiler.setParentClassLoader(LmdbLftjCodegenCompiler.class.getClassLoader()); @@ -34,11 +46,20 @@ LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape) constructor.setAccessible(true); return (LmdbCompiledLftjFactory) constructor.newInstance(); } catch (Exception e) { - throw new IllegalArgumentException("Unable to compile LMDB LFTJ codegen for " + plan.executionKey(), e); + throw new IllegalArgumentException("Unable to compile LMDB LFTJ codegen for " + executionKey, e); } } - private static final class SourceBuilder { + String sourceFor(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return sourceFor("GeneratedLmdbLftjSource", plan, shape, includeInferred); + } + + protected String sourceFor(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + boolean includeInferred) { + return new SourceBuilder(simpleClassName, shape).build(); + } + + protected static final class SourceBuilder { private final String simpleClassName; private final LmdbLftjExecutionShape shape; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java 
/**
 * Returns the version number of the full-stack code template, i.e. the value of
 * {@code FULL_STACK_TEMPLATE_VERSION}.
 *
 * @return the template version constant
 */
int templateVersion() {
    return FULL_STACK_TEMPLATE_VERSION;
}
patterns[patternOrdinal]; + } + + static final class PatternShape { + + private final int ordinal; + private final String indexName; + private final char[] indexFields; + private final int[] indexComponents; + private final int[] slotByComponent = new int[4]; + private final boolean[] constantByComponent = new boolean[4]; + private final boolean[] hiddenByComponent = new boolean[4]; + private final int[] visibleSlots; + private final int[] keyFieldIndexBySlot; + private final int[] componentBySlot; + private final boolean hasHiddenTerms; + private final boolean derivedBinaryRelation; + private final int derivedSourceComponent; + private final int derivedTargetComponent; + + private PatternShape(int ordinal, LmdbLftjPatternPlan plan, Map slotsByVariable, + int variableCount) { + this.ordinal = ordinal; + this.indexName = plan.indexName(); + this.indexFields = plan.indexName().toCharArray(); + this.indexComponents = new int[indexFields.length]; + Arrays.fill(slotByComponent, -1); + this.keyFieldIndexBySlot = new int[variableCount]; + Arrays.fill(keyFieldIndexBySlot, -1); + this.componentBySlot = new int[variableCount]; + Arrays.fill(componentBySlot, -1); + + List visibleSlots = new ArrayList<>(); + boolean hidden = false; + for (LmdbLftjPatternPlan.TermRef term : plan.terms()) { + int component = term.component(); + if (term.isVisible()) { + Integer slot = slotsByVariable.get(term.name()); + if (slot == null) { + throw new IllegalArgumentException( + "Unknown LMDB LFTJ variable in execution shape: " + term.name()); + } + slotByComponent[component] = slot; + componentBySlot[slot] = component; + visibleSlots.add(slot); + } else if (term.isConstant()) { + constantByComponent[component] = true; + } else if (term.isHidden()) { + hiddenByComponent[component] = true; + hidden = true; + } + } + this.visibleSlots = visibleSlots.stream().mapToInt(Integer::intValue).toArray(); + this.hasHiddenTerms = hidden; + for (int i = 0; i < indexFields.length; i++) { + indexComponents[i] 
= componentFor(indexFields[i]); + int slot = slotByComponent[indexComponents[i]]; + if (slot >= 0) { + keyFieldIndexBySlot[slot] = i; + } + } + this.derivedBinaryRelation = plan.canUseDerivedBinaryRelation(); + this.derivedSourceComponent = derivedBinaryRelation ? plan.keyTerm(1).component() : -1; + this.derivedTargetComponent = derivedBinaryRelation ? plan.keyTerm(2).component() : -1; + } + + int ordinal() { + return ordinal; + } + + String indexName() { + return indexName; + } + + char[] indexFields() { + return indexFields.clone(); + } + + int indexComponent(int keyFieldIndex) { + return indexComponents[keyFieldIndex]; + } + + int slotForComponent(int component) { + return slotByComponent[component]; + } + + boolean isConstantComponent(int component) { + return constantByComponent[component]; + } + + boolean isHiddenComponent(int component) { + return hiddenByComponent[component]; + } + + int[] visibleSlots() { + return visibleSlots.clone(); + } + + boolean containsSlot(int slot) { + return componentBySlot[slot] >= 0; + } + + int componentForSlot(int slot) { + return componentBySlot[slot]; + } + + int keyFieldIndexForSlot(int slot) { + return keyFieldIndexBySlot[slot]; + } + + boolean hasHiddenTerms() { + return hasHiddenTerms; + } + + boolean derivedBinaryRelation() { + return derivedBinaryRelation; + } + + int derivedSourceComponent() { + return derivedSourceComponent; + } + + int derivedTargetComponent() { + return derivedTargetComponent; + } + + private static int componentFor(char field) { + switch (field) { + case 's': + return TripleStore.SUBJ_IDX; + case 'p': + return TripleStore.PRED_IDX; + case 'o': + return TripleStore.OBJ_IDX; + case 'c': + return TripleStore.CONTEXT_IDX; + default: + throw new IllegalArgumentException("Unknown LMDB field: " + field); + } + } + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index 
b6b7427941..6bdb3f7186 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -50,8 +50,8 @@ private CloseableIteration evaluate(LmdbLftjPlan plan, LmdbLftjExecu return fallback.evaluate(bindings); } - LmdbCompiledLftjFactory compiledFactory = compiledFactory(queryAccess, plan, shape); try { + LmdbCompiledLftjFactory compiledFactory = compiledFactory(queryAccess, plan, shape); state.attachTxn(queryAccess.acquireReadTxn()); if (compiledFactory != null) { return compiledFactory.create(plan, shape, state, context, queryAccess, new LmdbLftjMetrics()); @@ -69,19 +69,32 @@ private LmdbCompiledLftjFactory compiledFactory(LmdbQueryAccess queryAccess, Lmd return null; } - LmdbLftjCodegenCache.CacheEntry cached = queryAccess.cachedCompiledPlan(plan.executionKey()); + LmdbLftjCodegenCompiler compiler = queryAccess.codegenCompiler(); + String cacheKey = compiler.cacheKey(plan, shape, queryAccess.includeInferred()); + LmdbLftjCodegenCache.CacheEntry cached = queryAccess.cachedCompiledPlan(cacheKey); if (cached != null) { - return cached.compiled() ? 
cached.factory() : null; + if (cached.compiled()) { + return cached.factory(); + } + throw codegenFailure(cacheKey, cached.failureMessage(), null); } try { - LmdbCompiledLftjFactory factory = queryAccess.codegenCompiler().compile(plan, shape); - queryAccess.cacheCompiledPlanSuccess(plan.executionKey(), factory); + LmdbCompiledLftjFactory factory = compiler.compile(plan, shape, queryAccess.includeInferred()); + queryAccess.cacheCompiledPlanSuccess(cacheKey, factory); return factory; } catch (RuntimeException e) { - queryAccess.cacheCompiledPlanFailure(plan.executionKey(), e.getMessage()); - return null; + queryAccess.cacheCompiledPlanFailure(cacheKey, e.getMessage()); + throw codegenFailure(cacheKey, e.getMessage(), e); + } + } + + private IllegalStateException codegenFailure(String cacheKey, String message, RuntimeException cause) { + String detail = message == null || message.isBlank() ? "" : message; + if (cause == null) { + return new IllegalStateException("LMDB LFTJ codegen failed for " + cacheKey + ": " + detail); } + return new IllegalStateException("LMDB LFTJ codegen failed for " + cacheKey + ": " + detail, cause); } private final class LmdbLftjIteration extends LookAheadIteration { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java new file mode 100644 index 0000000000..ad788a3bfa --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java @@ -0,0 +1,1421 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +final class LmdbLftjFullCodegenCompiler extends LmdbLftjCodegenCompiler { + + static final LmdbLftjFullCodegenCompiler INSTANCE = new LmdbLftjFullCodegenCompiler(); + + @Override + String cacheKey(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return "full-stack:v" + shape.templateVersion() + ":includeInferred=" + includeInferred + ":" + + plan.executionKey(); + } + + @Override + protected String sourceFor(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + boolean includeInferred) { + return new SourceBuilder(simpleClassName, plan, shape, includeInferred).build(); + } + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + String simpleClassName = "GeneratedLmdbFullStackLftjFactory" + System.nanoTime(); + String source = sourceFor(simpleClassName, plan, shape, includeInferred); + return compileSource(cacheKey(plan, shape, includeInferred), simpleClassName, source); + } + + private static final class SourceBuilder { + + private static final String PACKAGE_NAME = LmdbLftjCodegenCompiler.class.getPackageName(); + + private final String simpleClassName; + private final LmdbLftjPlan plan; + private final LmdbLftjExecutionShape shape; + private final boolean includeInferred; + private final int[] relationGroupByPattern; + private final RelationGroup[] relationGroups; + + private SourceBuilder(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + boolean includeInferred) { + this.simpleClassName = simpleClassName; + this.plan = plan; + this.shape = shape; + this.includeInferred = includeInferred; + 
this.relationGroupByPattern = new int[shape.patternCount()]; + for (int i = 0; i < relationGroupByPattern.length; i++) { + relationGroupByPattern[i] = -1; + } + this.relationGroups = collectRelationGroups(); + } + + private String build() { + StringBuilder source = new StringBuilder(); + source.append("package ").append(PACKAGE_NAME).append(";\n\n"); + source.append("import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.E;\n"); + source.append("import static org.lwjgl.util.lmdb.LMDB.MDB_NEXT;\n"); + source.append("import static org.lwjgl.util.lmdb.LMDB.MDB_SET_RANGE;\n"); + source.append("import static org.lwjgl.util.lmdb.LMDB.MDB_SUCCESS;\n"); + source.append("import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_close;\n"); + source.append("import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_get;\n"); + source.append("import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_open;\n"); + source.append("import java.nio.ByteBuffer;\n"); + source.append("import org.eclipse.rdf4j.common.concurrent.locks.StampedLongAdderLockManager;\n"); + source.append("import org.eclipse.rdf4j.common.iteration.CloseableIteration;\n"); + source.append("import org.eclipse.rdf4j.query.BindingSet;\n"); + source.append("import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext;\n"); + source.append("import org.eclipse.rdf4j.sail.SailException;\n"); + source.append("import org.lwjgl.PointerBuffer;\n"); + source.append("import org.lwjgl.system.MemoryStack;\n"); + source.append("import org.lwjgl.util.lmdb.MDBVal;\n\n"); + source.append("final class ").append(simpleClassName).append(" implements LmdbCompiledLftjFactory {\n"); + appendFactoryMethod(source); + appendIterationClass(source); + source.append("}\n"); + return source.toString(); + } + + private void appendFactoryMethod(StringBuilder source) { + source.append(" @Override\n"); + source.append( + " public CloseableIteration create(LmdbLftjPlan plan, LmdbLftjExecutionShape shape,\n"); + source.append( + " LmdbLftjBindingState state, 
/**
 * Emits the nested {@code Iteration} class of the generated factory.
 * <p>
 * The text is produced in three sections, in this order: field declarations, the constructor, and the
 * methods. The order of the calls below is the order of members in the generated class.
 *
 * @param source buffer receiving the generated source
 */
private void appendIterationClass(StringBuilder source) {
    int variableCount = shape.variableCount();
    // --- fields: fixed handles first, then per-pattern, per-relation-group and per-depth state ---
    source.append(
            " private static final class Iteration extends AbstractLmdbFullStackCompiledLftjIteration {\n");
    source.append(" private final Pool pool;\n");
    source.append(" private final TxnManager.Txn txnRef;\n");
    source.append(" private final long txn;\n");
    source.append(" private final StampedLongAdderLockManager txnLockManager;\n");
    source.append(" private final TripleStore tripleStore;\n");
    for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) {
        appendPatternFields(source, patternOrdinal);
    }
    for (RelationGroup relationGroup : relationGroups) {
        appendRelationGroupFields(source, relationGroup.groupId);
    }
    // two flags per variable slot: "Initialized" and "Advance"
    for (int slot = 0; slot < variableCount; slot++) {
        source.append(" private boolean depth").append(slot).append("Initialized;\n");
        source.append(" private boolean depth").append(slot).append("Advance;\n");
    }
    source.append(" private int depth;\n\n");

    // --- constructor: the generated code rejects a null TripleStore before any per-pattern setup ---
    source.append(
            " private Iteration(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, LmdbLftjBindingState state,\n");
    source.append(
            " QueryEvaluationContext context, LmdbQueryAccess queryAccess, LmdbLftjMetrics metrics) {\n");
    source.append(" super(plan, shape, state, context, queryAccess, metrics);\n");
    source.append(" this.pool = Pool.get();\n");
    source.append(" this.txnRef = state.txn();\n");
    source.append(" this.txn = txnRef.get();\n");
    source.append(" this.txnLockManager = txnRef.lockManager();\n");
    source.append(" this.tripleStore = queryAccess.tripleStore();\n");
    source.append(" if (tripleStore == null) {\n");
    source.append(
            " throw new IllegalStateException(\"LMDB full-stack codegen requires a real TripleStore\");\n");
    source.append(" }\n");
    for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) {
        appendPatternConstructor(source, patternOrdinal);
    }
    source.append(" this.depth = firstDepth();\n");
    source.append(" }\n\n");

    // --- methods: driver and backtracking, then per-slot, per-relation-group and per-pattern methods ---
    appendComputeNextElement(source, variableCount);
    appendFirstDepth(source, variableCount);
    appendBacktrackAfterLeaf(source, variableCount);
    appendBacktrackFromDepth(source, variableCount);
    for (int slot = 0; slot < variableCount; slot++) {
        appendReleaseDepth(source, slot);
        appendPositionDepth(source, slot, shape.cursorOrdinals(slot));
    }
    for (RelationGroup relationGroup : relationGroups) {
        appendRelationGroupAccessor(source, relationGroup);
        appendRelationGroupCacheMethods(source, relationGroup.groupId);
    }
    for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) {
        appendPatternMethods(source, patternOrdinal);
    }
    appendCloseResources(source);
    appendHelpers(source);
    // closes the Iteration class
    source.append(" }\n");
}
patternOrdinal, -1); + return; + } + for (int slot : patternShape.visibleSlots()) { + appendRawCursorFields(source, patternOrdinal, slot); + } + appendRawCursorFields(source, patternOrdinal, -1); + } + + private void appendRelationGroupFields(StringBuilder source, int groupId) { + source.append(" private LmdbDerivedBinaryRelation relationGroup").append(groupId).append(";\n"); + source.append(" private boolean relationGroup").append(groupId).append("Loaded;\n"); + source.append(" private long[] relationGroup").append(groupId).append("RootFrontierValues;\n"); + source.append(" private boolean relationGroup").append(groupId).append("RootFrontierLoaded;\n"); + source.append(" private final long[] relationGroup") + .append(groupId) + .append("FrontierKeys = new long[65536];\n"); + source.append(" private final long[][] relationGroup") + .append(groupId) + .append("FrontierValues = new long[65536][];\n"); + source.append(" private final boolean[] relationGroup") + .append(groupId) + .append("FrontierUsed = new boolean[65536];\n"); + source.append(" private final long[] relationGroup") + .append(groupId) + .append("CountSourceKeys = new long[65536];\n"); + source.append(" private final long[] relationGroup") + .append(groupId) + .append("CountTargetKeys = new long[65536];\n"); + source.append(" private final long[] relationGroup") + .append(groupId) + .append("CountValues = new long[65536];\n"); + source.append(" private final boolean[] relationGroup") + .append(groupId) + .append("CountUsed = new boolean[65536];\n"); + } + + private void appendRawCursorFields(StringBuilder source, int patternOrdinal, int slot) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" private final int dbi").append(suffix).append("Explicit;\n"); + source.append(" private final MDBVal key").append(suffix).append("Explicit;\n"); + source.append(" private final MDBVal data").append(suffix).append("Explicit;\n"); + source.append(" private final ByteBuffer 
lower").append(suffix).append("Explicit;\n"); + source.append(" private final long cursor").append(suffix).append("Explicit;\n"); + source.append(" private boolean available").append(suffix).append("Explicit;\n"); + source.append(" private long subj").append(suffix).append("Explicit;\n"); + source.append(" private long pred").append(suffix).append("Explicit;\n"); + source.append(" private long obj").append(suffix).append("Explicit;\n"); + source.append(" private long ctx").append(suffix).append("Explicit;\n"); + if (includeInferred) { + source.append(" private final int dbi").append(suffix).append("Inferred;\n"); + source.append(" private final MDBVal key").append(suffix).append("Inferred;\n"); + source.append(" private final MDBVal data").append(suffix).append("Inferred;\n"); + source.append(" private final ByteBuffer lower").append(suffix).append("Inferred;\n"); + source.append(" private final long cursor").append(suffix).append("Inferred;\n"); + source.append(" private boolean available").append(suffix).append("Inferred;\n"); + source.append(" private long subj").append(suffix).append("Inferred;\n"); + source.append(" private long pred").append(suffix).append("Inferred;\n"); + source.append(" private long obj").append(suffix).append("Inferred;\n"); + source.append(" private long ctx").append(suffix).append("Inferred;\n"); + } + if (slot >= 0) { + source.append(" private long current").append(suffix).append(";\n"); + source.append(" private boolean currentAvailable").append(suffix).append(";\n"); + } + } + + private void appendPatternConstructor(StringBuilder source, int patternOrdinal) { + LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); + source.append(" this.index") + .append(patternOrdinal) + .append(" = tripleStore.tripleIndex(\"") + .append(patternShape.indexName()) + .append("\");\n"); + if (patternShape.derivedBinaryRelation()) { + appendRawCursorConstructor(source, patternOrdinal, -1); + return; + } + for (int slot : 
patternShape.visibleSlots()) { + appendRawCursorConstructor(source, patternOrdinal, slot); + } + appendRawCursorConstructor(source, patternOrdinal, -1); + } + + private void appendRawCursorConstructor(StringBuilder source, int patternOrdinal, int slot) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" this.dbi") + .append(suffix) + .append("Explicit = index") + .append(patternOrdinal) + .append(".getDB(true);\n"); + source.append(" this.key").append(suffix).append("Explicit = pool.getVal();\n"); + source.append(" this.data").append(suffix).append("Explicit = pool.getVal();\n"); + source.append(" this.lower").append(suffix).append("Explicit = pool.getKeyBuffer();\n"); + source.append(" this.cursor") + .append(suffix) + .append("Explicit = openCursor(dbi") + .append(suffix) + .append("Explicit);\n"); + if (includeInferred) { + source.append(" this.dbi") + .append(suffix) + .append("Inferred = index") + .append(patternOrdinal) + .append(".getDB(false);\n"); + source.append(" this.key").append(suffix).append("Inferred = pool.getVal();\n"); + source.append(" this.data").append(suffix).append("Inferred = pool.getVal();\n"); + source.append(" this.lower").append(suffix).append("Inferred = pool.getKeyBuffer();\n"); + source.append(" this.cursor") + .append(suffix) + .append("Inferred = openCursor(dbi") + .append(suffix) + .append("Inferred);\n"); + } + } + + private void appendComputeNextElement(StringBuilder source, int variableCount) { + source.append(" @Override\n"); + source.append(" protected BindingSet computeNextElement() {\n"); + source.append(" while (depth >= 0) {\n"); + source.append(" if (depth == ").append(variableCount).append(") {\n"); + source.append(" long multiplicity = 1L;\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + source.append(" long witnesses") + .append(patternOrdinal) + .append(" = countMatchesPattern") + .append(patternOrdinal) + .append("();\n"); + source.append(" if 
(witnesses").append(patternOrdinal).append(" == 0L) {\n"); + source.append(" backtrackAfterLeaf();\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" multiplicity = Math.multiplyExact(multiplicity, witnesses") + .append(patternOrdinal) + .append(");\n"); + } + source.append(" backtrackAfterLeaf();\n"); + source.append(" if (multiplicity > 0L) {\n"); + source.append(" return emitCurrent(multiplicity);\n"); + source.append(" }\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" switch (depth) {\n"); + for (int slot = 0; slot < variableCount; slot++) { + source.append(" case ").append(slot).append(":\n"); + source.append(" if (state().isFixed(").append(slot).append(")) {\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" if (!depth").append(slot).append("Initialized) {\n"); + source.append(" if (!positionDepth").append(slot).append("(false)) {\n"); + source.append(" backtrackFromDepth(").append(slot).append(");\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" depth").append(slot).append("Initialized = true;\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" if (depth").append(slot).append("Advance) {\n"); + source.append(" if (!positionDepth").append(slot).append("(true)) {\n"); + source.append(" backtrackFromDepth(").append(slot).append(");\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" depth").append(slot).append("Advance = false;\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + source.append(" }\n"); + source.append(" depth = ").append(slot + 1).append(";\n"); + source.append(" continue;\n"); + } + source.append(" default:\n"); + source.append(" return null;\n"); + source.append(" }\n"); + source.append(" }\n"); 
+ source.append(" return null;\n"); + source.append(" }\n\n"); + } + + private void appendFirstDepth(StringBuilder source, int variableCount) { + source.append(" private int firstDepth() {\n"); + for (int slot = 0; slot < variableCount; slot++) { + source.append(" if (!state().isFixed(").append(slot).append(")) {\n"); + source.append(" return ").append(slot).append(";\n"); + source.append(" }\n"); + } + source.append(" return ").append(variableCount).append(";\n"); + source.append(" }\n\n"); + } + + private void appendBacktrackAfterLeaf(StringBuilder source, int variableCount) { + source.append(" private void backtrackAfterLeaf() {\n"); + for (int slot = variableCount - 1; slot >= 0; slot--) { + source.append(" if (!state().isFixed(").append(slot).append(")) {\n"); + source.append(" depth").append(slot).append("Advance = true;\n"); + source.append(" depth = ").append(slot).append(";\n"); + source.append(" return;\n"); + source.append(" }\n"); + } + source.append(" depth = -1;\n"); + source.append(" }\n\n"); + } + + private void appendBacktrackFromDepth(StringBuilder source, int variableCount) { + source.append(" private void backtrackFromDepth(int failedDepth) {\n"); + source.append(" switch (failedDepth) {\n"); + for (int failedDepth = 0; failedDepth < variableCount; failedDepth++) { + source.append(" case ").append(failedDepth).append(":\n"); + source.append(" releaseDepth").append(failedDepth).append("();\n"); + for (int slot = failedDepth - 1; slot >= 0; slot--) { + source.append(" if (!state().isFixed(").append(slot).append(")) {\n"); + source.append(" depth").append(slot).append("Advance = true;\n"); + source.append(" depth = ").append(slot).append(";\n"); + source.append(" return;\n"); + source.append(" }\n"); + } + source.append(" depth = -1;\n"); + source.append(" return;\n"); + } + source.append(" default:\n"); + source.append(" depth = -1;\n"); + source.append(" }\n"); + source.append(" }\n\n"); + } + + private void appendReleaseDepth(StringBuilder 
/**
 * Emits {@code positionDepth<slot>(boolean advanceExisting)}, which positions every cursor that binds
 * {@code slot} on a common value and assigns that value to the binding state.
 * <p>
 * Three shapes of method body are produced, depending on how many cursors take part:
 * <ul>
 * <li>no cursor: the generated method clears the slot and returns {@code false};</li>
 * <li>one cursor: the cursor is opened (or advanced to its next distinct value) and its current value is
 * assigned;</li>
 * <li>several cursors: after opening/advancing, a loop seeks every cursor to a candidate value and raises
 * the candidate to the largest value seen until all cursors agree.</li>
 * </ul>
 *
 * @param source         buffer receiving the generated source
 * @param slot           variable slot the generated method positions
 * @param cursorOrdinals pattern ordinals whose cursors bind {@code slot}
 */
private void appendPositionDepth(StringBuilder source, int slot, int[] cursorOrdinals) {
    source.append(" private boolean positionDepth").append(slot).append("(boolean advanceExisting) {\n");
    source.append(" state().clear(").append(slot).append(");\n");
    if (cursorOrdinals.length == 0) {
        // no pattern binds this slot: the generated method can never produce a value
        source.append(" return false;\n");
        source.append(" }\n\n");
        return;
    }
    // first visit: open every participating cursor, recording one candidate scan per cursor
    source.append(" if (!advanceExisting) {\n");
    for (int cursorOrdinal : cursorOrdinals) {
        source.append(" recordCandidateScan();\n");
        source.append(" if (!openSlot").append(cursorOrdinal).append('_').append(slot).append("()) {\n");
        source.append(" return false;\n");
        source.append(" }\n");
    }
    // revisit: only the first cursor is stepped to its next distinct value
    source.append(" } else if (!nextDistinctSlot")
            .append(cursorOrdinals[0])
            .append('_')
            .append(slot)
            .append("()) {\n");
    source.append(" return false;\n");
    source.append(" }\n");
    if (cursorOrdinals.length == 1) {
        // single cursor: its current value is the binding, no intersection needed
        source.append(" state().assign(")
                .append(slot)
                .append(", currentValue")
                .append(cursorOrdinals[0])
                .append('_')
                .append(slot)
                .append("());\n");
        source.append(" return true;\n");
        source.append(" }\n\n");
        return;
    }
    // several cursors: start from the largest current value among them
    source.append(" long current = currentValue")
            .append(cursorOrdinals[0])
            .append('_')
            .append(slot)
            .append("();\n");
    for (int i = 1; i < cursorOrdinals.length; i++) {
        source.append(" if (currentValue")
                .append(cursorOrdinals[i])
                .append('_')
                .append(slot)
                .append("() > current) {\n");
        source.append(" current = currentValue")
                .append(cursorOrdinals[i])
                .append('_')
                .append(slot)
                .append("();\n");
        source.append(" }\n");
    }
    // generated loop: seek each cursor to the candidate; any mismatch raises the candidate to the maximum
    source.append(" while (true) {\n");
    source.append(" boolean allMatch = true;\n");
    source.append(" long max = current;\n");
    for (int cursorOrdinal : cursorOrdinals) {
        source.append(" if (!seekSlot")
                .append(cursorOrdinal)
                .append('_')
                .append(slot)
                .append("(current)) {\n");
        source.append(" return false;\n");
        source.append(" }\n");
        // the generated local is named after the cursor ordinal
        source.append(" long value")
                .append(cursorOrdinal)
                .append(" = currentValue")
                .append(cursorOrdinal)
                .append('_')
                .append(slot)
                .append("();\n");
        source.append(" if (value").append(cursorOrdinal).append(" != current) {\n");
        source.append(" if (value").append(cursorOrdinal).append(" > max) {\n");
        source.append(" max = value").append(cursorOrdinal).append(";\n");
        source.append(" }\n");
        source.append(" allMatch = false;\n");
        source.append(" }\n");
    }
    source.append(" if (allMatch) {\n");
    source.append(" state().assign(").append(slot).append(", current);\n");
    source.append(" return true;\n");
    source.append(" }\n");
    source.append(" current = max;\n");
    source.append(" }\n");
    source.append(" }\n\n");
}
patternShape); + } else { + for (int slot : patternShape.visibleSlots()) { + appendCandidateMethods(source, patternOrdinal, patternShape, slot); + } + appendWitnessMethods(source, patternOrdinal, patternShape); + } + if (includeInferred) { + appendCompareWitnessMethod(source, patternOrdinal); + } + } + + private void appendRelationMethods(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape) { + appendLoadRelation(source, patternOrdinal, patternShape); + for (int slot : patternShape.visibleSlots()) { + appendRelationFrontierMethods(source, patternOrdinal, patternShape, slot); + } + appendRelationCountMethod(source, patternOrdinal, patternShape); + appendRelationWitnessMethods(source, patternOrdinal); + } + + private void appendRelationWitnessMethods(StringBuilder source, int patternOrdinal) { + appendAdvanceWitnessMethod(source, patternOrdinal, "Explicit"); + if (includeInferred) { + appendAdvanceWitnessMethod(source, patternOrdinal, "Inferred"); + } + } + + private void appendLoadRelation(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape) { + source.append(" private LmdbDerivedBinaryRelation relation").append(patternOrdinal).append("() {\n"); + source.append(" return relationGroup").append(relationGroupByPattern[patternOrdinal]).append("();\n"); + source.append(" }\n\n"); + } + + private void appendRelationGroupAccessor(StringBuilder source, RelationGroup relationGroup) { + int patternOrdinal = relationGroup.representativePatternOrdinal; + LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); + int sourceComponent = patternShape.derivedSourceComponent(); + int targetComponent = patternShape.derivedTargetComponent(); + source.append(" private LmdbDerivedBinaryRelation relationGroup") + .append(relationGroup.groupId) + .append("() {\n"); + source.append(" if (relationGroup").append(relationGroup.groupId).append("Loaded) {\n"); + source.append(" return 
relationGroup").append(relationGroup.groupId).append(";\n"); + source.append(" }\n"); + source.append(" metrics().recordRelationLoad();\n"); + source.append(" LmdbDerivedBinaryRelation.Builder builder = new LmdbDerivedBinaryRelation.Builder(") + .append(sourceComponent) + .append(", ") + .append(targetComponent) + .append(");\n"); + appendWitnessSeek(source, patternOrdinal, patternShape, "0L"); + source.append(" while (available").append(slotSuffix(patternOrdinal, -1)).append("Explicit"); + if (includeInferred) { + source.append(" || available").append(slotSuffix(patternOrdinal, -1)).append("Inferred"); + } + source.append(") {\n"); + if (includeInferred) { + appendMergedWitnessRowSelection(source, patternOrdinal, patternShape, true); + } else { + source.append(" builder.add(") + .append(componentAccessor(patternOrdinal, -1, "Explicit", sourceComponent)) + .append(", ") + .append(componentAccessor(patternOrdinal, -1, "Explicit", targetComponent)) + .append(");\n"); + source.append(" advanceWitness").append(patternOrdinal).append("Explicit();\n"); + } + source.append(" }\n"); + source.append(" relationGroup").append(relationGroup.groupId).append(" = builder.build();\n"); + source.append(" relationGroup").append(relationGroup.groupId).append("Loaded = true;\n"); + source.append(" return relationGroup").append(relationGroup.groupId).append(";\n"); + source.append(" }\n\n"); + } + + private void appendRelationFrontierMethods(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape, int slot) { + int component = patternShape.componentForSlot(slot); + int sourceSlot = patternShape.slotForComponent(patternShape.derivedSourceComponent()); + int groupId = relationGroupByPattern[patternOrdinal]; + String suffix = patternOrdinal + "_" + slot; + source.append(" private boolean openSlot").append(suffix).append("() {\n"); + source.append(" return seekSlot").append(suffix).append("(0L);\n"); + source.append(" }\n\n"); + source.append(" private 
boolean seekSlot").append(suffix).append("(long target) {\n"); + if (component == patternShape.derivedSourceComponent()) { + source.append(" frontierValues") + .append(suffix) + .append(" = rootFrontierValuesForRelationGroup") + .append(groupId) + .append("();\n"); + } else { + source.append(" long sourceValue = state().value(").append(sourceSlot).append(");\n"); + source.append(" frontierValues") + .append(suffix) + .append(" = frontierValuesForRelationGroup") + .append(groupId) + .append("(sourceValue);\n"); + } + source.append(" frontierIndex") + .append(suffix) + .append(" = seekFrontier(frontierValues") + .append(suffix) + .append(", target);\n"); + source.append(" if (frontierIndex") + .append(suffix) + .append(" >= frontierValues") + .append(suffix) + .append(".length) {\n"); + source.append(" frontierAvailable").append(suffix).append(" = false;\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" frontierValue") + .append(suffix) + .append(" = frontierValues") + .append(suffix) + .append("[frontierIndex") + .append(suffix) + .append("];\n"); + source.append(" frontierAvailable").append(suffix).append(" = true;\n"); + source.append(" return true;\n"); + source.append(" }\n\n"); + source.append(" private boolean nextDistinctSlot").append(suffix).append("() {\n"); + source.append(" if (!frontierAvailable").append(suffix).append(") {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" frontierIndex").append(suffix).append("++;\n"); + source.append(" if (frontierIndex") + .append(suffix) + .append(" >= frontierValues") + .append(suffix) + .append(".length) {\n"); + source.append(" frontierAvailable").append(suffix).append(" = false;\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" frontierValue") + .append(suffix) + .append(" = frontierValues") + .append(suffix) + .append("[frontierIndex") + .append(suffix) + .append("];\n"); + source.append(" return true;\n"); + 
source.append(" }\n\n"); + source.append(" private long currentValue").append(suffix).append("() {\n"); + source.append(" return frontierValue").append(suffix).append(";\n"); + source.append(" }\n\n"); + } + + private void appendRelationCountMethod(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape) { + int sourceSlot = patternShape.slotForComponent(patternShape.derivedSourceComponent()); + int targetSlot = patternShape.slotForComponent(patternShape.derivedTargetComponent()); + int groupId = relationGroupByPattern[patternOrdinal]; + source.append(" private long countMatchesPattern").append(patternOrdinal).append("() {\n"); + source.append(" metrics().recordWitnessScan();\n"); + source.append(" long sourceValue = state().value(").append(sourceSlot).append(");\n"); + source.append(" long targetValue = state().value(").append(targetSlot).append(");\n"); + source.append(" return countForRelationGroup").append(groupId).append("(sourceValue, targetValue);\n"); + source.append(" }\n\n"); + } + + private void appendRelationGroupCacheMethods(StringBuilder source, int groupId) { + source.append(" private long[] rootFrontierValuesForRelationGroup").append(groupId).append("() {\n"); + source.append(" if (relationGroup").append(groupId).append("RootFrontierLoaded) {\n"); + source.append(" metrics().recordFrontierHit();\n"); + source.append(" return relationGroup").append(groupId).append("RootFrontierValues;\n"); + source.append(" }\n"); + source.append(" metrics().recordFrontierLoad();\n"); + source.append(" boolean relationLoaded = relationGroup").append(groupId).append("Loaded;\n"); + source.append(" relationGroup") + .append(groupId) + .append("RootFrontierValues = relationGroup") + .append(groupId) + .append("().rootFrontierValues();\n"); + source.append(" if (relationLoaded) {\n"); + source.append(" metrics().recordRelationHit();\n"); + source.append(" }\n"); + source.append(" metrics().recordRelationUse();\n"); + source.append(" 
relationGroup").append(groupId).append("RootFrontierLoaded = true;\n"); + source.append(" return relationGroup").append(groupId).append("RootFrontierValues;\n"); + source.append(" }\n\n"); + source.append(" private long[] frontierValuesForRelationGroup") + .append(groupId) + .append("(long sourceValue) {\n"); + source.append(" int mask = relationGroup").append(groupId).append("FrontierKeys.length - 1;\n"); + source.append(" int slot = mixFrontierCacheKey(sourceValue) & mask;\n"); + source.append(" int evictionSlot = slot;\n"); + source.append(" for (int probe = 0; probe < 8; probe++) {\n"); + source.append(" if (!relationGroup").append(groupId).append("FrontierUsed[slot]) {\n"); + source.append(" metrics().recordFrontierLoad();\n"); + source.append(" boolean relationLoaded = relationGroup").append(groupId).append("Loaded;\n"); + source.append(" long[] values = relationGroup") + .append(groupId) + .append("().frontierValues(sourceValue);\n"); + source.append(" if (relationLoaded) {\n"); + source.append(" metrics().recordRelationHit();\n"); + source.append(" }\n"); + source.append(" metrics().recordRelationUse();\n"); + source.append(" relationGroup").append(groupId).append("FrontierUsed[slot] = true;\n"); + source.append(" relationGroup").append(groupId).append("FrontierKeys[slot] = sourceValue;\n"); + source.append(" relationGroup").append(groupId).append("FrontierValues[slot] = values;\n"); + source.append(" return values;\n"); + source.append(" }\n"); + source.append(" if (relationGroup").append(groupId).append("FrontierKeys[slot] == sourceValue) {\n"); + source.append(" metrics().recordFrontierHit();\n"); + source.append(" return relationGroup").append(groupId).append("FrontierValues[slot];\n"); + source.append(" }\n"); + source.append(" slot = (slot + 1) & mask;\n"); + source.append(" }\n"); + source.append(" metrics().recordFrontierLoad();\n"); + source.append(" boolean relationLoaded = relationGroup").append(groupId).append("Loaded;\n"); + source.append(" 
long[] values = relationGroup") + .append(groupId) + .append("().frontierValues(sourceValue);\n"); + source.append(" if (relationLoaded) {\n"); + source.append(" metrics().recordRelationHit();\n"); + source.append(" }\n"); + source.append(" metrics().recordRelationUse();\n"); + source.append(" relationGroup").append(groupId).append("FrontierUsed[evictionSlot] = true;\n"); + source.append(" relationGroup").append(groupId).append("FrontierKeys[evictionSlot] = sourceValue;\n"); + source.append(" relationGroup").append(groupId).append("FrontierValues[evictionSlot] = values;\n"); + source.append(" return values;\n"); + source.append(" }\n\n"); + source.append(" private long countForRelationGroup") + .append(groupId) + .append("(long sourceValue, long targetValue) {\n"); + source.append(" int mask = relationGroup").append(groupId).append("CountSourceKeys.length - 1;\n"); + source.append(" int slot = mixCountCacheKey(sourceValue, targetValue) & mask;\n"); + source.append(" int evictionSlot = slot;\n"); + source.append(" for (int probe = 0; probe < 8; probe++) {\n"); + source.append(" if (!relationGroup").append(groupId).append("CountUsed[slot]) {\n"); + source.append(" metrics().recordCountLoad();\n"); + source.append(" boolean relationLoaded = relationGroup").append(groupId).append("Loaded;\n"); + source.append(" long count = relationGroup") + .append(groupId) + .append("().count(sourceValue, targetValue);\n"); + source.append(" if (relationLoaded) {\n"); + source.append(" metrics().recordRelationHit();\n"); + source.append(" }\n"); + source.append(" metrics().recordRelationUse();\n"); + source.append(" relationGroup").append(groupId).append("CountUsed[slot] = true;\n"); + source.append(" relationGroup").append(groupId).append("CountSourceKeys[slot] = sourceValue;\n"); + source.append(" relationGroup").append(groupId).append("CountTargetKeys[slot] = targetValue;\n"); + source.append(" relationGroup").append(groupId).append("CountValues[slot] = count;\n"); + 
source.append(" return count;\n"); + source.append(" }\n"); + source.append(" if (relationGroup").append(groupId).append("CountSourceKeys[slot] == sourceValue\n"); + source.append(" && relationGroup") + .append(groupId) + .append("CountTargetKeys[slot] == targetValue) {\n"); + source.append(" metrics().recordCountHit();\n"); + source.append(" return relationGroup").append(groupId).append("CountValues[slot];\n"); + source.append(" }\n"); + source.append(" slot = (slot + 1) & mask;\n"); + source.append(" }\n"); + source.append(" metrics().recordCountLoad();\n"); + source.append(" boolean relationLoaded = relationGroup").append(groupId).append("Loaded;\n"); + source.append(" long count = relationGroup") + .append(groupId) + .append("().count(sourceValue, targetValue);\n"); + source.append(" if (relationLoaded) {\n"); + source.append(" metrics().recordRelationHit();\n"); + source.append(" }\n"); + source.append(" metrics().recordRelationUse();\n"); + source.append(" relationGroup").append(groupId).append("CountUsed[evictionSlot] = true;\n"); + source.append(" relationGroup") + .append(groupId) + .append("CountSourceKeys[evictionSlot] = sourceValue;\n"); + source.append(" relationGroup") + .append(groupId) + .append("CountTargetKeys[evictionSlot] = targetValue;\n"); + source.append(" relationGroup").append(groupId).append("CountValues[evictionSlot] = count;\n"); + source.append(" return count;\n"); + source.append(" }\n\n"); + } + + private void appendCandidateMethods(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape, int slot) { + String suffix = patternOrdinal + "_" + slot; + source.append(" private boolean openSlot").append(suffix).append("() {\n"); + source.append(" return seekSlot").append(suffix).append("(0L);\n"); + source.append(" }\n\n"); + source.append(" private boolean seekSlot").append(suffix).append("(long target) {\n"); + appendSeekCursorInvocation(source, patternOrdinal, patternShape, slot, "Explicit", 
"target"); + if (includeInferred) { + appendSeekCursorInvocation(source, patternOrdinal, patternShape, slot, "Inferred", "target"); + appendMergeDistinctCurrent(source, patternOrdinal, slot); + } else { + source.append(" currentAvailable") + .append(suffix) + .append(" = available") + .append(slotSuffix(patternOrdinal, slot)) + .append("Explicit;\n"); + source.append(" if (!currentAvailable").append(suffix).append(") {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" current") + .append(suffix) + .append(" = ") + .append(componentAccessor(patternOrdinal, slot, "Explicit", + patternShape.componentForSlot(slot))) + .append(";\n"); + source.append(" return true;\n"); + } + source.append(" }\n\n"); + source.append(" private boolean nextDistinctSlot").append(suffix).append("() {\n"); + if (includeInferred) { + source.append(" if (!currentAvailable").append(suffix).append(") {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" long previous = current").append(suffix).append(";\n"); + source.append(" while (available") + .append(slotSuffix(patternOrdinal, slot)) + .append("Explicit && ") + .append(componentAccessor(patternOrdinal, slot, "Explicit", + patternShape.componentForSlot(slot))) + .append(" == previous) {\n"); + source.append(" advanceCursor").append(slotSuffix(patternOrdinal, slot)).append("Explicit();\n"); + source.append(" }\n"); + source.append(" while (available") + .append(slotSuffix(patternOrdinal, slot)) + .append("Inferred && ") + .append(componentAccessor(patternOrdinal, slot, "Inferred", + patternShape.componentForSlot(slot))) + .append(" == previous) {\n"); + source.append(" advanceCursor").append(slotSuffix(patternOrdinal, slot)).append("Inferred();\n"); + source.append(" }\n"); + appendMergeDistinctCurrent(source, patternOrdinal, slot); + } else { + source.append(" if (!currentAvailable").append(suffix).append(") {\n"); + source.append(" return false;\n"); + source.append(" }\n"); 
+ source.append(" long previous = current").append(suffix).append(";\n"); + source.append(" while (advanceCursor") + .append(slotSuffix(patternOrdinal, slot)) + .append("Explicit()) {\n"); + source.append(" long candidate = ") + .append(componentAccessor(patternOrdinal, slot, "Explicit", + patternShape.componentForSlot(slot))) + .append(";\n"); + source.append(" if (candidate != previous) {\n"); + source.append(" current").append(suffix).append(" = candidate;\n"); + source.append(" return true;\n"); + source.append(" }\n"); + source.append(" }\n"); + source.append(" currentAvailable").append(suffix).append(" = false;\n"); + source.append(" return false;\n"); + } + source.append(" }\n\n"); + source.append(" private long currentValue").append(suffix).append("() {\n"); + source.append(" return current").append(suffix).append(";\n"); + source.append(" }\n\n"); + } + + private void appendMergeDistinctCurrent(StringBuilder source, int patternOrdinal, int slot) { + String suffix = patternOrdinal + "_" + slot; + String cursorSuffix = slotSuffix(patternOrdinal, slot); + int component = shape.pattern(patternOrdinal).componentForSlot(slot); + source.append(" if (!available") + .append(cursorSuffix) + .append("Explicit && !available") + .append(cursorSuffix) + .append("Inferred) {\n"); + source.append(" currentAvailable").append(suffix).append(" = false;\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" if (!available").append(cursorSuffix).append("Inferred) {\n"); + source.append(" current") + .append(suffix) + .append(" = ") + .append(componentAccessor(patternOrdinal, slot, "Explicit", component)) + .append(";\n"); + source.append(" currentAvailable").append(suffix).append(" = true;\n"); + source.append(" return true;\n"); + source.append(" }\n"); + source.append(" if (!available").append(cursorSuffix).append("Explicit) {\n"); + source.append(" current") + .append(suffix) + .append(" = ") + .append(componentAccessor(patternOrdinal, slot, 
"Inferred", component)) + .append(";\n"); + source.append(" currentAvailable").append(suffix).append(" = true;\n"); + source.append(" return true;\n"); + source.append(" }\n"); + source.append(" long explicitValue = ") + .append(componentAccessor(patternOrdinal, slot, "Explicit", component)) + .append(";\n"); + source.append(" long inferredValue = ") + .append(componentAccessor(patternOrdinal, slot, "Inferred", component)) + .append(";\n"); + source.append(" current") + .append(suffix) + .append(" = explicitValue <= inferredValue ? explicitValue : inferredValue;\n"); + source.append(" currentAvailable").append(suffix).append(" = true;\n"); + source.append(" return true;\n"); + } + + private void appendWitnessMethods(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape) { + String suffix = slotSuffix(patternOrdinal, -1); + source.append(" private long countMatchesPattern").append(patternOrdinal).append("() {\n"); + source.append(" metrics().recordWitnessScan();\n"); + appendWitnessSeek(source, patternOrdinal, patternShape, "0L"); + source.append(" long count = 0L;\n"); + if (includeInferred) { + source.append(" while (available") + .append(suffix) + .append("Explicit || available") + .append(suffix) + .append("Inferred) {\n"); + appendMergedWitnessRowSelection(source, patternOrdinal, patternShape, false); + source.append(" }\n"); + } else { + source.append(" while (available").append(suffix).append("Explicit) {\n"); + source.append(" count++;\n"); + source.append(" advanceWitness").append(patternOrdinal).append("Explicit();\n"); + source.append(" }\n"); + } + source.append(" return count;\n"); + source.append(" }\n\n"); + appendAdvanceWitnessMethod(source, patternOrdinal, "Explicit"); + if (includeInferred) { + appendAdvanceWitnessMethod(source, patternOrdinal, "Inferred"); + } + } + + private void appendWitnessSeek(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape, String targetExpr) 
{ + appendSeekCursorInvocation(source, patternOrdinal, patternShape, -1, "Explicit", targetExpr); + if (includeInferred) { + appendSeekCursorInvocation(source, patternOrdinal, patternShape, -1, "Inferred", targetExpr); + } + } + + private void appendMergedWitnessRowSelection(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape, boolean buildRelation) { + String suffix = slotSuffix(patternOrdinal, -1); + source.append(" if (!available") + .append(suffix) + .append("Inferred || (available") + .append(suffix) + .append("Explicit && compareWitnessRows") + .append(patternOrdinal) + .append("() <= 0)) {\n"); + source.append(" boolean duplicate = available") + .append(suffix) + .append("Inferred && compareWitnessRows") + .append(patternOrdinal) + .append("() == 0;\n"); + if (buildRelation) { + source.append(" builder.add(") + .append(componentAccessor(patternOrdinal, -1, "Explicit", + patternShape.derivedSourceComponent())) + .append(", ") + .append(componentAccessor(patternOrdinal, -1, "Explicit", + patternShape.derivedTargetComponent())) + .append(");\n"); + } else { + source.append(" count++;\n"); + } + source.append(" advanceWitness").append(patternOrdinal).append("Explicit();\n"); + source.append(" if (duplicate) {\n"); + source.append(" advanceWitness").append(patternOrdinal).append("Inferred();\n"); + source.append(" }\n"); + source.append(" } else {\n"); + if (buildRelation) { + source.append(" builder.add(") + .append(componentAccessor(patternOrdinal, -1, "Inferred", + patternShape.derivedSourceComponent())) + .append(", ") + .append(componentAccessor(patternOrdinal, -1, "Inferred", + patternShape.derivedTargetComponent())) + .append(");\n"); + } else { + source.append(" count++;\n"); + } + source.append(" advanceWitness").append(patternOrdinal).append("Inferred();\n"); + source.append(" }\n"); + } + + private void appendCompareWitnessMethod(StringBuilder source, int patternOrdinal) { + source.append(" private int 
compareWitnessRows").append(patternOrdinal).append("() {\n"); + appendCompareRows(source, patternOrdinal, -1); + source.append(" }\n\n"); + } + + private void appendCompareRows(StringBuilder source, int patternOrdinal, int slot) { + LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); + for (int i = 0; i < 4; i++) { + int component = patternShape.indexComponent(i); + source.append(" int compare") + .append(i) + .append(" = Long.compare(") + .append(componentAccessor(patternOrdinal, slot, "Explicit", component)) + .append(", ") + .append(componentAccessor(patternOrdinal, slot, "Inferred", component)) + .append(");\n"); + source.append(" if (compare").append(i).append(" != 0) {\n"); + source.append(" return compare").append(i).append(";\n"); + source.append(" }\n"); + } + source.append(" return 0;\n"); + } + + private void appendAdvanceWitnessMethod(StringBuilder source, int patternOrdinal, String kind) { + String suffix = slotSuffix(patternOrdinal, -1); + source.append(" private boolean advanceWitness").append(patternOrdinal).append(kind).append("() {\n"); + source.append(" return advanceCursor").append(suffix).append(kind).append("();\n"); + source.append(" }\n\n"); + } + + private void appendSeekCursorInvocation(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape, int slot, String kind, String targetExpr) { + String suffix = slotSuffix(patternOrdinal, slot); + int keyFieldIndex = slot >= 0 ? patternShape.keyFieldIndexForSlot(slot) + : patternShape.visibleSlots().length == 0 + ? 0 + : patternShape.indexFields().length - (patternShape.hasHiddenTerms() ? 1 : 0); + if (slot < 0) { + keyFieldIndex = patternShape.hasHiddenTerms() ? 
patternShape.indexFields().length - 1 + : patternShape.indexFields().length; + } + source.append(" lower").append(suffix).append(kind).append(".clear();\n"); + for (int i = 0; i < patternShape.indexFields().length; i++) { + int component = patternShape.indexComponent(i); + if (slot >= 0 && i == keyFieldIndex) { + source.append(" Varint.writeUnsigned(lower") + .append(suffix) + .append(kind) + .append(", ") + .append(targetExpr) + .append(");\n"); + break; + } + if (slot < 0 && i == keyFieldIndex) { + break; + } + source.append(" Varint.writeUnsigned(lower") + .append(suffix) + .append(kind) + .append(", ") + .append(componentValueExpression(patternOrdinal, patternShape, component)) + .append(");\n"); + } + source.append(" key") + .append(suffix) + .append(kind) + .append(".mv_data(lower") + .append(suffix) + .append(kind) + .append(".flip());\n"); + source.append(" if (mdb_cursor_get(cursor") + .append(suffix) + .append(kind) + .append(", key") + .append(suffix) + .append(kind) + .append(", data") + .append(suffix) + .append(kind) + .append(", MDB_SET_RANGE) != MDB_SUCCESS) {\n"); + source.append(" available").append(suffix).append(kind).append(" = false;\n"); + if (slot >= 0) { + source.append(" return false;\n"); + } + source.append(" } else {\n"); + source.append(" decodeRow").append(suffix).append(kind).append("();\n"); + source.append(" available") + .append(suffix) + .append(kind) + .append(" = withinUpperBound") + .append(suffix) + .append(kind) + .append("();\n"); + source.append(" }\n"); + if (slot >= 0) { + source.append(" return available").append(suffix).append(kind).append(";\n"); + } + } + + private void appendCloseResources(StringBuilder source) { + source.append(" @Override\n"); + source.append(" protected void closeResources() {\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); + if (patternShape.derivedBinaryRelation()) 
{ + appendCloseCursorResources(source, patternOrdinal, -1); + continue; + } + for (int slot : patternShape.visibleSlots()) { + appendCloseCursorResources(source, patternOrdinal, slot); + } + appendCloseCursorResources(source, patternOrdinal, -1); + } + source.append(" }\n\n"); + } + + private void appendCloseCursorResources(StringBuilder source, int patternOrdinal, int slot) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" mdb_cursor_close(cursor").append(suffix).append("Explicit);\n"); + source.append(" pool.free(key").append(suffix).append("Explicit);\n"); + source.append(" pool.free(data").append(suffix).append("Explicit);\n"); + source.append(" pool.free(lower").append(suffix).append("Explicit);\n"); + if (includeInferred) { + source.append(" mdb_cursor_close(cursor").append(suffix).append("Inferred);\n"); + source.append(" pool.free(key").append(suffix).append("Inferred);\n"); + source.append(" pool.free(data").append(suffix).append("Inferred);\n"); + source.append(" pool.free(lower").append(suffix).append("Inferred);\n"); + } + } + + private void appendHelpers(StringBuilder source) { + source.append(" private static final long[] EMPTY_FRONTIER_VALUES = new long[0];\n\n"); + if (relationGroups.length > 0) { + source.append(" private static int seekFrontier(long[] values, long target) {\n"); + source.append(" int index = java.util.Arrays.binarySearch(values, target);\n"); + source.append(" return index >= 0 ? 
index : -index - 1;\n"); + source.append(" }\n\n"); + source.append(" private static int mixFrontierCacheKey(long value) {\n"); + source.append(" return Long.hashCode(value);\n"); + source.append(" }\n\n"); + source.append(" private static int mixCountCacheKey(long sourceValue, long targetValue) {\n"); + source.append(" return 31 * Long.hashCode(sourceValue) + Long.hashCode(targetValue);\n"); + source.append(" }\n\n"); + } + source.append(" private long openCursor(int dbi) {\n"); + source.append(" long readStamp = readLock();\n"); + source.append(" MemoryStack stack = MemoryStack.stackPush();\n"); + source.append(" try {\n"); + source.append(" PointerBuffer pp = stack.mallocPointer(1);\n"); + source.append(" try {\n"); + source.append(" E(mdb_cursor_open(txn, dbi, pp));\n"); + source.append(" } catch (java.io.IOException e) {\n"); + source.append(" throw new SailException(e);\n"); + source.append(" }\n"); + source.append(" return pp.get(0);\n"); + source.append(" } finally {\n"); + source.append(" stack.pop();\n"); + source.append(" txnLockManager.unlockRead(readStamp);\n"); + source.append(" }\n"); + source.append(" }\n\n"); + source.append(" private long readLock() {\n"); + source.append(" try {\n"); + source.append(" return txnLockManager.readLock();\n"); + source.append(" } catch (InterruptedException e) {\n"); + source.append(" Thread.currentThread().interrupt();\n"); + source.append(" throw new SailException(e);\n"); + source.append(" }\n"); + source.append(" }\n\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); + if (patternShape.derivedBinaryRelation()) { + appendCursorRowHelpers(source, patternOrdinal, -1, patternShape); + continue; + } + for (int slot : patternShape.visibleSlots()) { + appendCursorRowHelpers(source, patternOrdinal, slot, patternShape); + } + appendCursorRowHelpers(source, patternOrdinal, -1, patternShape); + } + 
} + + private void appendCursorRowHelpers(StringBuilder source, int patternOrdinal, int slot, + LmdbLftjExecutionShape.PatternShape patternShape) { + String suffix = slotSuffix(patternOrdinal, slot); + appendDecodeMethod(source, patternOrdinal, slot, patternShape, "Explicit"); + appendWithinUpperBoundMethod(source, patternOrdinal, slot, patternShape, "Explicit"); + appendAdvanceCursorMethod(source, patternOrdinal, slot, patternShape, "Explicit"); + if (includeInferred) { + appendDecodeMethod(source, patternOrdinal, slot, patternShape, "Inferred"); + appendWithinUpperBoundMethod(source, patternOrdinal, slot, patternShape, "Inferred"); + appendAdvanceCursorMethod(source, patternOrdinal, slot, patternShape, "Inferred"); + } + } + + private void appendDecodeMethod(StringBuilder source, int patternOrdinal, int slot, + LmdbLftjExecutionShape.PatternShape patternShape, String kind) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" private void decodeRow").append(suffix).append(kind).append("() {\n"); + source.append(" ByteBuffer key = key").append(suffix).append(kind).append(".mv_data().duplicate();\n"); + for (int i = 0; i < patternShape.indexFields().length; i++) { + String fieldName = componentFieldName(patternShape.indexComponent(i)); + source.append(" ") + .append(fieldName) + .append(suffix) + .append(kind) + .append(" = Varint.readUnsigned(key);\n"); + } + source.append(" }\n\n"); + } + + private void appendWithinUpperBoundMethod(StringBuilder source, int patternOrdinal, int slot, + LmdbLftjExecutionShape.PatternShape patternShape, String kind) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" private boolean withinUpperBound").append(suffix).append(kind).append("() {\n"); + for (int i = 0; i < patternShape.indexFields().length; i++) { + int component = patternShape.indexComponent(i); + String valueExpr = componentAccessor(patternOrdinal, slot, kind, component); + String upperExpr = upperBoundExpression(patternOrdinal, 
patternShape, slot, component); + source.append(" int compare") + .append(i) + .append(" = Long.compare(") + .append(valueExpr) + .append(", ") + .append(upperExpr) + .append(");\n"); + source.append(" if (compare").append(i).append(" != 0) {\n"); + source.append(" return compare").append(i).append(" < 0;\n"); + source.append(" }\n"); + } + source.append(" return true;\n"); + source.append(" }\n\n"); + } + + private void appendAdvanceCursorMethod(StringBuilder source, int patternOrdinal, int slot, + LmdbLftjExecutionShape.PatternShape patternShape, String kind) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" private boolean advanceCursor").append(suffix).append(kind).append("() {\n"); + source.append(" if (!available").append(suffix).append(kind).append(") {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" long readStamp = readLock();\n"); + source.append(" try {\n"); + source.append(" if (mdb_cursor_get(cursor") + .append(suffix) + .append(kind) + .append(", key") + .append(suffix) + .append(kind) + .append(", data") + .append(suffix) + .append(kind) + .append(", MDB_NEXT) != MDB_SUCCESS) {\n"); + source.append(" available").append(suffix).append(kind).append(" = false;\n"); + source.append(" return false;\n"); + source.append(" }\n"); + source.append(" decodeRow").append(suffix).append(kind).append("();\n"); + source.append(" available") + .append(suffix) + .append(kind) + .append(" = withinUpperBound") + .append(suffix) + .append(kind) + .append("();\n"); + source.append(" return available").append(suffix).append(kind).append(";\n"); + source.append(" } finally {\n"); + source.append(" txnLockManager.unlockRead(readStamp);\n"); + source.append(" }\n"); + source.append(" }\n\n"); + } + + private String slotSuffix(int patternOrdinal, int slot) { + return "P" + patternOrdinal + (slot >= 0 ? 
"S" + slot : "W"); + } + + private String componentAccessor(int patternOrdinal, int slot, String kind, int component) { + return componentFieldName(component) + slotSuffix(patternOrdinal, slot) + kind; + } + + private String componentFieldName(int component) { + switch (component) { + case TripleStore.SUBJ_IDX: + return "subj"; + case TripleStore.PRED_IDX: + return "pred"; + case TripleStore.OBJ_IDX: + return "obj"; + case TripleStore.CONTEXT_IDX: + return "ctx"; + default: + throw new IllegalArgumentException("Unknown LMDB component: " + component); + } + } + + private String componentValueExpression(int patternOrdinal, LmdbLftjExecutionShape.PatternShape patternShape, + int component) { + if (patternShape.isConstantComponent(component)) { + switch (component) { + case TripleStore.SUBJ_IDX: + return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.SUBJ_IDX + ")"; + case TripleStore.PRED_IDX: + return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.PRED_IDX + ")"; + case TripleStore.OBJ_IDX: + return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.OBJ_IDX + ")"; + case TripleStore.CONTEXT_IDX: + return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.CONTEXT_IDX + ")"; + default: + throw new IllegalArgumentException("Unknown LMDB component: " + component); + } + } + int slot = patternShape.slotForComponent(component); + if (slot >= 0) { + return "state().value(" + slot + ")"; + } + return "0L"; + } + + private String upperBoundExpression(int patternOrdinal, LmdbLftjExecutionShape.PatternShape patternShape, + int slot, + int component) { + if (slot >= 0 && component == patternShape.componentForSlot(slot)) { + return "Long.MAX_VALUE"; + } + if (patternShape.isConstantComponent(component)) { + return componentValueExpression(patternOrdinal, patternShape, component); + } + int boundSlot = patternShape.slotForComponent(component); + if (boundSlot >= 0) { + return "state().isBound(" + 
boundSlot + ") ? state().value(" + boundSlot + ") : Long.MAX_VALUE"; + } + return "Long.MAX_VALUE"; + } + + private RelationGroup[] collectRelationGroups() { + Map<RelationGroupKey, RelationGroup> groups = new HashMap<>(); // lookup: equivalent derived relations share one group + List<RelationGroup> ordered = new ArrayList<>(); // groups in first-seen order; list index == groupId + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); + if (!patternShape.derivedBinaryRelation()) { + continue; + } + RelationGroupKey key = new RelationGroupKey( + patternShape.indexName(), + patternShape.derivedSourceComponent(), + patternShape.derivedTargetComponent(), + plan.patternPlans().get(patternOrdinal).predicateTerm().constantValue()); + RelationGroup group = groups.get(key); + if (group == null) { + group = new RelationGroup(ordered.size(), patternOrdinal); + groups.put(key, group); + ordered.add(group); + } + relationGroupByPattern[patternOrdinal] = group.groupId; + } + return ordered.toArray(new RelationGroup[0]); + } + + private static final class RelationGroup { + private final int groupId; + private final int representativePatternOrdinal; + + private RelationGroup(int groupId, int representativePatternOrdinal) { + this.groupId = groupId; + this.representativePatternOrdinal = representativePatternOrdinal; + } + } + + private static final class RelationGroupKey { + private final String indexName; + private final int sourceComponent; + private final int targetComponent; + private final Object predicateValue; + + private RelationGroupKey(String indexName, int sourceComponent, int targetComponent, + Object predicateValue) { + this.indexName = indexName; + this.sourceComponent = sourceComponent; + this.targetComponent = targetComponent; + this.predicateValue = predicateValue; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof RelationGroupKey)) { + return false; + } + RelationGroupKey o = (RelationGroupKey) other; + if (sourceComponent != o.sourceComponent || targetComponent !=
o.targetComponent + || !indexName.equals(o.indexName)) { + return false; + } + return predicateValue == null ? o.predicateValue == null : predicateValue.equals(o.predicateValue); + } + + @Override + public int hashCode() { + int result = indexName.hashCode(); + result = 31 * result + sourceComponent; + result = 31 * result + targetComponent; + result = 31 * result + (predicateValue == null ? 0 : predicateValue.hashCode()); + return result; + } + } + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java index d0777bd52a..d441412261 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java @@ -24,43 +24,43 @@ public final class LmdbLftjMetrics { private long relationHits; private long relationUses; - void recordCandidateScan() { + public void recordCandidateScan() { candidateScans++; } - void recordWitnessScan() { + public void recordWitnessScan() { witnessScans++; } - void recordEmitted(long count) { + public void recordEmitted(long count) { emittedBindings += count; } - void recordFrontierLoad() { + public void recordFrontierLoad() { frontierLoads++; } - void recordFrontierHit() { + public void recordFrontierHit() { frontierHits++; } - void recordCountLoad() { + public void recordCountLoad() { countLoads++; } - void recordCountHit() { + public void recordCountHit() { countHits++; } - void recordRelationLoad() { + public void recordRelationLoad() { relationLoads++; } - void recordRelationHit() { + public void recordRelationHit() { relationHits++; } - void recordRelationUse() { + public void recordRelationUse() { relationUses++; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java new 
file mode 100644 index 0000000000..d27f67e79a --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +final class LmdbLftjTieredCodegenCompiler extends LmdbLftjCodegenCompiler { + + static final LmdbLftjTieredCodegenCompiler INSTANCE = new LmdbLftjTieredCodegenCompiler(); + + private final LmdbLftjFullCodegenCompiler full = LmdbLftjFullCodegenCompiler.INSTANCE; + + @Override + String cacheKey(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return full.cacheKey(plan, shape, includeInferred); + } + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return full.compile(plan, shape, includeInferred); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index a41df58b01..652cdc6fe1 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -183,7 +183,7 @@ private boolean isDefaultDataset(Dataset dataset) { && dataset.getDefaultInsertGraph() == null); } - private LmdbQueryAccess createQueryAccess(boolean includeInferred) { + protected 
LmdbQueryAccess createQueryAccess(boolean includeInferred) { LmdbSailStore backingStore = lmdbStore.getBackingStore(); TripleStore tripleStore = backingStore.getTripleStore(); ValueStore valueStore = backingStore.getValueStore(); @@ -283,9 +283,18 @@ public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactor public void cacheCompiledPlanFailure(String executionKey, String message) { lmdbStore.codegenCache().putFailure(executionKey, message); } + + @Override + public LmdbLftjCodegenCompiler codegenCompiler() { + return LmdbStoreConnection.this.codegenCompiler(); + } }; } + protected LmdbLftjCodegenCompiler codegenCompiler() { + return LmdbLftjFullCodegenCompiler.INSTANCE; + } + @Override protected CloseableIteration getStatementsInternal(Resource subj, IRI pred, Value obj, diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java index 25459309f3..fd2b6d22e7 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java @@ -44,7 +44,7 @@ /** * Utility class for working with LMDB. 
*/ -final class LmdbUtil { +public final class LmdbUtil { private static final Logger logger = LoggerFactory.getLogger(LmdbUtil.class); @@ -62,7 +62,7 @@ final class LmdbUtil { private LmdbUtil() { } - static int E(int rc) throws IOException { + public static int E(int rc) throws IOException { if (rc != MDB_SUCCESS && rc != MDB_NOTFOUND && rc != MDB_KEYEXIST) { IOException ioException = new IOException(mdb_strerror(rc)); logger.info("Possible LMDB error: {}", mdb_strerror(rc), ioException); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java index ebb9952582..277151b16f 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java @@ -18,7 +18,7 @@ /** * A simple pool for {@link MDBVal}, {@link ByteBuffer} and {@link Statistics} instances. */ -class Pool { +public class Pool { // thread-local pool instance private static final ThreadLocal threadlocal = ThreadLocal.withInitial(Pool::new); @@ -29,14 +29,14 @@ class Pool { private int keyPoolIndex = -1; private int statisticsPoolIndex = -1; - final MDBVal getVal() { + public final MDBVal getVal() { if (valPoolIndex >= 0) { return valPool[valPoolIndex--]; } return MDBVal.malloc(); } - final ByteBuffer getKeyBuffer() { + public final ByteBuffer getKeyBuffer() { if (keyPoolIndex >= 0) { ByteBuffer bb = keyPool[keyPoolIndex--]; bb.clear(); @@ -45,14 +45,14 @@ final ByteBuffer getKeyBuffer() { return MemoryUtil.memAlloc(TripleStore.MAX_KEY_LENGTH); } - final Statistics getStatistics() { + public final Statistics getStatistics() { if (statisticsPoolIndex >= 0) { return statisticsPool[statisticsPoolIndex--]; } return new Statistics(); } - final void free(MDBVal val) { + public final void free(MDBVal val) { if (valPoolIndex < valPool.length - 1) { valPool[++valPoolIndex] = val; } else { @@ -60,7 +60,7 @@ final void free(MDBVal val) { } } - 
final void free(ByteBuffer bb) { + public final void free(ByteBuffer bb) { if (keyPoolIndex < keyPool.length - 1) { keyPool[++keyPoolIndex] = bb; } else { @@ -68,13 +68,13 @@ final void free(ByteBuffer bb) { } } - final void free(Statistics statistics) { + public final void free(Statistics statistics) { if (statisticsPoolIndex < statisticsPool.length - 1) { statisticsPool[++statisticsPoolIndex] = statistics; } } - final void close() { + public final void close() { while (valPoolIndex >= 0) { valPool[valPoolIndex--].close(); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index 4c6ec5918c..8b003ba8ff 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -104,7 +104,7 @@ * an actual RDF value. */ @SuppressWarnings("deprecation") -class TripleStore implements Closeable { +public class TripleStore implements Closeable { static ConcurrentHashMap stats = new ConcurrentHashMap<>(); static long hit = 0; @@ -538,6 +538,10 @@ LmdbTrieKeyCursor openTrieCursor(Txn txn, String indexName, boolean explicit) { return new LmdbTrieDbCursor(getIndex(indexName), explicit, txn); } + public TripleIndex tripleIndex(String indexName) { + return getIndex(indexName); + } + boolean hasTriples(boolean explicit) throws IOException { TripleIndex mainIndex = indexes.get(0); return txnManager.doWith((stack, txn) -> { @@ -1227,7 +1231,7 @@ private void storeProperties(File propFile) throws IOException { } } - class TripleIndex { + public class TripleIndex { private final char[] fieldSeq; private final IndexKeyWriters.KeyWriter keyWriter; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java index 7b5ec043da..8df8437ee4 100644 --- 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java @@ -32,7 +32,7 @@ /** * Manager for LMDB transactions. */ -class TxnManager { +public class TxnManager { private final Mode mode; private final IdentityHashMap active = new IdentityHashMap<>(); @@ -160,7 +160,7 @@ enum Mode { NONE } - class Txn implements Closeable, AutoCloseable { + public class Txn implements Closeable, AutoCloseable { private final long txn; private long version; @@ -169,11 +169,11 @@ class Txn implements Closeable, AutoCloseable { this.txn = txn; } - long get() { + public long get() { return txn; } - StampedLongAdderLockManager lockManager() { + public StampedLongAdderLockManager lockManager() { return lockManager; } @@ -226,7 +226,7 @@ void setActive(boolean active) throws IOException { } } - long version() { + public long version() { return version; } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbBenchmarkStore.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbBenchmarkStore.java new file mode 100644 index 0000000000..734de3bde3 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbBenchmarkStore.java @@ -0,0 +1,52 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.io.File; + +import org.eclipse.rdf4j.sail.NotifyingSailConnection; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; + +public class LmdbBenchmarkStore extends LmdbStore { + + private final LmdbLftjCodegenCompiler codegenCompiler; + + public LmdbBenchmarkStore(File dataDir, LmdbStoreConfig config, LmdbLftjCodegenCompiler codegenCompiler) { + super(dataDir, config); + this.codegenCompiler = codegenCompiler; + } + + @Override + protected NotifyingSailConnection getConnectionInternal() throws SailException { + return new BenchmarkStoreConnection(this, codegenCompiler); + } + + public static class BenchmarkStoreConnection extends LmdbStoreConnection { + + private final LmdbLftjCodegenCompiler codegenCompiler; + + protected BenchmarkStoreConnection(LmdbStore sail, LmdbLftjCodegenCompiler codegenCompiler) { + super(sail); + this.codegenCompiler = codegenCompiler; + } + + public LmdbQueryAccess benchmarkQueryAccess(boolean includeInferred) { + return createQueryAccess(includeInferred); + } + + @Override + protected LmdbLftjCodegenCompiler codegenCompiler() { + return codegenCompiler != null ? codegenCompiler : super.codegenCompiler(); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBenchmarkMode.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBenchmarkMode.java new file mode 100644 index 0000000000..605ee5348a --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBenchmarkMode.java @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +public final class LmdbLftjBenchmarkMode { + + public static final String INTERPRETED = "interpreted"; + public static final String EXECUTOR_CODEGEN = "executor_codegen"; + public static final String FULL_CODEGEN = "full_codegen"; + + private LmdbLftjBenchmarkMode() { + } + + public static boolean lftjCodegenEnabled(String mode) { + switch (String.valueOf(mode)) { // String.valueOf maps null to "null", so a missing mode reaches invalidMode instead of an NPE + case INTERPRETED: + return false; + case EXECUTOR_CODEGEN: + case FULL_CODEGEN: + return true; + default: + throw invalidMode(mode); + } + } + + public static LmdbLftjCodegenCompiler compiler(String mode) { + switch (String.valueOf(mode)) { // same null handling as lftjCodegenEnabled + case INTERPRETED: + return null; // interpreted mode has no compiler + case EXECUTOR_CODEGEN: + return LmdbLftjCodegenCompiler.INSTANCE; + case FULL_CODEGEN: + return LmdbLftjFullCodegenCompiler.INSTANCE; + default: + throw invalidMode(mode); + } + } + + public static void validate(String mode) { + compiler(mode); // result ignored; called only for the IllegalArgumentException on an unknown mode + } + + private static IllegalArgumentException invalidMode(String mode) { + return new IllegalArgumentException( + "Unsupported LMDB LFTJ benchmark mode: " + mode + + " (expected interpreted, executor_codegen, or full_codegen)"); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index 2df5f94df8..2c67c51ae9 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -12,18 +12,34 @@ package org.eclipse.rdf4j.sail.lmdb; import
static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Field; import java.lang.reflect.Method; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.explanation.Explanation; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryBenchmark; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.Test; class LmdbLftjCodegenTest { @@ -42,6 +58,44 @@ void syntheticQueryAccessShouldEnableCodegenByDefault() { .isTrue(); } + @Test + void fullStackCompilerSourceShouldAvoidGenericTrieHelpers() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + String source = LmdbLftjFullCodegenCompiler.INSTANCE.sourceFor(plan, shape, false); + + assertThat(source) + .contains("AbstractLmdbFullStackCompiledLftjIteration") + .contains("mdb_cursor_open") + .doesNotContain("LmdbPrefixFrontierProvider") + .doesNotContain("LmdbTrieKeyCursor") + .doesNotContain("LmdbTrieDbCursor") + .doesNotContain("LmdbLftjCursor") + .doesNotContain("private LmdbCachedFrontier frontier") + .doesNotContain("valueAt(") + 
.doesNotContain("long[] lowerBound"); + } + + @Test + void fullStackCompilerShouldFingerprintIncludeInferredSeparately() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + + assertThat(LmdbLftjFullCodegenCompiler.INSTANCE.cacheKey(plan, shape, true)) + .isNotEqualTo(LmdbLftjFullCodegenCompiler.INSTANCE.cacheKey(plan, shape, false)); + } + + @Test + void fullStackCompilerShouldShareDerivedRelationGroupsAcrossEquivalentPatterns() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + String source = LmdbLftjFullCodegenCompiler.INSTANCE.sourceFor(plan, shape, false); + + assertThat(countMatches(source, "private LmdbDerivedBinaryRelation relationGroup\\d+;")).isEqualTo(2); + assertThat(countMatches(source, "private boolean relationGroup\\d+Loaded;")).isEqualTo(2); + assertThat(countMatches(source, "private LmdbDerivedBinaryRelation relation\\d+\\(\\)")).isEqualTo(3); + } + @Test void compiledAndInterpretedShouldProduceSameRowsForSyntheticCycle() { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); @@ -88,6 +142,169 @@ void compiledAndInterpretedShouldMatchForFullyBoundInput() { assertThat(compiled).containsExactlyElementsOf(interpreted); } + @Test + void fullStackCompilerShouldProduceGeneratedIterationOnRealStore() throws Exception { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + Files.writeString(Path.of(System.getProperty("java.io.tmpdir"), "lmdb-full-stack-generated.java"), + LmdbLftjFullCodegenCompiler.INSTANCE.sourceFor(plan, shape, false)); // debug dump in java.io.tmpdir + LmdbCompiledLftjFactory factory = LmdbLftjFullCodegenCompiler.INSTANCE.compile(plan, shape, false); + assertThat(factory.getClass().getName()).contains("GeneratedLmdbFullStackLftjFactory"); + try (FullCodegenFixture fixture = new FullCodegenFixture()) { + QueryEvaluationStep evaluationStep =
LmdbLftjSyntheticScenario.createEvaluationStep( + fixture.connection.benchmarkQueryAccess(false), plan); + + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + assertThat(iteration.getClass().getName()) + .contains("GeneratedLmdbFullStackLftjFactory") + .doesNotContain("LmdbLftjExecutor$LmdbLftjIteration"); + assertThat(iteration).hasNext(); + } + + LmdbLftjCodegenCache.CacheEntry cached = fixture.store.codegenCache() + .get(LmdbLftjFullCodegenCompiler.INSTANCE.cacheKey(plan, shape, false)); + assertThat(cached).isNotNull(); + assertThat(cached.compiled()).isTrue(); + } + } + + @Test + void fullStackCompilerShouldCompileSyntheticCycleWithIncludeInferred() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + try { + Files.writeString(Path.of(System.getProperty("java.io.tmpdir"), "lmdb-full-stack-generated-inferred.java"), + LmdbLftjFullCodegenCompiler.INSTANCE.sourceFor(plan, shape, true)); // debug dump in java.io.tmpdir + } catch (IOException e) { + throw new RuntimeException(e); + } + + assertThat(LmdbLftjFullCodegenCompiler.INSTANCE.compile(plan, shape, true).getClass().getName()) + .contains("GeneratedLmdbFullStackLftjFactory"); + } + + @Test + void fullCodegenBenchmarkCycleStateShouldUseGeneratedIteration() throws Exception { + LmdbLftjExecutorBenchmark.CycleState state = new LmdbLftjExecutorBenchmark.CycleState(); + state.derivedRelationEnabled = true; + state.benchmarkMode = LmdbLftjBenchmarkMode.FULL_CODEGEN; + state.setup(); + try { + QueryEvaluationStep evaluationStep = (QueryEvaluationStep) readField(state, "evaluationStep"); + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + assertThat(iteration.getClass().getName()) + .contains("GeneratedLmdbFullStackLftjFactory") + .doesNotContain("LmdbLftjExecutor$LmdbLftjIteration"); + assertThat(iteration).hasNext(); + } + } finally { + state.tearDown(); + } + } + + @Test + void
fullCodegenFoafBenchmarkShouldCompileGeneratedFactory() throws Exception { + assertFoafBenchmarkQueryCompilesGeneratedFactory(3); + } + + @Test + void fullCodegenFoafBenchmarkCycle4ShouldCompileGeneratedFactory() throws Exception { + assertFoafBenchmarkQueryCompilesGeneratedFactory(4); + } + + @Test + void fullCodegenFoafBenchmarkCycle5ShouldCompileGeneratedFactory() throws Exception { + assertFoafBenchmarkQueryCompilesGeneratedFactory(5); + } + + @Test + void fullCodegenFoafBenchmarkSequentialQueriesShouldKeepUsingGeneratedFactories() throws Exception { + FoafCliqueQueryBenchmark benchmark = new FoafCliqueQueryBenchmark(); + benchmark.peopleCount = 300; + benchmark.cliquePercentage = 30; + benchmark.minCliqueSize = 3; + benchmark.maxCliqueSize = 6; + benchmark.randomKnowsEdges = 900; + benchmark.seed = 12345L; + benchmark.benchmarkMode = LmdbLftjBenchmarkMode.FULL_CODEGEN; + benchmark.setup(); + try { + assertThat(benchmark.cycle3()).isPositive(); + assertThat(benchmark.cycle4()).isPositive(); + assertThat(benchmark.cycle5()).isPositive(); + SailRepository repository = (SailRepository) readField(benchmark, "repository"); + LmdbBenchmarkStore store = (LmdbBenchmarkStore) repository.getSail(); + assertThat(compiledFactoryClassNames(store.codegenCache())) + .hasSizeGreaterThanOrEqualTo(3) + .allSatisfy(name -> assertThat(name).contains("GeneratedLmdbFullStackLftjFactory")); + assertThat(cacheEntryDescriptions(store.codegenCache())) + .allSatisfy(description -> assertThat(description).doesNotContain("Unable to compile")); + } finally { + benchmark.tearDown(); + } + } + + @Test + void defaultStoreConnectionShouldUseFullCodegenCompiler() throws Exception { + try (DefaultCodegenFixture fixture = new DefaultCodegenFixture()) { + assertThat(fixture.connection.benchmarkQueryAccess(false).codegenCompiler()) + .isSameAs(LmdbLftjFullCodegenCompiler.INSTANCE); + } + } + + @Test + void compileFailureShouldNotSilentlyFallbackToInterpretedIteration() { + LmdbLftjPlan plan = 
LmdbLftjSyntheticScenario.createPlan(); + CachingQueryAccess queryAccess = new CachingQueryAccess(new FailingCompiler()); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); + + assertThatThrownBy(() -> drain(evaluationStep, EmptyBindingSet.getInstance())) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("LMDB LFTJ execution failed") + .satisfies(throwable -> assertThat(rootCauseOf(throwable)).hasMessageContaining("forced failure")); + } + + private void assertFoafBenchmarkQueryCompilesGeneratedFactory(int cycleSize) throws Exception { + FoafCliqueQueryBenchmark benchmark = new FoafCliqueQueryBenchmark(); + benchmark.peopleCount = 300; + benchmark.cliquePercentage = 30; + benchmark.minCliqueSize = 3; + benchmark.maxCliqueSize = 6; + benchmark.randomKnowsEdges = 900; + benchmark.seed = 12345L; + benchmark.benchmarkMode = LmdbLftjBenchmarkMode.FULL_CODEGEN; + benchmark.setup(); + try { + SailRepository repository = (SailRepository) readField(benchmark, "repository"); + try (SailRepositoryConnection connection = repository.getConnection()) { + assertThat(connection.prepareTupleQuery(foafCycleQuery(cycleSize)) + .explain(Explanation.Level.Optimized) + .toString()) + .contains("LmdbLftjTupleExpr"); + } + assertThat(executeFoafBenchmarkCycle(benchmark, cycleSize)).isPositive(); + LmdbBenchmarkStore store = (LmdbBenchmarkStore) repository.getSail(); + assertThat(compiledFactoryClassNames(store.codegenCache())) + .withFailMessage("cache entries: %s", cacheEntryDescriptions(store.codegenCache())) + .anySatisfy(name -> assertThat(name).contains("GeneratedLmdbFullStackLftjFactory")); + } finally { + benchmark.tearDown(); + } + } + + private long executeFoafBenchmarkCycle(FoafCliqueQueryBenchmark benchmark, int cycleSize) { + switch (cycleSize) { + case 3: + return benchmark.cycle3(); + case 4: + return benchmark.cycle4(); + case 5: + return benchmark.cycle5(); + default: + throw new 
IllegalArgumentException("Unsupported cycle size: " + cycleSize); + } + } + @Test void codegenCacheShouldCompileOncePerExecutionKey() { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); @@ -113,10 +330,15 @@ void codegenCacheShouldReuseNegativeResultAfterCompileFailure() { CachingQueryAccess queryAccess = new CachingQueryAccess(compiler); QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); - List first = drain(evaluationStep, EmptyBindingSet.getInstance()); - List second = drain(evaluationStep, EmptyBindingSet.getInstance()); + assertThatThrownBy(() -> drain(evaluationStep, EmptyBindingSet.getInstance())) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("LMDB LFTJ execution failed") + .satisfies(throwable -> assertThat(rootCauseOf(throwable)).hasMessageContaining("forced failure")); + assertThatThrownBy(() -> drain(evaluationStep, EmptyBindingSet.getInstance())) + .isInstanceOf(RuntimeException.class) + .hasMessageContaining("LMDB LFTJ execution failed") + .satisfies(throwable -> assertThat(rootCauseOf(throwable)).hasMessageContaining("forced failure")); - assertThat(second).containsExactlyElementsOf(first); assertThat(compiler.compileCalls).isEqualTo(1); assertThat(queryAccess.cachedEntry(plan.executionKey())).isNotNull(); assertThat(queryAccess.cachedEntry(plan.executionKey()).compiled()).isFalse(); @@ -132,6 +354,14 @@ private boolean invokeBooleanGetter(Object target, String getterName) { } } + private Throwable rootCauseOf(Throwable throwable) { + Throwable current = throwable; + while (current.getCause() != null) { + current = current.getCause(); + } + return current; + } + private String invokeStringGetter(Object target, String getterName) { try { Method getter = target.getClass().getMethod(getterName); @@ -157,6 +387,79 @@ private String render(BindingSet row) { + row.getValue("c").stringValue(); } + private int countMatches(String source, String regex) { + int count = 0; + Matcher 
matcher = Pattern.compile(regex).matcher(source); + while (matcher.find()) { + count++; + } + return count; + } + + @SuppressWarnings("unchecked") + private List<String> compiledFactoryClassNames(LmdbLftjCodegenCache cache) throws Exception { + Field entriesField = LmdbLftjCodegenCache.class.getDeclaredField("entries"); // reflective read of the cache's private map + entriesField.setAccessible(true); + Map<?, LmdbLftjCodegenCache.CacheEntry> entries = (LinkedHashMap<?, LmdbLftjCodegenCache.CacheEntry>) entriesField + .get(cache); + List<String> names = new ArrayList<>(); + for (LmdbLftjCodegenCache.CacheEntry entry : entries.values()) { + if (entry.compiled()) { + names.add(entry.factory().getClass().getName()); + } + } + return names; + } + + @SuppressWarnings("unchecked") + private List<String> cacheEntryDescriptions(LmdbLftjCodegenCache cache) throws Exception { + Field entriesField = LmdbLftjCodegenCache.class.getDeclaredField("entries"); + entriesField.setAccessible(true); + Map<?, LmdbLftjCodegenCache.CacheEntry> entries = (LinkedHashMap<?, LmdbLftjCodegenCache.CacheEntry>) entriesField + .get(cache); + List<String> descriptions = new ArrayList<>(); + for (Map.Entry<?, LmdbLftjCodegenCache.CacheEntry> entry : entries.entrySet()) { + LmdbLftjCodegenCache.CacheEntry cacheEntry = entry.getValue(); + descriptions.add(entry.getKey() + "=" + + (cacheEntry.compiled() ?
cacheEntry.factory().getClass().getName() + : cacheEntry.failureMessage())); + } + return descriptions; + } + + private String foafCycleQuery(int size) { + StringBuilder builder = new StringBuilder(); + builder.append("PREFIX foaf: <http://xmlns.com/foaf/0.1/>\n"); // PREFIX needs the namespace IRI in angle brackets + builder.append("SELECT * WHERE {\n"); + for (int i = 0; i < size; i++) { + builder.append(" ?") + .append((char) ('a' + i)) + .append(" foaf:knows ?") + .append((char) ('a' + ((i + 1) % size))) + .append(" .\n"); + } + builder.append(" FILTER ("); + boolean first = true; + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (!first) { + builder.append(" && "); + } + builder.append("?").append((char) ('a' + i)).append(" != ?").append((char) ('a' + j)); + first = false; + } + } + builder.append(")\n"); + builder.append("}\n"); + return builder.toString(); + } + + private Object readField(Object target, String name) throws Exception { + Field field = target.getClass().getDeclaredField(name); + field.setAccessible(true); + return field.get(target); + } + private static final class InterpretedQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { private InterpretedQueryAccess() { @@ -244,4 +547,83 @@ LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape) throw new IllegalArgumentException("forced failure"); } } + + private static final class FullCodegenFixture implements AutoCloseable { + private final SailRepository repository; + private final LmdbBenchmarkStore store; + private final LmdbBenchmarkStore.BenchmarkStoreConnection connection; + private final File dataDir; + + private FullCodegenFixture() throws IOException { + dataDir = Files.createTempDirectory("rdf4j-lmdb-full-codegen-test").toFile(); + LmdbStoreConfig config = new LmdbStoreConfig("psoc,posc"); + config.setLftjEnabled(true); + config.setLftjCodegenEnabled(true); + config.setForceSync(false); + config.setValueDBSize(64L * 1024 * 1024); + config.setTripleDBSize(config.getValueDBSize()); + store = new
LmdbBenchmarkStore(dataDir, config, LmdbLftjFullCodegenCompiler.INSTANCE); + repository = new SailRepository(store); + repository.init(); + populate(repository); + connection = (LmdbBenchmarkStore.BenchmarkStoreConnection) store.getConnection(); + } + + @Override + public void close() throws Exception { + try { + connection.close(); + } finally { + repository.shutDown(); + FileUtils.deleteDirectory(dataDir); + } + } + + private static void populate(SailRepository repository) { + try (SailRepositoryConnection connection = repository.getConnection()) { + for (long subject = 1; subject <= 4; subject++) { + for (long object = 1; object <= 4; object++) { + if (subject != object) { + connection.add(person(subject), FOAF.KNOWS, person(object)); + } + } + } + } + } + + private static org.eclipse.rdf4j.model.IRI person(long id) { + return LmdbLftjSyntheticScenario.VF.createIRI("urn:person:" + id); + } + } + + private static final class DefaultCodegenFixture implements AutoCloseable { + private final SailRepository repository; + private final LmdbBenchmarkStore store; + private final LmdbBenchmarkStore.BenchmarkStoreConnection connection; + private final File dataDir; + + private DefaultCodegenFixture() throws IOException { + dataDir = Files.createTempDirectory("rdf4j-lmdb-default-codegen-test").toFile(); + LmdbStoreConfig config = new LmdbStoreConfig("psoc,posc"); + config.setLftjEnabled(true); + config.setLftjCodegenEnabled(true); + config.setForceSync(false); + config.setValueDBSize(64L * 1024 * 1024); + config.setTripleDBSize(config.getValueDBSize()); + store = new LmdbBenchmarkStore(dataDir, config, null); + repository = new SailRepository(store); + repository.init(); + connection = (LmdbBenchmarkStore.BenchmarkStoreConnection) store.getConnection(); + } + + @Override + public void close() throws Exception { + try { + connection.close(); + } finally { + repository.shutDown(); + FileUtils.deleteDirectory(dataDir); + } + } + } } diff --git 
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java index 4296cfc50f..b40d0b1027 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorBenchmark.java @@ -11,13 +11,21 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; -import java.util.Set; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; import java.util.concurrent.TimeUnit; +import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.openjdk.jmh.Main; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -30,6 +38,7 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; @@ -49,16 +58,29 @@ public static class CycleState { @Param({ "true", "false" }) public boolean derivedRelationEnabled; - @Param({ "true", "false" }) - public boolean lftjCodegenEnabled; + @Param({ "interpreted", "executor_codegen", "full_codegen" }) + public String benchmarkMode; + private SailRepository repository; + private LmdbBenchmarkStore.BenchmarkStoreConnection 
connection; private QueryEvaluationStep evaluationStep; @Setup(Level.Trial) - public void setup() { + public void setup() throws Exception { + LmdbLftjBenchmarkMode.validate(benchmarkMode); + repository = createRepository(derivedRelationEnabled, benchmarkMode); + populate(repository, false); + connection = (LmdbBenchmarkStore.BenchmarkStoreConnection) ((LmdbBenchmarkStore) repository.getSail()) + .getConnection(); evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep( - new BenchmarkQueryAccess(false, derivedRelationEnabled, lftjCodegenEnabled), - LmdbLftjSyntheticScenario.createPlan()); + connection.benchmarkQueryAccess(false), + createPlan(false, derivedRelationEnabled)); + drain(evaluationStep); + } + + @TearDown(Level.Trial) + public void tearDown() throws Exception { + closeResources(connection, repository); } } @@ -68,16 +90,29 @@ public static class HiddenContextState { @Param({ "true", "false" }) public boolean derivedRelationEnabled; - @Param({ "true", "false" }) - public boolean lftjCodegenEnabled; + @Param({ "interpreted", "executor_codegen", "full_codegen" }) + public String benchmarkMode; + private SailRepository repository; + private LmdbBenchmarkStore.BenchmarkStoreConnection connection; private QueryEvaluationStep evaluationStep; @Setup(Level.Trial) - public void setup() { + public void setup() throws Exception { + LmdbLftjBenchmarkMode.validate(benchmarkMode); + repository = createRepository(derivedRelationEnabled, benchmarkMode); + populate(repository, true); + connection = (LmdbBenchmarkStore.BenchmarkStoreConnection) ((LmdbBenchmarkStore) repository.getSail()) + .getConnection(); evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep( - new BenchmarkQueryAccess(true, derivedRelationEnabled, lftjCodegenEnabled), - LmdbLftjSyntheticScenario.createPlanWithHiddenContexts()); + connection.benchmarkQueryAccess(false), + createPlan(true, derivedRelationEnabled)); + drain(evaluationStep); + } + + @TearDown(Level.Trial) + public void 
tearDown() throws Exception { + closeResources(connection, repository); } } @@ -115,45 +150,79 @@ private static long consume(QueryEvaluationStep evaluationStep, Blackhole blackh return count; } - private static final class BenchmarkQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { - - private static final Set DERIVED_RELATION_INDEXES = Set.of("psoc", "posc"); - private static final Set GENERIC_INDEXES = Set.of("psoc"); - - private final LmdbLftjCodegenCache codegenCache = new LmdbLftjCodegenCache(); - private final Set configuredIndexes; - private final boolean lftjCodegenEnabled; - - private BenchmarkQueryAccess(boolean duplicateContexts, boolean derivedRelationEnabled, - boolean lftjCodegenEnabled) { - super(duplicateContexts); - this.configuredIndexes = derivedRelationEnabled ? DERIVED_RELATION_INDEXES : GENERIC_INDEXES; - this.lftjCodegenEnabled = lftjCodegenEnabled; + private static long drain(QueryEvaluationStep evaluationStep) throws Exception { + long count = 0; + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + while (iteration.hasNext()) { + iteration.next(); + count++; + } } + return count; + } - @Override - public Set configuredIndexes() { - return configuredIndexes; - } + private static SailRepository createRepository(boolean derivedRelationEnabled, String benchmarkMode) + throws IOException { + File dataDir = Files.createTempDirectory("rdf4j-lmdb-lftj-executor").toFile(); + LmdbStoreConfig config = new LmdbStoreConfig(derivedRelationEnabled ? 
"psoc,posc" : "psoc"); + config.setLftjEnabled(true); + config.setLftjCodegenEnabled(LmdbLftjBenchmarkMode.lftjCodegenEnabled(benchmarkMode)); + config.setForceSync(false); + config.setValueDBSize(64L * 1024 * 1024); + config.setTripleDBSize(config.getValueDBSize()); + SailRepository repository = new SailRepository( + new LmdbBenchmarkStore(dataDir, config, LmdbLftjBenchmarkMode.compiler(benchmarkMode))); + repository.init(); + return repository; + } - @Override - public boolean lftjCodegenEnabled() { - return lftjCodegenEnabled; + private static void populate(SailRepository repository, boolean duplicateContexts) { + IRI contextOne = LmdbLftjSyntheticScenario.VF.createIRI("urn:ctx:1"); + IRI contextTwo = LmdbLftjSyntheticScenario.VF.createIRI("urn:ctx:2"); + try (SailRepositoryConnection connection = repository.getConnection()) { + for (long subject = 1; subject <= 4; subject++) { + for (long object = 1; object <= 4; object++) { + if (subject == object) { + continue; + } + connection.add(person(subject), FOAF.KNOWS, person(object)); + if (duplicateContexts) { + connection.add(person(subject), FOAF.KNOWS, person(object), contextOne); + connection.add(person(subject), FOAF.KNOWS, person(object), contextTwo); + } + } + } } + } - @Override - public LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { - return codegenCache.get(executionKey); + private static LmdbLftjPlan createPlan(boolean duplicateContexts, boolean derivedRelationEnabled) { + String thirdIndex = derivedRelationEnabled ? 
"posc" : "psoc"; + if (duplicateContexts) { + return LmdbLftjSyntheticScenario.createPlanWithHiddenContexts("psoc", "psoc", thirdIndex); } + return LmdbLftjSyntheticScenario.createPlan("psoc", "psoc", thirdIndex); + } - @Override - public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { - codegenCache.putSuccess(executionKey, factory); - } + private static IRI person(long id) { + return LmdbLftjSyntheticScenario.VF.createIRI("urn:person:" + id); + } - @Override - public void cacheCompiledPlanFailure(String executionKey, String message) { - codegenCache.putFailure(executionKey, message); + private static void closeResources(LmdbBenchmarkStore.BenchmarkStoreConnection connection, + SailRepository repository) throws Exception { + File dataDir = repository != null && repository.getSail() instanceof LmdbStore + ? ((LmdbStore) repository.getSail()).getDataDir() + : null; + try { + if (connection != null) { + connection.close(); + } + } finally { + if (repository != null) { + repository.shutDown(); + } + if (dataDir != null && dataDir.exists()) { + FileUtils.deleteDirectory(dataDir); + } } } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetricsDiagnosticTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetricsDiagnosticTest.java new file mode 100644 index 0000000000..4605020e9d --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetricsDiagnosticTest.java @@ -0,0 +1,196 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.nio.file.Files; + +import org.apache.commons.io.FileUtils; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.junit.jupiter.api.Test; + +class LmdbLftjMetricsDiagnosticTest { + + @Test + void printCycle5MetricsForExecutorAndFullCodegen() throws Exception { + MetricsSnapshot executor = executeCycle5(LmdbLftjCodegenCompiler.INSTANCE); + MetricsSnapshot full = executeCycle5(LmdbLftjFullCodegenCompiler.INSTANCE); + + System.out.println("executor cycle5 metrics: " + executor); + System.out.println("full cycle5 metrics: " + full); + + assertThat(executor.emittedBindings).isPositive(); + assertThat(full.emittedBindings).isEqualTo(executor.emittedBindings); + assertThat(full.frontierHits).isPositive(); + assertThat(full.countHits).isPositive(); + assertThat(full.relationUses).isLessThan(full.witnessScans); + } + + private MetricsSnapshot executeCycle5(LmdbLftjCodegenCompiler delegate) throws Exception { + CapturingCompiler compiler = new CapturingCompiler(delegate); + try (MetricsFixture fixture = new MetricsFixture(compiler)) { + long count = executeCount(fixture.repository, cycleQuery(5)); + assertThat(count).isPositive(); + return compiler.snapshot(); + } + } + + private long executeCount(SailRepository repository, String query) { + try (SailRepositoryConnection connection = repository.getConnection()) { + return connection.prepareTupleQuery(query).evaluate().stream().count(); + } + } + + private static String cycleQuery(int 
size) { + StringBuilder builder = new StringBuilder(); + builder.append("PREFIX foaf: \n"); + builder.append("SELECT * WHERE {\n"); + for (int i = 0; i < size; i++) { + builder.append(" ?") + .append((char) ('a' + i)) + .append(" foaf:knows ?") + .append((char) ('a' + ((i + 1) % size))) + .append(" .\n"); + } + builder.append(" FILTER ("); + boolean first = true; + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (!first) { + builder.append(" && "); + } + builder.append("?").append((char) ('a' + i)).append(" != ?").append((char) ('a' + j)); + first = false; + } + } + builder.append(")\n"); + builder.append("}\n"); + return builder.toString(); + } + + private static final class CapturingCompiler extends LmdbLftjCodegenCompiler { + + private final LmdbLftjCodegenCompiler delegate; + private LmdbLftjMetrics lastMetrics; + + private CapturingCompiler(LmdbLftjCodegenCompiler delegate) { + this.delegate = delegate; + } + + @Override + String cacheKey(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return delegate.cacheKey(plan, shape, includeInferred); + } + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + LmdbCompiledLftjFactory factory = delegate.compile(plan, shape, includeInferred); + return (compiledPlan, compiledShape, state, context, queryAccess, metrics) -> { + lastMetrics = metrics; + return factory.create(compiledPlan, compiledShape, state, context, queryAccess, metrics); + }; + } + + private MetricsSnapshot snapshot() { + assertThat(lastMetrics).isNotNull(); + return new MetricsSnapshot(lastMetrics); + } + } + + private static final class MetricsSnapshot { + private final long candidateScans; + private final long witnessScans; + private final long emittedBindings; + private final long frontierLoads; + private final long frontierHits; + private final long countLoads; + private final long countHits; + private final long 
relationLoads; + private final long relationHits; + private final long relationUses; + + private MetricsSnapshot(LmdbLftjMetrics metrics) { + this.candidateScans = metrics.candidateScans(); + this.witnessScans = metrics.witnessScans(); + this.emittedBindings = metrics.emittedBindings(); + this.frontierLoads = metrics.frontierLoads(); + this.frontierHits = metrics.frontierHits(); + this.countLoads = metrics.countLoads(); + this.countHits = metrics.countHits(); + this.relationLoads = metrics.relationLoads(); + this.relationHits = metrics.relationHits(); + this.relationUses = metrics.relationUses(); + } + + @Override + public String toString() { + return "candidateScans=" + candidateScans + + ", witnessScans=" + witnessScans + + ", emittedBindings=" + emittedBindings + + ", frontierLoads=" + frontierLoads + + ", frontierHits=" + frontierHits + + ", countLoads=" + countLoads + + ", countHits=" + countHits + + ", relationLoads=" + relationLoads + + ", relationHits=" + relationHits + + ", relationUses=" + relationUses; + } + } + + private static final class MetricsFixture implements AutoCloseable { + + private final SailRepository repository; + private final File dataDir; + + private MetricsFixture(LmdbLftjCodegenCompiler compiler) throws Exception { + dataDir = Files.createTempDirectory("rdf4j-lmdb-metrics-diagnostic").toFile(); + LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); + config.setLftjEnabled(true); + config.setLftjCodegenEnabled(true); + config.setForceSync(false); + config.setValueDBSize(1_073_741_824L); + config.setTripleDBSize(config.getValueDBSize()); + repository = new SailRepository(new LmdbBenchmarkStore(dataDir, config, compiler)); + repository.init(); + populate(repository); + } + + private void populate(SailRepository repository) throws Exception { + Class generatorClass = Class.forName("org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueDataGenerator"); + Constructor constructor = 
generatorClass.getDeclaredConstructor(int.class, int.class, int.class, + int.class, int.class, long.class); + constructor.setAccessible(true); + Object generator = constructor.newInstance(5000, 30, 3, 8, 15000, 12345L); + Method populate = generatorClass.getDeclaredMethod("populate", SailRepositoryConnection.class); + populate.setAccessible(true); + try (SailRepositoryConnection connection = repository.getConnection()) { + populate.invoke(generator, connection); + } + } + + @Override + public void close() throws IOException { + try { + repository.shutDown(); + } finally { + FileUtils.deleteDirectory(dataDir); + } + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java index 9d89596ce1..080644358e 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java @@ -45,11 +45,11 @@ final class LmdbLftjSyntheticScenario { private LmdbLftjSyntheticScenario() { } - static QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess) { + static QueryEvaluationStep createEvaluationStep(LmdbQueryAccess queryAccess) { return createEvaluationStep(queryAccess, createPlan()); } - static QueryEvaluationStep createEvaluationStep(TestQueryAccess queryAccess, LmdbLftjPlan plan) { + static QueryEvaluationStep createEvaluationStep(LmdbQueryAccess queryAccess, LmdbLftjPlan plan) { QueryEvaluationContext context = new QueryEvaluationContext.Minimal((Dataset) null); LmdbLftjEvaluationStrategy strategy = new LmdbLftjEvaluationStrategy( new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess), @@ -68,6 +68,10 @@ static LmdbLftjPlan createPlan() { } static LmdbLftjPlan createPlanWithHiddenContexts() { + return createPlanWithHiddenContexts("psoc", "psoc", "posc"); + } + + static LmdbLftjPlan 
createPlanWithHiddenContexts(String firstIndex, String secondIndex, String thirdIndex) { StatementPattern pattern1 = statementPattern("a", "b", "ctx1"); StatementPattern pattern2 = statementPattern("b", "c", "ctx2"); StatementPattern pattern3 = statementPattern("c", "a", "ctx3"); @@ -78,9 +82,9 @@ static LmdbLftjPlan createPlanWithHiddenContexts() { fallbackExpr.getAssuredBindingNames(), List.of("a", "b", "c"), List.of( - new LmdbLftjPatternPlan(pattern1, "psoc"), - new LmdbLftjPatternPlan(pattern2, "psoc"), - new LmdbLftjPatternPlan(pattern3, "posc"))); + new LmdbLftjPatternPlan(pattern1, firstIndex), + new LmdbLftjPatternPlan(pattern2, secondIndex), + new LmdbLftjPatternPlan(pattern3, thirdIndex))); } static LmdbLftjPlan createPlan(String firstIndex, String secondIndex, String thirdIndex) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index b61196400c..5855b30c9d 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -19,7 +19,8 @@ import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; -import org.eclipse.rdf4j.sail.lmdb.LmdbStore; +import org.eclipse.rdf4j.sail.lmdb.LmdbBenchmarkStore; +import org.eclipse.rdf4j.sail.lmdb.LmdbLftjBenchmarkMode; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -39,7 +40,7 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) -@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) 
@BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" }) @Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @@ -68,11 +69,8 @@ public class FoafCliqueQueryBenchmark { @Param({ "12345" }) public long seed; - @Param({ "true", "false" }) - public boolean lftjEnabled; - - @Param({ "true", "false" }) - public boolean lftjCodegenEnabled; + @Param({ "interpreted", "executor_codegen", "full_codegen" }) + public String benchmarkMode; private File dataDir; private SailRepository repository; @@ -86,9 +84,10 @@ public static void main(String[] args) throws RunnerException { @Setup(Level.Trial) public void setup() throws IOException { + LmdbLftjBenchmarkMode.validate(benchmarkMode); dataDir = Files.createTempDirectory("rdf4j-lmdb-foaf-cliques").toFile(); - repository = new SailRepository(new LmdbStore(dataDir, createLftjBenchmarkConfig(lftjEnabled, - lftjCodegenEnabled))); + repository = new SailRepository(new LmdbBenchmarkStore(dataDir, createLftjBenchmarkConfig(benchmarkMode), + LmdbLftjBenchmarkMode.compiler(benchmarkMode))); repository.init(); try (SailRepositoryConnection connection = repository.getConnection()) { @@ -128,10 +127,10 @@ private long executeCount(String query) { } } - private static LmdbStoreConfig createLftjBenchmarkConfig(boolean lftjEnabled, boolean lftjCodegenEnabled) { + private static LmdbStoreConfig createLftjBenchmarkConfig(String benchmarkMode) { LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); - config.setLftjEnabled(lftjEnabled); - config.setLftjCodegenEnabled(lftjCodegenEnabled); + config.setLftjEnabled(true); + config.setLftjCodegenEnabled(LmdbLftjBenchmarkMode.lftjCodegenEnabled(benchmarkMode)); config.setForceSync(false); config.setValueDBSize(1_073_741_824L); config.setTripleDBSize(config.getValueDBSize()); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md index a30d1299d4..49cdf1950c 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -1,13 +1,19 @@ +``` Benchmark (cliquePercentage) (lftjCodegenEnabled) (lftjEnabled) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units -FoafCliqueQueryBenchmark.cycle3 30 true true 8 3 5000 15000 12345 avgt 3 22.886 ± 3.922 ms/op -FoafCliqueQueryBenchmark.cycle3 30 true false 8 3 5000 15000 12345 avgt 3 90.273 ± 22.294 ms/op -FoafCliqueQueryBenchmark.cycle3 30 false true 8 3 5000 15000 12345 avgt 3 25.738 ± 5.604 ms/op FoafCliqueQueryBenchmark.cycle3 30 false false 8 3 5000 15000 12345 avgt 3 89.718 ± 6.997 ms/op -FoafCliqueQueryBenchmark.cycle4 30 true true 8 3 5000 15000 12345 avgt 3 116.551 ± 35.375 ms/op -FoafCliqueQueryBenchmark.cycle4 30 true false 8 3 5000 15000 12345 avgt 3 590.218 ± 82.850 ms/op -FoafCliqueQueryBenchmark.cycle4 30 false true 8 3 5000 15000 12345 avgt 3 134.350 ± 28.883 ms/op FoafCliqueQueryBenchmark.cycle4 30 false false 8 3 5000 15000 12345 avgt 3 569.446 ± 38.531 ms/op -FoafCliqueQueryBenchmark.cycle5 30 true true 8 3 5000 15000 12345 avgt 3 712.650 ± 127.855 ms/op -FoafCliqueQueryBenchmark.cycle5 30 true false 8 3 5000 15000 12345 avgt 3 3783.058 ± 345.017 ms/op -FoafCliqueQueryBenchmark.cycle5 30 false true 8 3 5000 15000 12345 avgt 3 852.463 ± 217.004 ms/op FoafCliqueQueryBenchmark.cycle5 30 false false 8 3 5000 15000 12345 avgt 3 3814.985 ± 530.638 ms/op +``` + +``` +Benchmark (benchmarkMode) (cliquePercentage) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units +FoafCliqueQueryBenchmark.cycle3 interpreted 30 8 3 5000 15000 12345 avgt 3 24.902 ± 5.810 ms/op 
+FoafCliqueQueryBenchmark.cycle3 executor_codegen 30 8 3 5000 15000 12345 avgt 3 23.965 ± 4.651 ms/op +FoafCliqueQueryBenchmark.cycle3 full_codegen 30 8 3 5000 15000 12345 avgt 3 16.673 ± 3.116 ms/op +FoafCliqueQueryBenchmark.cycle4 interpreted 30 8 3 5000 15000 12345 avgt 3 121.399 ± 61.014 ms/op +FoafCliqueQueryBenchmark.cycle4 executor_codegen 30 8 3 5000 15000 12345 avgt 3 108.525 ± 27.221 ms/op +FoafCliqueQueryBenchmark.cycle4 full_codegen 30 8 3 5000 15000 12345 avgt 3 82.306 ± 30.173 ms/op +FoafCliqueQueryBenchmark.cycle5 interpreted 30 8 3 5000 15000 12345 avgt 3 702.982 ± 103.059 ms/op +FoafCliqueQueryBenchmark.cycle5 executor_codegen 30 8 3 5000 15000 12345 avgt 3 663.095 ± 236.201 ms/op +FoafCliqueQueryBenchmark.cycle5 full_codegen 30 8 3 5000 15000 12345 avgt 3 520.210 ± 120.747 ms/op +``` From 9f0d33038777ca22941f53338d1b4d82f57aa6ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 22:24:18 +0200 Subject: [PATCH 19/32] fastest yet, with codegen --- .../sail/lmdb/LmdbDerivedBinaryRelation.java | 142 ++++++ .../sail/lmdb/LmdbLftjExecutionShape.java | 2 +- .../lmdb/LmdbLftjFullCodegenCompiler.java | 434 ++++++++++++------ .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 60 ++- .../benchmark/FoafCliqueQueryBenchmark.java | 9 +- 5 files changed, 492 insertions(+), 155 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java index 838e22c7ef..566dec02a9 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbDerivedBinaryRelation.java @@ -11,12 +11,17 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Deque; import java.util.HashMap; import java.util.Map; import 
java.util.Objects; public final class LmdbDerivedBinaryRelation { + private static final ThreadLocal> SCRATCH_POOL = ThreadLocal.withInitial(ArrayDeque::new); + private final int sourceComponent; private final int targetComponent; private final LmdbCachedFrontier rootFrontier; @@ -58,6 +63,24 @@ public long count(long sourceValue, long targetValue) { return frontier(sourceValue).countFor(targetValue); } + public int sourceCount() { + return adjacency.size(); + } + + public int rootFrontierSize() { + return rootFrontier.size(); + } + + public static RelationScratch borrowScratch() { + RelationScratch scratch = SCRATCH_POOL.get().pollFirst(); + return scratch == null ? new RelationScratch() : scratch; + } + + public static void releaseScratch(RelationScratch scratch) { + scratch.release(); + SCRATCH_POOL.get().offerFirst(scratch); + } + public static final class Builder { private final int sourceComponent; @@ -190,4 +213,123 @@ void clear() { size = 0; } } + + public static final class RelationScratch { + private static final int MIN_CAPACITY = 8; + + private long[] frontierKeys = new long[MIN_CAPACITY]; + private long[][] frontierValues = new long[MIN_CAPACITY][]; + private int[] frontierGenerations = new int[MIN_CAPACITY]; + private int frontierGeneration = 1; + + private long[] countSourceKeys = new long[MIN_CAPACITY]; + private long[] countTargetKeys = new long[MIN_CAPACITY]; + private long[] countValues = new long[MIN_CAPACITY]; + private int[] countGenerations = new int[MIN_CAPACITY]; + private int countGeneration = 1; + + public void prepare(int sourceCount, int rootFrontierSize) { + int expectedEntries = Math.max(sourceCount, rootFrontierSize); + ensureFrontierCapacity(tableCapacity(expectedEntries)); + frontierGeneration = nextGeneration(frontierGeneration, frontierGenerations); + + ensureCountCapacity(tableCapacity(expectedEntries)); + countGeneration = nextGeneration(countGeneration, countGenerations); + } + + public int frontierMask() { + return 
frontierKeys.length - 1; + } + + public boolean frontierUsed(int slot) { + return frontierGenerations[slot] == frontierGeneration; + } + + public long frontierKey(int slot) { + return frontierKeys[slot]; + } + + public long[] frontierValue(int slot) { + return frontierValues[slot]; + } + + public void storeFrontier(int slot, long key, long[] values) { + frontierGenerations[slot] = frontierGeneration; + frontierKeys[slot] = key; + frontierValues[slot] = values; + } + + public int countMask() { + return countSourceKeys.length - 1; + } + + public boolean countUsed(int slot) { + return countGenerations[slot] == countGeneration; + } + + public long countSourceKey(int slot) { + return countSourceKeys[slot]; + } + + public long countTargetKey(int slot) { + return countTargetKeys[slot]; + } + + public long countValue(int slot) { + return countValues[slot]; + } + + public void storeCount(int slot, long sourceKey, long targetKey, long value) { + countGenerations[slot] = countGeneration; + countSourceKeys[slot] = sourceKey; + countTargetKeys[slot] = targetKey; + countValues[slot] = value; + } + + void release() { + Arrays.fill(frontierValues, null); + Arrays.fill(frontierGenerations, 0); + Arrays.fill(countGenerations, 0); + frontierGeneration = 1; + countGeneration = 1; + } + + private void ensureFrontierCapacity(int requestedCapacity) { + if (frontierKeys.length >= requestedCapacity) { + return; + } + frontierKeys = new long[requestedCapacity]; + frontierValues = new long[requestedCapacity][]; + frontierGenerations = new int[requestedCapacity]; + frontierGeneration = 1; + } + + private void ensureCountCapacity(int requestedCapacity) { + if (countSourceKeys.length >= requestedCapacity) { + return; + } + countSourceKeys = new long[requestedCapacity]; + countTargetKeys = new long[requestedCapacity]; + countValues = new long[requestedCapacity]; + countGenerations = new int[requestedCapacity]; + countGeneration = 1; + } + + private static int tableCapacity(int expectedEntries) { 
+ int requested = Math.max(MIN_CAPACITY, expectedEntries << 1); + int capacity = MIN_CAPACITY; + while (capacity < requested) { + capacity <<= 1; + } + return capacity; + } + + private static int nextGeneration(int generation, int[] slots) { + if (generation == Integer.MAX_VALUE) { + Arrays.fill(slots, 0); + return 1; + } + return generation + 1; + } + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java index a6be5798db..bea0d2abab 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java @@ -19,7 +19,7 @@ public final class LmdbLftjExecutionShape { - private static final int FULL_STACK_TEMPLATE_VERSION = 2; + private static final int FULL_STACK_TEMPLATE_VERSION = 3; private final int variableCount; private final int[][] cursorOrdinalsBySlot; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java index ad788a3bfa..b082e4ae37 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java @@ -70,6 +70,7 @@ private String build() { source.append("import static org.lwjgl.util.lmdb.LMDB.MDB_NEXT;\n"); source.append("import static org.lwjgl.util.lmdb.LMDB.MDB_SET_RANGE;\n"); source.append("import static org.lwjgl.util.lmdb.LMDB.MDB_SUCCESS;\n"); + source.append("import static org.lwjgl.util.lmdb.LMDB.mdb_cmp;\n"); source.append("import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_close;\n"); source.append("import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_get;\n"); source.append("import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_open;\n"); 
@@ -138,6 +139,11 @@ private void appendIterationClass(StringBuilder source) { for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { appendPatternConstructor(source, patternOrdinal); } + for (RelationGroup relationGroup : relationGroups) { + source.append(" this.relationGroup") + .append(relationGroup.groupId) + .append("Scratch = LmdbDerivedBinaryRelation.borrowScratch();\n"); + } source.append(" this.depth = firstDepth();\n"); source.append(" }\n\n"); @@ -187,66 +193,65 @@ private void appendPatternFields(StringBuilder source, int patternOrdinal) { .append(slot) .append(";\n"); } - appendRawCursorFields(source, patternOrdinal, -1); + appendRawCursorFields(source, patternOrdinal, patternShape, -1); return; } for (int slot : patternShape.visibleSlots()) { - appendRawCursorFields(source, patternOrdinal, slot); + appendRawCursorFields(source, patternOrdinal, patternShape, slot); } - appendRawCursorFields(source, patternOrdinal, -1); + appendRawCursorFields(source, patternOrdinal, patternShape, -1); } private void appendRelationGroupFields(StringBuilder source, int groupId) { source.append(" private LmdbDerivedBinaryRelation relationGroup").append(groupId).append(";\n"); + source.append(" private final LmdbDerivedBinaryRelation.RelationScratch relationGroup") + .append(groupId) + .append("Scratch;\n"); source.append(" private boolean relationGroup").append(groupId).append("Loaded;\n"); source.append(" private long[] relationGroup").append(groupId).append("RootFrontierValues;\n"); source.append(" private boolean relationGroup").append(groupId).append("RootFrontierLoaded;\n"); - source.append(" private final long[] relationGroup") - .append(groupId) - .append("FrontierKeys = new long[65536];\n"); - source.append(" private final long[][] relationGroup") - .append(groupId) - .append("FrontierValues = new long[65536][];\n"); - source.append(" private final boolean[] relationGroup") - .append(groupId) - .append("FrontierUsed = new 
boolean[65536];\n"); - source.append(" private final long[] relationGroup") - .append(groupId) - .append("CountSourceKeys = new long[65536];\n"); - source.append(" private final long[] relationGroup") - .append(groupId) - .append("CountTargetKeys = new long[65536];\n"); - source.append(" private final long[] relationGroup") - .append(groupId) - .append("CountValues = new long[65536];\n"); - source.append(" private final boolean[] relationGroup") - .append(groupId) - .append("CountUsed = new boolean[65536];\n"); } - private void appendRawCursorFields(StringBuilder source, int patternOrdinal, int slot) { + private void appendRawCursorFields(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape, int slot) { String suffix = slotSuffix(patternOrdinal, slot); source.append(" private final int dbi").append(suffix).append("Explicit;\n"); source.append(" private final MDBVal key").append(suffix).append("Explicit;\n"); source.append(" private final MDBVal data").append(suffix).append("Explicit;\n"); source.append(" private final ByteBuffer lower").append(suffix).append("Explicit;\n"); + source.append(" private final MDBVal upper").append(suffix).append("Explicit;\n"); + source.append(" private final ByteBuffer upperBuffer").append(suffix).append("Explicit;\n"); source.append(" private final long cursor").append(suffix).append("Explicit;\n"); source.append(" private boolean available").append(suffix).append("Explicit;\n"); - source.append(" private long subj").append(suffix).append("Explicit;\n"); - source.append(" private long pred").append(suffix).append("Explicit;\n"); - source.append(" private long obj").append(suffix).append("Explicit;\n"); - source.append(" private long ctx").append(suffix).append("Explicit;\n"); + source.append(" private int prefixLength").append(suffix).append("Explicit;\n"); + source.append(" private boolean templateDirty").append(suffix).append("Explicit = true;\n"); + if (slot >= 0) { + source.append(" private 
long value").append(suffix).append("Explicit;\n"); + } else { + source.append(" private long subj").append(suffix).append("Explicit;\n"); + source.append(" private long pred").append(suffix).append("Explicit;\n"); + source.append(" private long obj").append(suffix).append("Explicit;\n"); + source.append(" private long ctx").append(suffix).append("Explicit;\n"); + } if (includeInferred) { source.append(" private final int dbi").append(suffix).append("Inferred;\n"); source.append(" private final MDBVal key").append(suffix).append("Inferred;\n"); source.append(" private final MDBVal data").append(suffix).append("Inferred;\n"); source.append(" private final ByteBuffer lower").append(suffix).append("Inferred;\n"); + source.append(" private final MDBVal upper").append(suffix).append("Inferred;\n"); + source.append(" private final ByteBuffer upperBuffer").append(suffix).append("Inferred;\n"); source.append(" private final long cursor").append(suffix).append("Inferred;\n"); source.append(" private boolean available").append(suffix).append("Inferred;\n"); - source.append(" private long subj").append(suffix).append("Inferred;\n"); - source.append(" private long pred").append(suffix).append("Inferred;\n"); - source.append(" private long obj").append(suffix).append("Inferred;\n"); - source.append(" private long ctx").append(suffix).append("Inferred;\n"); + source.append(" private int prefixLength").append(suffix).append("Inferred;\n"); + source.append(" private boolean templateDirty").append(suffix).append("Inferred = true;\n"); + if (slot >= 0) { + source.append(" private long value").append(suffix).append("Inferred;\n"); + } else { + source.append(" private long subj").append(suffix).append("Inferred;\n"); + source.append(" private long pred").append(suffix).append("Inferred;\n"); + source.append(" private long obj").append(suffix).append("Inferred;\n"); + source.append(" private long ctx").append(suffix).append("Inferred;\n"); + } } if (slot >= 0) { source.append(" private 
long current").append(suffix).append(";\n"); @@ -262,16 +267,17 @@ private void appendPatternConstructor(StringBuilder source, int patternOrdinal) .append(patternShape.indexName()) .append("\");\n"); if (patternShape.derivedBinaryRelation()) { - appendRawCursorConstructor(source, patternOrdinal, -1); + appendRawCursorConstructor(source, patternOrdinal, patternShape, -1); return; } for (int slot : patternShape.visibleSlots()) { - appendRawCursorConstructor(source, patternOrdinal, slot); + appendRawCursorConstructor(source, patternOrdinal, patternShape, slot); } - appendRawCursorConstructor(source, patternOrdinal, -1); + appendRawCursorConstructor(source, patternOrdinal, patternShape, -1); } - private void appendRawCursorConstructor(StringBuilder source, int patternOrdinal, int slot) { + private void appendRawCursorConstructor(StringBuilder source, int patternOrdinal, + LmdbLftjExecutionShape.PatternShape patternShape, int slot) { String suffix = slotSuffix(patternOrdinal, slot); source.append(" this.dbi") .append(suffix) @@ -281,6 +287,8 @@ private void appendRawCursorConstructor(StringBuilder source, int patternOrdinal source.append(" this.key").append(suffix).append("Explicit = pool.getVal();\n"); source.append(" this.data").append(suffix).append("Explicit = pool.getVal();\n"); source.append(" this.lower").append(suffix).append("Explicit = pool.getKeyBuffer();\n"); + source.append(" this.upper").append(suffix).append("Explicit = pool.getVal();\n"); + source.append(" this.upperBuffer").append(suffix).append("Explicit = pool.getKeyBuffer();\n"); source.append(" this.cursor") .append(suffix) .append("Explicit = openCursor(dbi") @@ -295,6 +303,8 @@ private void appendRawCursorConstructor(StringBuilder source, int patternOrdinal source.append(" this.key").append(suffix).append("Inferred = pool.getVal();\n"); source.append(" this.data").append(suffix).append("Inferred = pool.getVal();\n"); source.append(" this.lower").append(suffix).append("Inferred = 
pool.getKeyBuffer();\n"); + source.append(" this.upper").append(suffix).append("Inferred = pool.getVal();\n"); + source.append(" this.upperBuffer").append(suffix).append("Inferred = pool.getKeyBuffer();\n"); source.append(" this.cursor") .append(suffix) .append("Inferred = openCursor(dbi") @@ -414,6 +424,7 @@ private void appendBacktrackFromDepth(StringBuilder source, int variableCount) { private void appendReleaseDepth(StringBuilder source, int slot) { source.append(" private void releaseDepth").append(slot).append("() {\n"); source.append(" state().clear(").append(slot).append(");\n"); + source.append(" markAllCursorTemplatesDirty();\n"); source.append(" depth").append(slot).append("Initialized = false;\n"); source.append(" depth").append(slot).append("Advance = false;\n"); for (int patternOrdinal : shape.cursorOrdinals(slot)) { @@ -473,6 +484,7 @@ private void appendPositionDepth(StringBuilder source, int slot, int[] cursorOrd .append('_') .append(slot) .append("());\n"); + source.append(" markAllCursorTemplatesDirty();\n"); source.append(" return true;\n"); source.append(" }\n\n"); return; @@ -522,6 +534,7 @@ private void appendPositionDepth(StringBuilder source, int slot, int[] cursorOrd } source.append(" if (allMatch) {\n"); source.append(" state().assign(").append(slot).append(", current);\n"); + source.append(" markAllCursorTemplatesDirty();\n"); source.append(" return true;\n"); source.append(" }\n"); source.append(" current = max;\n"); @@ -603,6 +616,13 @@ private void appendRelationGroupAccessor(StringBuilder source, RelationGroup rel } source.append(" }\n"); source.append(" relationGroup").append(relationGroup.groupId).append(" = builder.build();\n"); + source.append(" relationGroup") + .append(relationGroup.groupId) + .append("Scratch.prepare(relationGroup") + .append(relationGroup.groupId) + .append(".sourceCount(), relationGroup") + .append(relationGroup.groupId) + .append(".rootFrontierSize());\n"); source.append(" 
relationGroup").append(relationGroup.groupId).append("Loaded = true;\n"); source.append(" return relationGroup").append(relationGroup.groupId).append(";\n"); source.append(" }\n\n"); @@ -718,11 +738,11 @@ private void appendRelationGroupCacheMethods(StringBuilder source, int groupId) source.append(" private long[] frontierValuesForRelationGroup") .append(groupId) .append("(long sourceValue) {\n"); - source.append(" int mask = relationGroup").append(groupId).append("FrontierKeys.length - 1;\n"); + source.append(" int mask = relationGroup").append(groupId).append("Scratch.frontierMask();\n"); source.append(" int slot = mixFrontierCacheKey(sourceValue) & mask;\n"); source.append(" int evictionSlot = slot;\n"); source.append(" for (int probe = 0; probe < 8; probe++) {\n"); - source.append(" if (!relationGroup").append(groupId).append("FrontierUsed[slot]) {\n"); + source.append(" if (!relationGroup").append(groupId).append("Scratch.frontierUsed(slot)) {\n"); source.append(" metrics().recordFrontierLoad();\n"); source.append(" boolean relationLoaded = relationGroup").append(groupId).append("Loaded;\n"); source.append(" long[] values = relationGroup") @@ -732,14 +752,16 @@ private void appendRelationGroupCacheMethods(StringBuilder source, int groupId) source.append(" metrics().recordRelationHit();\n"); source.append(" }\n"); source.append(" metrics().recordRelationUse();\n"); - source.append(" relationGroup").append(groupId).append("FrontierUsed[slot] = true;\n"); - source.append(" relationGroup").append(groupId).append("FrontierKeys[slot] = sourceValue;\n"); - source.append(" relationGroup").append(groupId).append("FrontierValues[slot] = values;\n"); + source.append(" relationGroup") + .append(groupId) + .append("Scratch.storeFrontier(slot, sourceValue, values);\n"); source.append(" return values;\n"); source.append(" }\n"); - source.append(" if (relationGroup").append(groupId).append("FrontierKeys[slot] == sourceValue) {\n"); + source.append(" if (relationGroup") + 
.append(groupId) + .append("Scratch.frontierKey(slot) == sourceValue) {\n"); source.append(" metrics().recordFrontierHit();\n"); - source.append(" return relationGroup").append(groupId).append("FrontierValues[slot];\n"); + source.append(" return relationGroup").append(groupId).append("Scratch.frontierValue(slot);\n"); source.append(" }\n"); source.append(" slot = (slot + 1) & mask;\n"); source.append(" }\n"); @@ -752,19 +774,19 @@ private void appendRelationGroupCacheMethods(StringBuilder source, int groupId) source.append(" metrics().recordRelationHit();\n"); source.append(" }\n"); source.append(" metrics().recordRelationUse();\n"); - source.append(" relationGroup").append(groupId).append("FrontierUsed[evictionSlot] = true;\n"); - source.append(" relationGroup").append(groupId).append("FrontierKeys[evictionSlot] = sourceValue;\n"); - source.append(" relationGroup").append(groupId).append("FrontierValues[evictionSlot] = values;\n"); + source.append(" relationGroup") + .append(groupId) + .append("Scratch.storeFrontier(evictionSlot, sourceValue, values);\n"); source.append(" return values;\n"); source.append(" }\n\n"); source.append(" private long countForRelationGroup") .append(groupId) .append("(long sourceValue, long targetValue) {\n"); - source.append(" int mask = relationGroup").append(groupId).append("CountSourceKeys.length - 1;\n"); + source.append(" int mask = relationGroup").append(groupId).append("Scratch.countMask();\n"); source.append(" int slot = mixCountCacheKey(sourceValue, targetValue) & mask;\n"); source.append(" int evictionSlot = slot;\n"); source.append(" for (int probe = 0; probe < 8; probe++) {\n"); - source.append(" if (!relationGroup").append(groupId).append("CountUsed[slot]) {\n"); + source.append(" if (!relationGroup").append(groupId).append("Scratch.countUsed(slot)) {\n"); source.append(" metrics().recordCountLoad();\n"); source.append(" boolean relationLoaded = relationGroup").append(groupId).append("Loaded;\n"); source.append(" long count 
= relationGroup") @@ -774,18 +796,19 @@ private void appendRelationGroupCacheMethods(StringBuilder source, int groupId) source.append(" metrics().recordRelationHit();\n"); source.append(" }\n"); source.append(" metrics().recordRelationUse();\n"); - source.append(" relationGroup").append(groupId).append("CountUsed[slot] = true;\n"); - source.append(" relationGroup").append(groupId).append("CountSourceKeys[slot] = sourceValue;\n"); - source.append(" relationGroup").append(groupId).append("CountTargetKeys[slot] = targetValue;\n"); - source.append(" relationGroup").append(groupId).append("CountValues[slot] = count;\n"); + source.append(" relationGroup") + .append(groupId) + .append("Scratch.storeCount(slot, sourceValue, targetValue, count);\n"); source.append(" return count;\n"); source.append(" }\n"); - source.append(" if (relationGroup").append(groupId).append("CountSourceKeys[slot] == sourceValue\n"); + source.append(" if (relationGroup") + .append(groupId) + .append("Scratch.countSourceKey(slot) == sourceValue\n"); source.append(" && relationGroup") .append(groupId) - .append("CountTargetKeys[slot] == targetValue) {\n"); + .append("Scratch.countTargetKey(slot) == targetValue) {\n"); source.append(" metrics().recordCountHit();\n"); - source.append(" return relationGroup").append(groupId).append("CountValues[slot];\n"); + source.append(" return relationGroup").append(groupId).append("Scratch.countValue(slot);\n"); source.append(" }\n"); source.append(" slot = (slot + 1) & mask;\n"); source.append(" }\n"); @@ -798,14 +821,9 @@ private void appendRelationGroupCacheMethods(StringBuilder source, int groupId) source.append(" metrics().recordRelationHit();\n"); source.append(" }\n"); source.append(" metrics().recordRelationUse();\n"); - source.append(" relationGroup").append(groupId).append("CountUsed[evictionSlot] = true;\n"); - source.append(" relationGroup") - .append(groupId) - .append("CountSourceKeys[evictionSlot] = sourceValue;\n"); source.append(" relationGroup") 
.append(groupId) - .append("CountTargetKeys[evictionSlot] = targetValue;\n"); - source.append(" relationGroup").append(groupId).append("CountValues[evictionSlot] = count;\n"); + .append("Scratch.storeCount(evictionSlot, sourceValue, targetValue, count);\n"); source.append(" return count;\n"); source.append(" }\n\n"); } @@ -833,8 +851,7 @@ private void appendCandidateMethods(StringBuilder source, int patternOrdinal, source.append(" current") .append(suffix) .append(" = ") - .append(componentAccessor(patternOrdinal, slot, "Explicit", - patternShape.componentForSlot(slot))) + .append(cursorValueAccessor(patternOrdinal, slot, "Explicit")) .append(";\n"); source.append(" return true;\n"); } @@ -848,16 +865,14 @@ private void appendCandidateMethods(StringBuilder source, int patternOrdinal, source.append(" while (available") .append(slotSuffix(patternOrdinal, slot)) .append("Explicit && ") - .append(componentAccessor(patternOrdinal, slot, "Explicit", - patternShape.componentForSlot(slot))) + .append(cursorValueAccessor(patternOrdinal, slot, "Explicit")) .append(" == previous) {\n"); source.append(" advanceCursor").append(slotSuffix(patternOrdinal, slot)).append("Explicit();\n"); source.append(" }\n"); source.append(" while (available") .append(slotSuffix(patternOrdinal, slot)) .append("Inferred && ") - .append(componentAccessor(patternOrdinal, slot, "Inferred", - patternShape.componentForSlot(slot))) + .append(cursorValueAccessor(patternOrdinal, slot, "Inferred")) .append(" == previous) {\n"); source.append(" advanceCursor").append(slotSuffix(patternOrdinal, slot)).append("Inferred();\n"); source.append(" }\n"); @@ -871,8 +886,7 @@ private void appendCandidateMethods(StringBuilder source, int patternOrdinal, .append(slotSuffix(patternOrdinal, slot)) .append("Explicit()) {\n"); source.append(" long candidate = ") - .append(componentAccessor(patternOrdinal, slot, "Explicit", - patternShape.componentForSlot(slot))) + .append(cursorValueAccessor(patternOrdinal, slot, 
"Explicit")) .append(";\n"); source.append(" if (candidate != previous) {\n"); source.append(" current").append(suffix).append(" = candidate;\n"); @@ -904,7 +918,7 @@ private void appendMergeDistinctCurrent(StringBuilder source, int patternOrdinal source.append(" current") .append(suffix) .append(" = ") - .append(componentAccessor(patternOrdinal, slot, "Explicit", component)) + .append(cursorValueAccessor(patternOrdinal, slot, "Explicit")) .append(";\n"); source.append(" currentAvailable").append(suffix).append(" = true;\n"); source.append(" return true;\n"); @@ -913,16 +927,16 @@ private void appendMergeDistinctCurrent(StringBuilder source, int patternOrdinal source.append(" current") .append(suffix) .append(" = ") - .append(componentAccessor(patternOrdinal, slot, "Inferred", component)) + .append(cursorValueAccessor(patternOrdinal, slot, "Inferred")) .append(";\n"); source.append(" currentAvailable").append(suffix).append(" = true;\n"); source.append(" return true;\n"); source.append(" }\n"); source.append(" long explicitValue = ") - .append(componentAccessor(patternOrdinal, slot, "Explicit", component)) + .append(cursorValueAccessor(patternOrdinal, slot, "Explicit")) .append(";\n"); source.append(" long inferredValue = ") - .append(componentAccessor(patternOrdinal, slot, "Inferred", component)) + .append(cursorValueAccessor(patternOrdinal, slot, "Inferred")) .append(";\n"); source.append(" current") .append(suffix) @@ -971,18 +985,21 @@ private void appendWitnessSeek(StringBuilder source, int patternOrdinal, private void appendMergedWitnessRowSelection(StringBuilder source, int patternOrdinal, LmdbLftjExecutionShape.PatternShape patternShape, boolean buildRelation) { String suffix = slotSuffix(patternOrdinal, -1); + source.append(" int rowCompare = available") + .append(suffix) + .append("Explicit && available") + .append(suffix) + .append("Inferred ? 
compareWitnessRows") + .append(patternOrdinal) + .append("() : 0;\n"); source.append(" if (!available") .append(suffix) .append("Inferred || (available") .append(suffix) - .append("Explicit && compareWitnessRows") - .append(patternOrdinal) - .append("() <= 0)) {\n"); + .append("Explicit && rowCompare <= 0)) {\n"); source.append(" boolean duplicate = available") .append(suffix) - .append("Inferred && compareWitnessRows") - .append(patternOrdinal) - .append("() == 0;\n"); + .append("Inferred && rowCompare == 0;\n"); if (buildRelation) { source.append(" builder.add(") .append(componentAccessor(patternOrdinal, -1, "Explicit", @@ -1016,28 +1033,17 @@ private void appendMergedWitnessRowSelection(StringBuilder source, int patternOr private void appendCompareWitnessMethod(StringBuilder source, int patternOrdinal) { source.append(" private int compareWitnessRows").append(patternOrdinal).append("() {\n"); - appendCompareRows(source, patternOrdinal, -1); + String suffix = slotSuffix(patternOrdinal, -1); + source.append(" return mdb_cmp(txn, dbi") + .append(suffix) + .append("Explicit, key") + .append(suffix) + .append("Explicit, key") + .append(suffix) + .append("Inferred);\n"); source.append(" }\n\n"); } - private void appendCompareRows(StringBuilder source, int patternOrdinal, int slot) { - LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); - for (int i = 0; i < 4; i++) { - int component = patternShape.indexComponent(i); - source.append(" int compare") - .append(i) - .append(" = Long.compare(") - .append(componentAccessor(patternOrdinal, slot, "Explicit", component)) - .append(", ") - .append(componentAccessor(patternOrdinal, slot, "Inferred", component)) - .append(");\n"); - source.append(" if (compare").append(i).append(" != 0) {\n"); - source.append(" return compare").append(i).append(";\n"); - source.append(" }\n"); - } - source.append(" return 0;\n"); - } - private void appendAdvanceWitnessMethod(StringBuilder source, int patternOrdinal, 
String kind) { String suffix = slotSuffix(patternOrdinal, -1); source.append(" private boolean advanceWitness").append(patternOrdinal).append(kind).append("() {\n"); @@ -1048,43 +1054,44 @@ private void appendAdvanceWitnessMethod(StringBuilder source, int patternOrdinal private void appendSeekCursorInvocation(StringBuilder source, int patternOrdinal, LmdbLftjExecutionShape.PatternShape patternShape, int slot, String kind, String targetExpr) { String suffix = slotSuffix(patternOrdinal, slot); - int keyFieldIndex = slot >= 0 ? patternShape.keyFieldIndexForSlot(slot) - : patternShape.visibleSlots().length == 0 - ? 0 - : patternShape.indexFields().length - (patternShape.hasHiddenTerms() ? 1 : 0); - if (slot < 0) { - keyFieldIndex = patternShape.hasHiddenTerms() ? patternShape.indexFields().length - 1 - : patternShape.indexFields().length; - } - source.append(" lower").append(suffix).append(kind).append(".clear();\n"); - for (int i = 0; i < patternShape.indexFields().length; i++) { - int component = patternShape.indexComponent(i); - if (slot >= 0 && i == keyFieldIndex) { - source.append(" Varint.writeUnsigned(lower") - .append(suffix) - .append(kind) - .append(", ") - .append(targetExpr) - .append(");\n"); - break; - } - if (slot < 0 && i == keyFieldIndex) { - break; - } + source.append(" ensureTemplate").append(suffix).append(kind).append("();\n"); + source.append(" lower") + .append(suffix) + .append(kind) + .append(".limit(lower") + .append(suffix) + .append(kind) + .append(".capacity());\n"); + source.append(" lower") + .append(suffix) + .append(kind) + .append(".position(prefixLength") + .append(suffix) + .append(kind) + .append(");\n"); + if (slot >= 0) { source.append(" Varint.writeUnsigned(lower") .append(suffix) .append(kind) .append(", ") - .append(componentValueExpression(patternOrdinal, patternShape, component)) + .append(targetExpr) .append(");\n"); } + source.append(" lower") + .append(suffix) + .append(kind) + .append(".limit(lower") + .append(suffix) + 
.append(kind) + .append(".position());\n"); + source.append(" lower").append(suffix).append(kind).append(".position(0);\n"); source.append(" key") .append(suffix) .append(kind) .append(".mv_data(lower") .append(suffix) .append(kind) - .append(".flip());\n"); + .append(");\n"); source.append(" if (mdb_cursor_get(cursor") .append(suffix) .append(kind) @@ -1100,7 +1107,6 @@ private void appendSeekCursorInvocation(StringBuilder source, int patternOrdinal source.append(" return false;\n"); } source.append(" } else {\n"); - source.append(" decodeRow").append(suffix).append(kind).append("();\n"); source.append(" available") .append(suffix) .append(kind) @@ -1108,6 +1114,9 @@ private void appendSeekCursorInvocation(StringBuilder source, int patternOrdinal .append(suffix) .append(kind) .append("();\n"); + source.append(" if (available").append(suffix).append(kind).append(") {\n"); + appendDecodeInvocation(source, patternShape, slot, suffix, kind, " "); + source.append(" }\n"); source.append(" }\n"); if (slot >= 0) { source.append(" return available").append(suffix).append(kind).append(";\n"); @@ -1128,6 +1137,11 @@ private void appendCloseResources(StringBuilder source) { } appendCloseCursorResources(source, patternOrdinal, -1); } + for (RelationGroup relationGroup : relationGroups) { + source.append(" LmdbDerivedBinaryRelation.releaseScratch(relationGroup") + .append(relationGroup.groupId) + .append("Scratch);\n"); + } source.append(" }\n\n"); } @@ -1137,11 +1151,15 @@ private void appendCloseCursorResources(StringBuilder source, int patternOrdinal source.append(" pool.free(key").append(suffix).append("Explicit);\n"); source.append(" pool.free(data").append(suffix).append("Explicit);\n"); source.append(" pool.free(lower").append(suffix).append("Explicit);\n"); + source.append(" pool.free(upper").append(suffix).append("Explicit);\n"); + source.append(" pool.free(upperBuffer").append(suffix).append("Explicit);\n"); if (includeInferred) { source.append(" 
mdb_cursor_close(cursor").append(suffix).append("Inferred);\n"); source.append(" pool.free(key").append(suffix).append("Inferred);\n"); source.append(" pool.free(data").append(suffix).append("Inferred);\n"); source.append(" pool.free(lower").append(suffix).append("Inferred);\n"); + source.append(" pool.free(upper").append(suffix).append("Inferred);\n"); + source.append(" pool.free(upperBuffer").append(suffix).append("Inferred);\n"); } } @@ -1159,6 +1177,19 @@ private void appendHelpers(StringBuilder source) { source.append(" return 31 * Long.hashCode(sourceValue) + Long.hashCode(targetValue);\n"); source.append(" }\n\n"); } + source.append(" private void markAllCursorTemplatesDirty() {\n"); + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); + if (patternShape.derivedBinaryRelation()) { + appendCursorTemplateDirtyAssignment(source, patternOrdinal, -1); + continue; + } + for (int slot : patternShape.visibleSlots()) { + appendCursorTemplateDirtyAssignment(source, patternOrdinal, slot); + } + appendCursorTemplateDirtyAssignment(source, patternOrdinal, -1); + } + source.append(" }\n\n"); source.append(" private long openCursor(int dbi) {\n"); source.append(" long readStamp = readLock();\n"); source.append(" MemoryStack stack = MemoryStack.stackPush();\n"); @@ -1199,28 +1230,105 @@ private void appendHelpers(StringBuilder source) { private void appendCursorRowHelpers(StringBuilder source, int patternOrdinal, int slot, LmdbLftjExecutionShape.PatternShape patternShape) { String suffix = slotSuffix(patternOrdinal, slot); + appendPrepareTemplateMethod(source, patternOrdinal, slot, patternShape, "Explicit"); + appendEnsureTemplateMethod(source, suffix, "Explicit"); appendDecodeMethod(source, patternOrdinal, slot, patternShape, "Explicit"); appendWithinUpperBoundMethod(source, patternOrdinal, slot, patternShape, "Explicit"); 
appendAdvanceCursorMethod(source, patternOrdinal, slot, patternShape, "Explicit"); if (includeInferred) { + appendPrepareTemplateMethod(source, patternOrdinal, slot, patternShape, "Inferred"); + appendEnsureTemplateMethod(source, suffix, "Inferred"); appendDecodeMethod(source, patternOrdinal, slot, patternShape, "Inferred"); appendWithinUpperBoundMethod(source, patternOrdinal, slot, patternShape, "Inferred"); appendAdvanceCursorMethod(source, patternOrdinal, slot, patternShape, "Inferred"); } } - private void appendDecodeMethod(StringBuilder source, int patternOrdinal, int slot, + private void appendPrepareTemplateMethod(StringBuilder source, int patternOrdinal, int slot, LmdbLftjExecutionShape.PatternShape patternShape, String kind) { String suffix = slotSuffix(patternOrdinal, slot); - source.append(" private void decodeRow").append(suffix).append(kind).append("() {\n"); - source.append(" ByteBuffer key = key").append(suffix).append(kind).append(".mv_data().duplicate();\n"); + int keyFieldIndex = keyFieldIndex(patternShape, slot); + source.append(" private void prepareTemplate").append(suffix).append(kind).append("() {\n"); + source.append(" lower").append(suffix).append(kind).append(".clear();\n"); + for (int i = 0; i < keyFieldIndex; i++) { + int component = patternShape.indexComponent(i); + source.append(" Varint.writeUnsigned(lower") + .append(suffix) + .append(kind) + .append(", ") + .append(componentValueExpression(patternOrdinal, patternShape, component)) + .append(");\n"); + } + source.append(" prefixLength") + .append(suffix) + .append(kind) + .append(" = lower") + .append(suffix) + .append(kind) + .append(".position();\n"); + source.append(" upperBuffer").append(suffix).append(kind).append(".clear();\n"); for (int i = 0; i < patternShape.indexFields().length; i++) { - String fieldName = componentFieldName(patternShape.indexComponent(i)); - source.append(" ") - .append(fieldName) + int component = patternShape.indexComponent(i); + source.append(" 
Varint.writeUnsigned(upperBuffer") .append(suffix) .append(kind) - .append(" = Varint.readUnsigned(key);\n"); + .append(", ") + .append(upperBoundExpression(patternOrdinal, patternShape, slot, component)) + .append(");\n"); + } + source.append(" upperBuffer").append(suffix).append(kind).append(".flip();\n"); + source.append(" upper") + .append(suffix) + .append(kind) + .append(".mv_data(upperBuffer") + .append(suffix) + .append(kind) + .append(");\n"); + source.append(" templateDirty").append(suffix).append(kind).append(" = false;\n"); + source.append(" }\n\n"); + } + + private void appendEnsureTemplateMethod(StringBuilder source, String suffix, String kind) { + source.append(" private void ensureTemplate").append(suffix).append(kind).append("() {\n"); + source.append(" if (templateDirty").append(suffix).append(kind).append(") {\n"); + source.append(" prepareTemplate").append(suffix).append(kind).append("();\n"); + source.append(" }\n"); + source.append(" }\n\n"); + } + + private void appendDecodeMethod(StringBuilder source, int patternOrdinal, int slot, + LmdbLftjExecutionShape.PatternShape patternShape, String kind) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" private void decodeKeyValues").append(suffix).append(kind).append("() {\n"); + if (slot >= 0) { + int keyFieldIndex = patternShape.keyFieldIndexForSlot(slot); + source.append(" ByteBuffer key = key").append(suffix).append(kind).append(".mv_data();\n"); + source.append(" int offset = 0;\n"); + for (int i = 0; i < keyFieldIndex; i++) { + source.append(" offset += Varint.firstToLength(key.get(offset));\n"); + } + source.append(" value") + .append(suffix) + .append(kind) + .append(" = Varint.readUnsigned(key, offset);\n"); + } else if (patternShape.derivedBinaryRelation()) { + source.append(" ByteBuffer key = key").append(suffix).append(kind).append(".mv_data();\n"); + source.append(" int offset = 0;\n"); + for (int i = 0; i < patternShape.indexFields().length; i++) { + int component 
= patternShape.indexComponent(i); + String fieldName = componentFieldName(component); + if (component == patternShape.derivedSourceComponent() + || component == patternShape.derivedTargetComponent()) { + source.append(" ") + .append(fieldName) + .append(suffix) + .append(kind) + .append(" = Varint.readUnsigned(key, offset);\n"); + } + if (i + 1 < patternShape.indexFields().length) { + source.append(" offset += Varint.firstToLength(key.get(offset));\n"); + } + } } source.append(" }\n\n"); } @@ -1229,22 +1337,16 @@ private void appendWithinUpperBoundMethod(StringBuilder source, int patternOrdin LmdbLftjExecutionShape.PatternShape patternShape, String kind) { String suffix = slotSuffix(patternOrdinal, slot); source.append(" private boolean withinUpperBound").append(suffix).append(kind).append("() {\n"); - for (int i = 0; i < patternShape.indexFields().length; i++) { - int component = patternShape.indexComponent(i); - String valueExpr = componentAccessor(patternOrdinal, slot, kind, component); - String upperExpr = upperBoundExpression(patternOrdinal, patternShape, slot, component); - source.append(" int compare") - .append(i) - .append(" = Long.compare(") - .append(valueExpr) - .append(", ") - .append(upperExpr) - .append(");\n"); - source.append(" if (compare").append(i).append(" != 0) {\n"); - source.append(" return compare").append(i).append(" < 0;\n"); - source.append(" }\n"); - } - source.append(" return true;\n"); + source.append(" return mdb_cmp(txn, dbi") + .append(suffix) + .append(kind) + .append(", key") + .append(suffix) + .append(kind) + .append(", upper") + .append(suffix) + .append(kind) + .append(") <= 0;\n"); source.append(" }\n\n"); } @@ -1270,7 +1372,6 @@ private void appendAdvanceCursorMethod(StringBuilder source, int patternOrdinal, source.append(" available").append(suffix).append(kind).append(" = false;\n"); source.append(" return false;\n"); source.append(" }\n"); - source.append(" decodeRow").append(suffix).append(kind).append("();\n"); 
source.append(" available") .append(suffix) .append(kind) @@ -1278,6 +1379,9 @@ private void appendAdvanceCursorMethod(StringBuilder source, int patternOrdinal, .append(suffix) .append(kind) .append("();\n"); + source.append(" if (available").append(suffix).append(kind).append(") {\n"); + appendDecodeInvocation(source, patternShape, slot, suffix, kind, " "); + source.append(" }\n"); source.append(" return available").append(suffix).append(kind).append(";\n"); source.append(" } finally {\n"); source.append(" txnLockManager.unlockRead(readStamp);\n"); @@ -1293,6 +1397,38 @@ private String componentAccessor(int patternOrdinal, int slot, String kind, int return componentFieldName(component) + slotSuffix(patternOrdinal, slot) + kind; } + private String cursorValueAccessor(int patternOrdinal, int slot, String kind) { + return "value" + slotSuffix(patternOrdinal, slot) + kind; + } + + private void appendCursorTemplateDirtyAssignment(StringBuilder source, int patternOrdinal, int slot) { + String suffix = slotSuffix(patternOrdinal, slot); + source.append(" templateDirty").append(suffix).append("Explicit = true;\n"); + if (includeInferred) { + source.append(" templateDirty").append(suffix).append("Inferred = true;\n"); + } + } + + private void appendDecodeInvocation(StringBuilder source, LmdbLftjExecutionShape.PatternShape patternShape, + int slot, String suffix, String kind, String indent) { + if (!needsDecode(patternShape, slot)) { + return; + } + source.append(indent).append("decodeKeyValues").append(suffix).append(kind).append("();\n"); + } + + private boolean needsDecode(LmdbLftjExecutionShape.PatternShape patternShape, int slot) { + return slot >= 0 || patternShape.derivedBinaryRelation(); + } + + private int keyFieldIndex(LmdbLftjExecutionShape.PatternShape patternShape, int slot) { + if (slot >= 0) { + return patternShape.keyFieldIndexForSlot(slot); + } + return patternShape.hasHiddenTerms() ? 
patternShape.indexFields().length - 1 + : patternShape.indexFields().length; + } + private String componentFieldName(int component) { switch (component) { case TripleStore.SUBJ_IDX: diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index 2c67c51ae9..04b16c49f0 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Method; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -73,7 +74,35 @@ void fullStackCompilerSourceShouldAvoidGenericTrieHelpers() { .doesNotContain("LmdbLftjCursor") .doesNotContain("private LmdbCachedFrontier frontier") .doesNotContain("valueAt(") - .doesNotContain("long[] lowerBound"); + .doesNotContain("long[] lowerBound") + .doesNotContain(".mv_data().duplicate()") + .doesNotContain("new long[65536]") + .doesNotContain("new boolean[65536]"); + } + + @Test + void fullStackCompilerSourceShouldEmitRawBoundChecksAndCursorTemplates() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + String source = LmdbLftjFullCodegenCompiler.INSTANCE.sourceFor(plan, shape, true); + + assertThat(source) + .contains("templateDirtyP") + .contains("prefixLengthP") + .contains("prepareTemplateP") + .contains("ensureTemplateP") + .contains("upperP") + .contains("return mdb_cmp(txn, dbi") + .contains(", upper") + .contains("return mdb_cmp(txn, dbiP0WExplicit, keyP0WExplicit, keyP0WInferred);"); + } + + @Test + void reusedSeekBufferShouldHandleVarintBoundaryGrowthAndShrink() { + assertPatchedSeekBuffer(7L, 240L, 241L); + assertPatchedSeekBuffer(7L, 241L, 240L); + 
assertPatchedSeekBuffer(7L, 2287L, 2288L); + assertPatchedSeekBuffer(7L, 2288L, 2287L); } @Test @@ -387,6 +416,35 @@ private String render(BindingSet row) { + row.getValue("c").stringValue(); } + private void assertPatchedSeekBuffer(long prefixValue, long firstTarget, long secondTarget) { + ByteBuffer keyBuffer = ByteBuffer.allocate(TripleStore.MAX_KEY_LENGTH); + Varint.writeUnsigned(keyBuffer, prefixValue); + int prefixLength = keyBuffer.position(); + + patchSeekTail(keyBuffer, prefixLength, firstTarget); + assertThat(readPatchedPair(keyBuffer)).containsExactly(prefixValue, firstTarget); + + patchSeekTail(keyBuffer, prefixLength, secondTarget); + assertThat(readPatchedPair(keyBuffer)).containsExactly(prefixValue, secondTarget); + } + + private void patchSeekTail(ByteBuffer keyBuffer, int prefixLength, long target) { + keyBuffer.limit(keyBuffer.capacity()); + keyBuffer.position(prefixLength); + Varint.writeUnsigned(keyBuffer, target); + keyBuffer.limit(keyBuffer.position()); + keyBuffer.position(0); + } + + private long[] readPatchedPair(ByteBuffer keyBuffer) { + ByteBuffer key = keyBuffer.duplicate(); + key.position(0); + return new long[] { + Varint.readUnsigned(key), + Varint.readUnsigned(key) + }; + } + private int countMatches(String source, String regex) { int count = 0; Matcher matcher = Pattern.compile(regex).matcher(source); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index 5855b30c9d..a5196f6e01 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -40,7 +40,7 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) -@Warmup(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 30, time = 
1, timeUnit = TimeUnit.SECONDS) @BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" }) @Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) @@ -69,7 +69,8 @@ public class FoafCliqueQueryBenchmark { @Param({ "12345" }) public long seed; - @Param({ "interpreted", "executor_codegen", "full_codegen" }) +// @Param({ "interpreted", "executor_codegen", "full_codegen" }) + @Param({ "full_codegen" }) public String benchmarkMode; private File dataDir; @@ -106,12 +107,12 @@ public void tearDown() throws IOException { } } - @Benchmark +// @Benchmark public long cycle3() { return executeCount(QUERY_CYCLE_3); } - @Benchmark +// @Benchmark public long cycle4() { return executeCount(QUERY_CYCLE_4); } From 63632746103fc74409910e4f1a068558c71f49f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Sun, 5 Apr 2026 23:19:41 +0200 Subject: [PATCH 20/32] fastest yet, with codegen --- .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 29 +- .../sail/lmdb/LmdbLftjExecutionShape.java | 2 +- .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 13 + .../lmdb/LmdbLftjFullCodegenCompiler.java | 35 ++ .../rdf4j/sail/lmdb/LmdbLftjOptimizer.java | 305 +++++++++++++++++- .../eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java | 120 ++++++- .../rdf4j/sail/lmdb/LmdbLftjPlanner.java | 74 ++++- .../sail/lmdb/LmdbLftjPreparedPlanCache.java | 17 + .../rdf4j/sail/lmdb/LmdbQueryAccess.java | 4 + .../rdf4j/sail/lmdb/LmdbStoreConnection.java | 26 +- .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 198 +++++++++++- .../sail/lmdb/LmdbLftjOptimizerTest.java | 115 +++++++ .../sail/lmdb/LmdbLftjSyntheticScenario.java | 5 + .../benchmark/FoafCliqueQueryBenchmark.java | 4 +- 14 files changed, 917 insertions(+), 30 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java index 99c571cf4e..a4472a8d48 100644 --- 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -13,6 +13,7 @@ import java.util.HashMap; import java.util.IdentityHashMap; +import java.util.List; import java.util.Map; import java.util.function.BiConsumer; @@ -28,6 +29,8 @@ public final class LmdbLftjBindingState { private final BindingSet inputBindings; private final LmdbQueryAccess queryAccess; private final String[] variableNames; + private final String[] outputNames; + private final int[] outputSlots; private final Map variableSlots = new HashMap<>(); private final long[] fixedValues; private final boolean[] fixedPresent; @@ -52,6 +55,19 @@ public final class LmdbLftjBindingState { for (int i = 0; i < variableCount; i++) { variableSlots.put(variableNames[i], i); } + List outputBindings = plan.outputBindings(); + this.outputNames = new String[outputBindings.size()]; + this.outputSlots = new int[outputBindings.size()]; + for (int i = 0; i < outputBindings.size(); i++) { + LmdbLftjPlan.OutputBinding outputBinding = outputBindings.get(i); + Integer slot = variableSlots.get(outputBinding.sourceVariable()); + if (slot == null) { + throw new IllegalArgumentException( + "Unknown LMDB LFTJ output source variable: " + outputBinding.sourceVariable()); + } + outputNames[i] = outputBinding.outputName(); + outputSlots[i] = slot; + } } public boolean initialize() { @@ -150,9 +166,10 @@ long fixedId(LmdbLftjPatternPlan.TermRef term) { public BindingSet materialize(QueryEvaluationContext context) { MutableBindingSet result = context.createBindingSet(inputBindings); BiConsumer[] setters = bindingSetters(context); - for (int slot = 0; slot < variableNames.length; slot++) { - if (assignedPresent[slot]) { - setters[slot].accept(queryAccess.resolveValue(assignedValues[slot]), result); + for (int outputIndex = 0; outputIndex < outputSlots.length; outputIndex++) { + int slot = outputSlots[outputIndex]; + if 
(isBound(slot)) { + setters[outputIndex].accept(queryAccess.lazyValue(value(slot)), result); } } return result; @@ -182,9 +199,9 @@ private BiConsumer[] bindingSetters(QueryEvaluationCon if (bindingSetters != null && bindingSettersContext == context) { return bindingSetters; } - BiConsumer[] setters = new BiConsumer[variableNames.length]; - for (int slot = 0; slot < variableNames.length; slot++) { - setters[slot] = context.setBinding(variableNames[slot]); + BiConsumer[] setters = new BiConsumer[outputNames.length]; + for (int outputIndex = 0; outputIndex < outputNames.length; outputIndex++) { + setters[outputIndex] = context.setBinding(outputNames[outputIndex]); } bindingSetters = setters; bindingSettersContext = context; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java index bea0d2abab..e1ad4f05c7 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java @@ -19,7 +19,7 @@ public final class LmdbLftjExecutionShape { - private static final int FULL_STACK_TEMPLATE_VERSION = 3; + private static final int FULL_STACK_TEMPLATE_VERSION = 4; private final int variableCount; private final int[][] cursorOrdinalsBySlot; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index 6bdb3f7186..e0684bcc05 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -157,6 +157,10 @@ protected void handleClose() { private BindingSet computeNextElement() { while (depth >= 0) { if (depth == searchSlots.length) { + if (!passesInequalityConstraints(plan, state)) { + backtrackAfterLeaf(); + 
continue; + } long multiplicity = witnessMultiplicity(plan, metrics, frontierProvider); backtrackAfterLeaf(); if (multiplicity > 0) { @@ -258,6 +262,15 @@ private void releaseDepth(int depth) { } } + private static boolean passesInequalityConstraints(LmdbLftjPlan plan, LmdbLftjBindingState state) { + for (LmdbLftjPlan.InequalityConstraint inequality : plan.inequalityConstraints()) { + if (state.value(inequality.leftVariable()) == state.value(inequality.rightVariable())) { + return false; + } + } + return true; + } + private int[] collectSearchSlots(LmdbLftjPlan plan, LmdbLftjBindingState state) { int[] searchSlots = new int[plan.variableOrder().size()]; int count = 0; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java index b082e4ae37..b22aad3484 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java @@ -318,6 +318,10 @@ private void appendComputeNextElement(StringBuilder source, int variableCount) { source.append(" protected BindingSet computeNextElement() {\n"); source.append(" while (depth >= 0) {\n"); source.append(" if (depth == ").append(variableCount).append(") {\n"); + source.append(" if (!passesInequalityConstraints()) {\n"); + source.append(" backtrackAfterLeaf();\n"); + source.append(" continue;\n"); + source.append(" }\n"); source.append(" long multiplicity = 1L;\n"); for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { source.append(" long witnesses") @@ -1214,6 +1218,7 @@ private void appendHelpers(StringBuilder source) { source.append(" throw new SailException(e);\n"); source.append(" }\n"); source.append(" }\n\n"); + appendInequalityHelper(source); for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) 
{ LmdbLftjExecutionShape.PatternShape patternShape = shape.pattern(patternOrdinal); if (patternShape.derivedBinaryRelation()) { @@ -1227,6 +1232,28 @@ private void appendHelpers(StringBuilder source) { } } + private void appendInequalityHelper(StringBuilder source) { + source.append(" private boolean passesInequalityConstraints() {\n"); + if (plan.inequalityConstraints().isEmpty()) { + source.append(" return true;\n"); + } else { + source.append(" return "); + for (int i = 0; i < plan.inequalityConstraints().size(); i++) { + LmdbLftjPlan.InequalityConstraint inequality = plan.inequalityConstraints().get(i); + if (i > 0) { + source.append("\n && "); + } + source.append("state().value(") + .append(variableSlot(inequality.leftVariable())) + .append(") != state().value(") + .append(variableSlot(inequality.rightVariable())) + .append(')'); + } + source.append(";\n"); + } + source.append(" }\n\n"); + } + private void appendCursorRowHelpers(StringBuilder source, int patternOrdinal, int slot, LmdbLftjExecutionShape.PatternShape patternShape) { String suffix = slotSuffix(patternOrdinal, slot); @@ -1393,6 +1420,14 @@ private String slotSuffix(int patternOrdinal, int slot) { return "P" + patternOrdinal + (slot >= 0 ? 
"S" + slot : "W"); } + private int variableSlot(String variableName) { + int slot = plan.variableOrder().indexOf(variableName); + if (slot < 0) { + throw new IllegalArgumentException("Unknown LMDB LFTJ variable in full-stack codegen: " + variableName); + } + return slot; + } + private String componentAccessor(int patternOrdinal, int slot, String kind, int component) { return componentFieldName(component) + slotSuffix(patternOrdinal, slot) + kind; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java index 0f70939439..92ef00f5ff 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java @@ -12,14 +12,28 @@ package org.eclipse.rdf4j.sail.lmdb; import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.Dataset; +import org.eclipse.rdf4j.query.algebra.And; +import org.eclipse.rdf4j.query.algebra.Compare; +import org.eclipse.rdf4j.query.algebra.Extension; +import org.eclipse.rdf4j.query.algebra.ExtensionElem; +import org.eclipse.rdf4j.query.algebra.Filter; import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.Projection; +import org.eclipse.rdf4j.query.algebra.ProjectionElem; +import org.eclipse.rdf4j.query.algebra.QueryModelNode; import org.eclipse.rdf4j.query.algebra.StatementPattern; import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.UnaryTupleOperator; +import org.eclipse.rdf4j.query.algebra.ValueExpr; +import org.eclipse.rdf4j.query.algebra.Var; import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer; import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; import 
org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor; @@ -63,20 +77,29 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { List operands = new ArrayList<>(); collectOperands(node, operands); - List patterns = operands.stream() - .filter(StatementPattern.class::isInstance) - .map(StatementPattern.class::cast) - .toList(); + FusionTarget fusionTarget = tryExtractFusionTarget(node, operands); + List patterns = (fusionTarget != null ? fusionTarget.patterns() + : operands.stream() + .filter(StatementPattern.class::isInstance) + .map(StatementPattern.class::cast) + .toList()); if (patterns.size() < 3) { return false; } Set configuredIndexes = queryAccess.configuredIndexes(); - TupleExpr fallbackExpr = rebuildJoin(patterns.stream().map(TupleExpr::clone).toList()); - String cacheKey = LmdbLftjPreparedPlanCache.normalizedKey(patterns, configuredIndexes); + TupleExpr fallbackExpr = fusionTarget != null ? fusionTarget.root().clone() + : rebuildJoin(patterns.stream().map(TupleExpr::clone).toList()); + List outputBindings = fusionTarget != null ? fusionTarget.outputBindings() + : List.of(); + List inequalityConstraints = fusionTarget != null + ? 
fusionTarget.inequalityConstraints() + : List.of(); + String cacheKey = LmdbLftjPreparedPlanCache.normalizedKey(patterns, configuredIndexes, outputBindings, + inequalityConstraints); LmdbLftjPlanner.PlanningResult plan = queryAccess.cachedPlanningResult(cacheKey); if (plan == null) { - plan = planner.plan(fallbackExpr, patterns, configuredIndexes); + plan = planner.plan(fallbackExpr, patterns, configuredIndexes, outputBindings, inequalityConstraints); queryAccess.cachePlanningResult(cacheKey, plan); } if (!plan.planned()) { @@ -85,6 +108,10 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { } LmdbLftjTupleExpr lftjNode = new LmdbLftjTupleExpr(plan.plan()); + if (fusionTarget != null) { + fusionTarget.root().replaceWith(lftjNode); + return true; + } List rebuiltOperands = new ArrayList<>(); boolean inserted = false; for (TupleExpr operand : operands) { @@ -105,6 +132,219 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { return true; } + private FusionTarget tryExtractFusionTarget(Join node, List operands) { + List extractedPatterns = new ArrayList<>(operands.size()); + for (TupleExpr operand : operands) { + ExtractedPattern extractedPattern = extractFilteredPattern(operand); + if (extractedPattern == null) { + return null; + } + extractedPatterns.add(extractedPattern); + } + + List patterns = extractedPatterns.stream().map(ExtractedPattern::pattern).toList(); + List visibleVariables = collectVisibleVariables(patterns); + QueryModelNode current = node; + List filters = new ArrayList<>(); + Extension extension = null; + Projection projection = null; + + while (current.getParentNode() instanceof UnaryTupleOperator + && ((UnaryTupleOperator) current.getParentNode()).getArg() == current) { + QueryModelNode parent = current.getParentNode(); + if (parent instanceof Filter) { + filters.add((Filter) parent); + current = parent; + continue; + } + if (parent instanceof Extension && extension == null) { + extension = (Extension) parent; 
+ current = parent; + continue; + } + if (parent instanceof Projection && projection == null) { + projection = (Projection) parent; + current = parent; + continue; + } + break; + } + + List inequalities = new ArrayList<>(); + for (ExtractedPattern extractedPattern : extractedPatterns) { + inequalities.addAll(extractedPattern.inequalityConstraints()); + } + if (filters.isEmpty() && inequalities.isEmpty() && extension == null && projection == null) { + return null; + } + + List outerInequalities = collectInequalities(filters, visibleVariables); + if (outerInequalities == null) { + return null; + } + inequalities.addAll(outerInequalities); + if (!supportsVisibleVariables(inequalities, Set.copyOf(visibleVariables))) { + return null; + } + + List outputBindings = collectOutputBindings(projection, extension, + visibleVariables); + if (outputBindings == null) { + return null; + } + + return new FusionTarget((TupleExpr) current, patterns, outputBindings, inequalities); + } + + private ExtractedPattern extractFilteredPattern(TupleExpr operand) { + List inequalities = new ArrayList<>(); + TupleExpr current = operand; + while (current instanceof Filter) { + if (!appendInequalities(((Filter) current).getCondition(), inequalities)) { + return null; + } + current = ((Filter) current).getArg(); + } + if (!(current instanceof StatementPattern)) { + return null; + } + return new ExtractedPattern((StatementPattern) current, inequalities); + } + + private List collectVisibleVariables(List patterns) { + LinkedHashSet variableNames = new LinkedHashSet<>(); + for (StatementPattern pattern : patterns) { + for (Var var : pattern.getVarList()) { + if (var != null && !var.hasValue() && !var.isAnonymous() && var.getName() != null) { + variableNames.add(var.getName()); + } + } + } + return List.copyOf(variableNames); + } + + private List collectInequalities(List filters, + List visibleVariables) { + if (filters.isEmpty()) { + return List.of(); + } + List inequalities = new ArrayList<>(); + 
for (Filter filter : filters) { + if (!appendInequalities(filter.getCondition(), inequalities)) { + return null; + } + } + if (!supportsVisibleVariables(inequalities, Set.copyOf(visibleVariables))) { + return null; + } + return inequalities; + } + + private boolean appendInequalities(ValueExpr condition, List inequalities) { + if (condition instanceof And) { + And and = (And) condition; + return appendInequalities(and.getLeftArg(), inequalities) + && appendInequalities(and.getRightArg(), inequalities); + } + if (!(condition instanceof Compare)) { + return false; + } + Compare compare = (Compare) condition; + if (compare.getOperator() != Compare.CompareOp.NE) { + return false; + } + if (!(compare.getLeftArg() instanceof Var) || !(compare.getRightArg() instanceof Var)) { + return false; + } + Var left = (Var) compare.getLeftArg(); + Var right = (Var) compare.getRightArg(); + if (!isNamedVariable(left) || !isNamedVariable(right)) { + return false; + } + inequalities.add(new LmdbLftjPlan.InequalityConstraint(left.getName(), right.getName())); + return true; + } + + private boolean supportsVisibleVariables(List inequalities, + Set visibleVariables) { + return inequalities.stream() + .allMatch(inequality -> visibleVariables.contains(inequality.leftVariable()) + && visibleVariables.contains(inequality.rightVariable())); + } + + private boolean isNamedVariable(Var var) { + return !var.hasValue() && !var.isAnonymous() && var.getName() != null; + } + + private List collectOutputBindings(Projection projection, Extension extension, + List visibleVariables) { + if (projection == null) { + return extension == null ? 
List.of() : null; + } + Map extensionBindings = collectExtensionBindings(extension, visibleVariables); + if (extensionBindings == null) { + return null; + } + Set visible = Set.copyOf(visibleVariables); + List outputBindings = new ArrayList<>(); + for (ProjectionElem projectionElem : projection.getProjectionElemList().getElements()) { + String sourceVariable = resolveProjectedSource(projectionElem, extensionBindings, visible); + if (sourceVariable == null) { + return null; + } + outputBindings.add(new LmdbLftjPlan.OutputBinding( + projectionElem.getProjectionAlias().orElse(projectionElem.getName()), + sourceVariable)); + } + return outputBindings; + } + + private Map collectExtensionBindings(Extension extension, List visibleVariables) { + if (extension == null) { + return Map.of(); + } + Set visible = Set.copyOf(visibleVariables); + Map extensionBindings = new HashMap<>(); + for (ExtensionElem element : extension.getElements()) { + if (!(element.getExpr() instanceof Var)) { + return null; + } + Var source = (Var) element.getExpr(); + if (!isNamedVariable(source) || !visible.contains(source.getName())) { + return null; + } + extensionBindings.put(element.getName(), source.getName()); + } + return extensionBindings; + } + + private String resolveProjectedSource(ProjectionElem projectionElem, Map extensionBindings, + Set visibleVariables) { + String sourceExpression = resolveSourceExpression(projectionElem.getSourceExpression(), visibleVariables); + if (sourceExpression != null) { + return sourceExpression; + } + String sourceName = projectionElem.getName(); + if (extensionBindings.containsKey(sourceName)) { + return extensionBindings.get(sourceName); + } + if (visibleVariables.contains(sourceName)) { + return sourceName; + } + return null; + } + + private String resolveSourceExpression(ExtensionElem sourceExpression, Set visibleVariables) { + if (sourceExpression == null || !(sourceExpression.getExpr() instanceof Var)) { + return null; + } + Var source = (Var) 
sourceExpression.getExpr(); + if (!isNamedVariable(source) || !visibleVariables.contains(source.getName())) { + return null; + } + return source.getName(); + } + private void collectOperands(TupleExpr expr, List operands) { if (expr instanceof Join) { Join join = (Join) expr; @@ -128,4 +368,55 @@ private TupleExpr rebuildJoin(List operands) { } return rebuilt; } + + private static final class FusionTarget { + private final TupleExpr root; + private final List patterns; + private final List outputBindings; + private final List inequalityConstraints; + + private FusionTarget(TupleExpr root, List patterns, + List outputBindings, + List inequalityConstraints) { + this.root = root; + this.patterns = List.copyOf(patterns); + this.outputBindings = List.copyOf(outputBindings); + this.inequalityConstraints = List.copyOf(inequalityConstraints); + } + + private TupleExpr root() { + return root; + } + + private List patterns() { + return patterns; + } + + private List outputBindings() { + return outputBindings; + } + + private List inequalityConstraints() { + return inequalityConstraints; + } + } + + private static final class ExtractedPattern { + private final StatementPattern pattern; + private final List inequalityConstraints; + + private ExtractedPattern(StatementPattern pattern, + List inequalityConstraints) { + this.pattern = pattern; + this.inequalityConstraints = List.copyOf(inequalityConstraints); + } + + private StatementPattern pattern() { + return pattern; + } + + private List inequalityConstraints() { + return inequalityConstraints; + } + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java index 424300eb62..cba2b275bf 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java @@ -26,16 +26,28 @@ public final class LmdbLftjPlan { private 
final Set assuredBindingNames; private final List variableOrder; private final List patternPlans; + private final List outputBindings; + private final List inequalityConstraints; private final String executionKey; LmdbLftjPlan(TupleExpr fallbackExpr, Set bindingNames, Set assuredBindingNames, List variableOrder, List patternPlans) { + this(fallbackExpr, bindingNames, assuredBindingNames, variableOrder, patternPlans, + identityOutputBindings(variableOrder), List.of()); + } + + LmdbLftjPlan(TupleExpr fallbackExpr, Set bindingNames, Set assuredBindingNames, + List variableOrder, List patternPlans, List outputBindings, + List inequalityConstraints) { this.fallbackExpr = fallbackExpr; this.bindingNames = Set.copyOf(new LinkedHashSet<>(bindingNames)); this.assuredBindingNames = Set.copyOf(new LinkedHashSet<>(assuredBindingNames)); this.variableOrder = List.copyOf(variableOrder); this.patternPlans = List.copyOf(patternPlans); - this.executionKey = executionKey(this.variableOrder, this.patternPlans); + this.outputBindings = List.copyOf(outputBindings); + this.inequalityConstraints = List.copyOf(inequalityConstraints); + this.executionKey = executionKey(this.variableOrder, this.patternPlans, this.outputBindings, + this.inequalityConstraints); } TupleExpr fallbackExpr() { @@ -58,6 +70,14 @@ List patternPlans() { return patternPlans; } + List outputBindings() { + return outputBindings; + } + + List inequalityConstraints() { + return inequalityConstraints; + } + public String executionKey() { return executionKey; } @@ -72,7 +92,8 @@ int patternCount() { LmdbLftjPlan copy() { return new LmdbLftjPlan(fallbackExpr.clone(), bindingNames, assuredBindingNames, variableOrder, - patternPlans.stream().map(LmdbLftjPatternPlan::copy).collect(Collectors.toList())); + patternPlans.stream().map(LmdbLftjPatternPlan::copy).collect(Collectors.toList()), outputBindings, + inequalityConstraints); } @Override @@ -85,16 +106,22 @@ public boolean equals(Object other) { && 
Objects.equals(bindingNames, o.bindingNames) && Objects.equals(assuredBindingNames, o.assuredBindingNames) && Objects.equals(variableOrder, o.variableOrder) - && Objects.equals(patternPlans, o.patternPlans); + && Objects.equals(patternPlans, o.patternPlans) + && Objects.equals(outputBindings, o.outputBindings) + && Objects.equals(inequalityConstraints, o.inequalityConstraints); } @Override public int hashCode() { - return Objects.hash(fallbackExpr, bindingNames, assuredBindingNames, variableOrder, patternPlans); + return Objects.hash(fallbackExpr, bindingNames, assuredBindingNames, variableOrder, patternPlans, + outputBindings, inequalityConstraints); } - private static String executionKey(List variableOrder, List patternPlans) { - StringBuilder builder = new StringBuilder(variableOrder.size() * 16 + patternPlans.size() * 48); + private static String executionKey(List variableOrder, List patternPlans, + List outputBindings, List inequalityConstraints) { + StringBuilder builder = new StringBuilder( + variableOrder.size() * 16 + patternPlans.size() * 48 + outputBindings.size() * 24 + + inequalityConstraints.size() * 24); builder.append("varOrder="); for (String variable : variableOrder) { builder.append(variable).append(','); @@ -103,9 +130,26 @@ private static String executionKey(List variableOrder, List pattern } List visibleVariables = collectVisibleVariables(patternList); + if (!supportsOutputs(outputBindings, visibleVariables)) { + return PlanningResult.rejected("unsupported-output-binding"); + } + if (!supportsInequalities(inequalityConstraints, visibleVariables)) { + return PlanningResult.rejected("unsupported-inequality"); + } PlanningCandidate candidate = chooseCandidate(patternList, configuredIndexes, visibleVariables); if (candidate == null) { return PlanningResult.rejected("incompatible-index-order"); @@ -67,8 +80,67 @@ PlanningResult plan(TupleExpr fallbackExpr, Collection pattern patternPlans.add(new LmdbLftjPatternPlan(patternList.get(i), 
candidate.indexNames.get(i))); } + if (outputBindings.isEmpty() && inequalityConstraints.isEmpty()) { + return PlanningResult.planned(new LmdbLftjPlan(fallbackExpr, fallbackExpr.getBindingNames(), + fallbackExpr.getAssuredBindingNames(), candidate.variableOrder, patternPlans)); + } + return PlanningResult.planned(new LmdbLftjPlan(fallbackExpr, fallbackExpr.getBindingNames(), - fallbackExpr.getAssuredBindingNames(), candidate.variableOrder, patternPlans)); + fallbackExpr.getAssuredBindingNames(), candidate.variableOrder, patternPlans, + effectiveOutputs(outputBindings, candidate.variableOrder), + canonicalInequalities(inequalityConstraints))); + } + + private boolean supportsOutputs(List outputBindings, List visibleVariables) { + if (outputBindings.isEmpty()) { + return true; + } + Set visible = Set.copyOf(visibleVariables); + return outputBindings.stream().allMatch(outputBinding -> visible.contains(outputBinding.sourceVariable())); + } + + private boolean supportsInequalities(List inequalityConstraints, + List visibleVariables) { + if (inequalityConstraints.isEmpty()) { + return true; + } + Set visible = Set.copyOf(visibleVariables); + return inequalityConstraints.stream() + .allMatch( + inequality -> visible.contains(inequality.leftVariable()) + && visible.contains(inequality.rightVariable())); + } + + private List effectiveOutputs(List outputBindings, + List variableOrder) { + if (!outputBindings.isEmpty()) { + return List.copyOf(outputBindings); + } + return variableOrder.stream() + .map(variable -> new LmdbLftjPlan.OutputBinding(variable, variable)) + .collect(Collectors.toList()); + } + + private List canonicalInequalities( + List inequalityConstraints) { + return inequalityConstraints.stream() + .map(this::normalizeInequality) + .distinct() + .sorted((left, right) -> { + int leftCompare = left.leftVariable().compareTo(right.leftVariable()); + if (leftCompare != 0) { + return leftCompare; + } + return left.rightVariable().compareTo(right.rightVariable()); + }) 
+ .collect(Collectors.toList()); + } + + private LmdbLftjPlan.InequalityConstraint normalizeInequality(LmdbLftjPlan.InequalityConstraint inequality) { + if (inequality.leftVariable().compareTo(inequality.rightVariable()) <= 0) { + return inequality; + } + return new LmdbLftjPlan.InequalityConstraint(inequality.rightVariable(), inequality.leftVariable()); } private boolean hasRepeatedVariable(StatementPattern pattern) { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java index b983bff540..a23b23ef3b 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java @@ -44,6 +44,12 @@ synchronized void clear() { } static String normalizedKey(List patterns, Set configuredIndexes) { + return normalizedKey(patterns, configuredIndexes, List.of(), List.of()); + } + + static String normalizedKey(List patterns, Set configuredIndexes, + List outputBindings, + List inequalityConstraints) { StringBuilder builder = new StringBuilder(configuredIndexes.size() * 6 + patterns.size() * 32); builder.append("indexes="); configuredIndexes.stream().sorted().forEach(indexName -> builder.append(indexName).append(',')); @@ -52,6 +58,17 @@ static String normalizedKey(List patterns, Set configu .map(LmdbLftjPreparedPlanCache::patternKey) .sorted() .forEach(builder::append); + builder.append(";outputs="); + for (LmdbLftjPlan.OutputBinding outputBinding : outputBindings) { + builder.append(outputBinding.outputName()).append('<').append(outputBinding.sourceVariable()).append(';'); + } + builder.append(";ineq="); + for (LmdbLftjPlan.InequalityConstraint inequalityConstraint : inequalityConstraints) { + builder.append(inequalityConstraint.leftVariable()) + .append('!') + .append(inequalityConstraint.rightVariable()) + .append(';'); + 
} return builder.toString(); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java index e539b2d020..198680d391 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java @@ -27,6 +27,10 @@ public interface LmdbQueryAccess { Value resolveValue(long id); + default Value lazyValue(long id) { + return resolveValue(id); + } + boolean includeInferred(); Set configuredIndexes(); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index 652cdc6fe1..07d3ce0124 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -138,10 +138,15 @@ public boolean addInferredStatement(Resource subj, IRI pred, Value obj, Resource protected CloseableIteration evaluateInternal(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings, boolean includeInferred) throws SailException { - // ensure that all elements of the binding set are initialized (lazy values are resolved) - return new IterationWrapper( - evaluateWithTripleSource(tupleExpr, dataset, bindings, includeInferred, - rdfDataset -> createTripleSource(rdfDataset, dataset, includeInferred))) { + boolean lftjRuntimeSafe = isLftjRuntimeSafe(dataset); + CloseableIteration iteration = evaluateWithTripleSource(tupleExpr, dataset, bindings, + includeInferred, + rdfDataset -> createTripleSource(rdfDataset, includeInferred, lftjRuntimeSafe)); + if (lftjRuntimeSafe) { + return iteration; + } + // Non-LFTJ query paths keep the historical eager-init behavior. 
+ return new IterationWrapper(iteration) { @Override public BindingSet next() throws QueryEvaluationException { BindingSet bs = super.next(); @@ -151,9 +156,9 @@ public BindingSet next() throws QueryEvaluationException { }; } - private TripleSource createTripleSource(SailDataset rdfDataset, Dataset dataset, boolean includeInferred) { + private TripleSource createTripleSource(SailDataset rdfDataset, boolean includeInferred, boolean lftjRuntimeSafe) { TripleSource delegate = new SailDatasetTripleSource(lmdbStore.getValueFactory(), rdfDataset); - if (!isLftjRuntimeSafe(dataset)) { + if (!lftjRuntimeSafe) { return delegate; } return new LmdbLftjTripleSource(delegate, createQueryAccess(includeInferred)); @@ -225,6 +230,15 @@ public Value resolveValue(long id) { } } + @Override + public Value lazyValue(long id) { + try { + return valueStore.getLazyValue(id); + } catch (IOException e) { + throw new SailException(e); + } + } + @Override public boolean includeInferred() { return includeInferred; diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index 04b16c49f0..6844df9a17 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -26,19 +26,29 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; +import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.Dataset; +import 
org.eclipse.rdf4j.query.algebra.SingletonSet; +import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; import org.eclipse.rdf4j.query.explanation.Explanation; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.base.SailDataset; import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryBenchmark; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.Test; @@ -114,6 +124,19 @@ void fullStackCompilerShouldFingerprintIncludeInferredSeparately() { .isNotEqualTo(LmdbLftjFullCodegenCompiler.INSTANCE.cacheKey(plan, shape, false)); } + @Test + void fullStackCompilerSourceShouldInlineInequalityGuardsForAliasedPlans() { + LmdbLftjPlan plan = syntheticAliasedPlan(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + String source = LmdbLftjFullCodegenCompiler.INSTANCE.sourceFor(plan, shape, false); + + assertThat(source) + .contains("passesInequalityConstraints") + .contains("state().value(0) != state().value(1)") + .doesNotContain("FilterIterator") + .doesNotContain("ProjectionIterator"); + } + @Test void fullStackCompilerShouldShareDerivedRelationGroupsAcrossEquivalentPatterns() { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); @@ -171,6 +194,63 @@ void compiledAndInterpretedShouldMatchForFullyBoundInput() { assertThat(compiled).containsExactlyElementsOf(interpreted); } + @Test + void compiledIterationShouldNotEagerlyResolveMaterializedValues() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); + CachingQueryAccess queryAccess = new CachingQueryAccess(new CountingCompiler()); + QueryEvaluationStep 
evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); + + List rows = drain(evaluationStep, EmptyBindingSet.getInstance()); + + assertThat(rows).isNotEmpty(); + assertThat(queryAccess.resolveValueCalls).isZero(); + } + + @Test + void compiledIterationShouldMaterializeAliasedBindingsLazily() { + LmdbLftjPlan plan = syntheticAliasedPlan(); + CachingQueryAccess queryAccess = new CachingQueryAccess(new CountingCompiler()); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); + + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + assertThat(iteration).hasNext(); + BindingSet row = iteration.next(); + assertThat(row.getBindingNames()).containsExactlyInAnyOrder("x", "y", "z"); + assertThat(row.getValue("x")).isEqualTo(LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + assertThat(row.getValue("y")).isEqualTo(LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + assertThat(row.getValue("z")).isEqualTo(LmdbLftjSyntheticScenario.VF.createIRI("urn:person:3")); + assertThat(row.hasBinding("a")).isFalse(); + assertThat(row.hasBinding("b")).isFalse(); + assertThat(row.hasBinding("c")).isFalse(); + } + + assertThat(queryAccess.resolveValueCalls).isZero(); + } + + @Test + void evaluateInternalShouldSkipInitValueForLftjRuntimeSafeQueries() throws Exception { + try (FullCodegenFixture fixture = new FullCodegenFixture()) { + TrackingBenchmarkStoreConnection connection = new TrackingBenchmarkStoreConnection(fixture.store); + LmdbQueryAccess queryAccess = fixture.connection.benchmarkQueryAccess(false); + long id = queryAccess.resolveId(FullCodegenFixture.person(1)); + Value lazyValue = queryAccess.lazyValue(id); + QueryBindingSet row = new QueryBindingSet(); + row.setBinding("x", lazyValue); + connection.nextIteration = new CloseableIteratorIteration<>(List.of(row).iterator()); + + try (CloseableIteration iteration = connection.evaluateInternal(new 
SingletonSet(), + null, EmptyBindingSet.getInstance(), false)) { + assertThat(iteration).hasNext(); + assertThat(iteration.next().getValue("x")).isSameAs(lazyValue); + } + + assertThat(connection.initValueCalls).isZero(); + assertThat(isInitializedLmdbValue(lazyValue)).isFalse(); + assertThat(lazyValue.stringValue()).isEqualTo("urn:person:1"); + assertThat(isInitializedLmdbValue(lazyValue)).isTrue(); + } + } + @Test void fullStackCompilerShouldProduceGeneratedIterationOnRealStore() throws Exception { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); @@ -246,6 +326,29 @@ void fullCodegenFoafBenchmarkCycle5ShouldCompileGeneratedFactory() throws Except assertFoafBenchmarkQueryCompilesGeneratedFactory(5); } + @Test + void fullCodegenShouldFuseSupportedFilterAndProjectionIntoLftjPlan() throws Exception { + try (FullCodegenFixture fixture = new FullCodegenFixture()) { + String query = foafCycleAliasQuery(3); + + try (SailRepositoryConnection connection = fixture.repository.getConnection()) { + String unoptimizedPlan = connection.prepareTupleQuery(query) + .explain(Explanation.Level.Unoptimized) + .toString(); + String optimizedPlan = connection.prepareTupleQuery(query) + .explain(Explanation.Level.Optimized) + .toString(); + assertThat(optimizedPlan) + .withFailMessage("unoptimized=%s%noptimized=%s%ncachedPlans=%s", unoptimizedPlan, optimizedPlan, + preparedPlanDescriptions(fixture.store.preparedPlanCache())) + .contains("LmdbLftjTupleExpr") + .doesNotContain("Filter") + .doesNotContain("Projection") + .doesNotContain("Extension"); + } + } + } + @Test void fullCodegenFoafBenchmarkSequentialQueriesShouldKeepUsingGeneratedFactories() throws Exception { FoafCliqueQueryBenchmark benchmark = new FoafCliqueQueryBenchmark(); @@ -485,6 +588,21 @@ private List cacheEntryDescriptions(LmdbLftjCodegenCache cache) throws E return descriptions; } + @SuppressWarnings("unchecked") + private List preparedPlanDescriptions(LmdbLftjPreparedPlanCache cache) throws Exception { + 
Field entriesField = LmdbLftjPreparedPlanCache.class.getDeclaredField("entries"); + entriesField.setAccessible(true); + Map entries = (LinkedHashMap) entriesField + .get(cache); + List descriptions = new ArrayList<>(); + for (Map.Entry entry : entries.entrySet()) { + LmdbLftjPlanner.PlanningResult result = entry.getValue(); + descriptions.add(entry.getKey() + "=" + (result.planned() ? result.plan().fallbackExpr().getSignature() + : result.rejectionReason())); + } + return descriptions; + } + private String foafCycleQuery(int size) { StringBuilder builder = new StringBuilder(); builder.append("PREFIX foaf: \n"); @@ -512,12 +630,68 @@ private String foafCycleQuery(int size) { return builder.toString(); } + private LmdbLftjPlan syntheticAliasedPlan() { + LmdbLftjPlan basePlan = LmdbLftjSyntheticScenario.createPlan(); + return new LmdbLftjPlan(basePlan.fallbackExpr().clone(), Set.of("x", "y", "z"), Set.of("x", "y", "z"), + basePlan.variableOrder(), basePlan.patternPlans(), + List.of( + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("y", "b"), + new LmdbLftjPlan.OutputBinding("z", "c")), + List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c"))); + } + + private String foafCycleAliasQuery(int size) { + StringBuilder builder = new StringBuilder(); + builder.append("PREFIX foaf: \n"); + builder.append("SELECT "); + for (int i = 0; i < size; i++) { + if (i > 0) { + builder.append(' '); + } + char variable = (char) ('a' + i); + char alias = (char) ('x' + i); + builder.append("(?").append(variable).append(" AS ?").append(alias).append(')'); + } + builder.append(" WHERE {\n"); + for (int i = 0; i < size; i++) { + builder.append(" ?") + .append((char) ('a' + i)) + .append(" foaf:knows ?") + .append((char) ('a' + ((i + 1) % size))) + .append(" .\n"); + } + builder.append(" FILTER ("); + boolean first = true; + for (int i = 0; i < size; i++) { + 
for (int j = i + 1; j < size; j++) { + if (!first) { + builder.append(" && "); + } + builder.append("?").append((char) ('a' + i)).append(" != ?").append((char) ('a' + j)); + first = false; + } + } + builder.append(")\n"); + builder.append("}\n"); + return builder.toString(); + } + private Object readField(Object target, String name) throws Exception { Field field = target.getClass().getDeclaredField(name); field.setAccessible(true); return field.get(target); } + private boolean isInitializedLmdbValue(Value value) throws Exception { + Field field = value.getClass().getDeclaredField("initialized"); + field.setAccessible(true); + return field.getBoolean(value); + } + private static final class InterpretedQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { private InterpretedQueryAccess() { @@ -614,7 +788,7 @@ private static final class FullCodegenFixture implements AutoCloseable { private FullCodegenFixture() throws IOException { dataDir = Files.createTempDirectory("rdf4j-lmdb-full-codegen-test").toFile(); - LmdbStoreConfig config = new LmdbStoreConfig("psoc,posc"); + LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); config.setLftjEnabled(true); config.setLftjCodegenEnabled(true); config.setForceSync(false); @@ -684,4 +858,26 @@ public void close() throws Exception { } } } + + private static final class TrackingBenchmarkStoreConnection extends LmdbBenchmarkStore.BenchmarkStoreConnection { + private CloseableIteration nextIteration; + private int initValueCalls; + + private TrackingBenchmarkStoreConnection(LmdbStore sail) { + super(sail, null); + } + + @Override + protected CloseableIteration evaluateWithTripleSource(TupleExpr tupleExpr, + Dataset dataset, + BindingSet bindings, boolean includeInferred, + Function tripleSourceFactory) throws SailException { + return nextIteration; + } + + @Override + protected void initValue(Value value) { + initValueCalls++; + } + } } diff --git 
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java index af5156f0f6..9626e34740 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -19,6 +19,7 @@ import java.util.Map; import java.util.Set; +import org.eclipse.rdf4j.collection.factory.impl.DefaultCollectionFactory; import org.eclipse.rdf4j.common.iteration.EmptyIteration; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Resource; @@ -28,13 +29,25 @@ import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.query.Dataset; import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.QueryLanguage; +import org.eclipse.rdf4j.query.algebra.And; +import org.eclipse.rdf4j.query.algebra.Compare; +import org.eclipse.rdf4j.query.algebra.Extension; +import org.eclipse.rdf4j.query.algebra.ExtensionElem; +import org.eclipse.rdf4j.query.algebra.Filter; import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.Projection; +import org.eclipse.rdf4j.query.algebra.ProjectionElem; +import org.eclipse.rdf4j.query.algebra.ProjectionElemList; import org.eclipse.rdf4j.query.algebra.QueryRoot; import org.eclipse.rdf4j.query.algebra.StatementPattern; import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.Var; import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.eclipse.rdf4j.query.parser.ParsedTupleQuery; +import org.eclipse.rdf4j.query.parser.QueryParserUtil; import org.junit.jupiter.api.Test; class LmdbLftjOptimizerTest { @@ -80,6 +93,80 @@ void optimizeShouldReusePreparedPlanAcrossEquivalentJoinReorders() { 
"commuted join order should still hit the normalized prepared-plan cache"); } + @Test + void optimizeShouldFuseProjectionExtensionAndInequalityFilter() { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + QueryRoot tupleExpr = new QueryRoot(aliasedCycle()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = lftjNode(tupleExpr); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("y", "b"), + new LmdbLftjPlan.OutputBinding("z", "c")), lftj.plan().outputBindings()); + assertEquals(List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); + } + + @Test + void optimizeShouldFuseParsedAliasProjectionQuery() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + ParsedTupleQuery parsed = QueryParserUtil.parseTupleQuery(QueryLanguage.SPARQL, aliasProjectionQuery(), null); + TupleExpr tupleExpr = parsed.getTupleExpr().clone(); + if (!(tupleExpr instanceof QueryRoot)) { + tupleExpr = new QueryRoot(tupleExpr); + } + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = lftjNode(tupleExpr); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("y", "b"), + new LmdbLftjPlan.OutputBinding("z", "c")), lftj.plan().outputBindings()); + assertEquals(List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); + } + + @Test + 
void optimizerPipelineShouldFuseParsedAliasProjectionQuery() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + TripleSource tripleSource = new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess); + EvaluationStatistics evaluationStatistics = new EvaluationStatistics(); + LmdbLftjEvaluationStrategy strategy = new LmdbLftjEvaluationStrategy(tripleSource, null, null, 0, + evaluationStatistics, false, DefaultCollectionFactory::new); + strategy.setOptimizerPipeline(new LmdbLftjOptimizerPipeline(strategy, tripleSource, evaluationStatistics)); + + ParsedTupleQuery parsed = QueryParserUtil.parseTupleQuery(QueryLanguage.SPARQL, aliasProjectionQuery(), null); + TupleExpr tupleExpr = parsed.getTupleExpr().clone(); + if (!(tupleExpr instanceof QueryRoot)) { + tupleExpr = new QueryRoot(tupleExpr); + } + + strategy.optimize(tupleExpr, evaluationStatistics, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = lftjNode(tupleExpr); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("y", "b"), + new LmdbLftjPlan.OutputBinding("z", "c")), lftj.plan().outputBindings()); + assertEquals(List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); + } + private TupleExpr cycle(String a, String b, String c) { StatementPattern pattern1 = statementPattern(a, b); StatementPattern pattern2 = statementPattern(b, c); @@ -105,6 +192,34 @@ private TupleExpr reorderedDistinctPredicateCycle(String a, String b, String c) return new Join(new Join(pattern2, pattern3), pattern1); } + private TupleExpr aliasedCycle() { + Join join = new Join(new Join(statementPattern("a", "b"), statementPattern("b", "c")), + statementPattern("c", "a")); + Filter filter = new Filter(join, new And( + new Compare(new Var("a"), new Var("b"), Compare.CompareOp.NE), + new And( + new Compare(new 
Var("a"), new Var("c"), Compare.CompareOp.NE), + new Compare(new Var("b"), new Var("c"), Compare.CompareOp.NE)))); + Extension extension = new Extension(filter); + extension.addElement(new ExtensionElem(new Var("a"), "x")); + extension.addElement(new ExtensionElem(new Var("b"), "y")); + extension.addElement(new ExtensionElem(new Var("c"), "z")); + return new Projection(extension, new ProjectionElemList( + new ProjectionElem("x"), + new ProjectionElem("y"), + new ProjectionElem("z"))); + } + + private String aliasProjectionQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n"; + } + private LmdbLftjTupleExpr lftjNode(TupleExpr tupleExpr) { return assertInstanceOf(LmdbLftjTupleExpr.class, assertInstanceOf(QueryRoot.class, tupleExpr).getArg()); } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java index 080644358e..8473bc1df4 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSyntheticScenario.java @@ -211,6 +211,11 @@ public Value resolveValue(long id) { return valuesById.get((int) id); } + @Override + public Value lazyValue(long id) { + return valuesById.get((int) id); + } + @Override public boolean includeInferred() { return false; diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index a5196f6e01..f0dd048ab8 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -107,12 +107,12 @@ public void tearDown() throws IOException { } } -// @Benchmark + @Benchmark public long cycle3() { return executeCount(QUERY_CYCLE_3); } -// @Benchmark + @Benchmark public long cycle4() { return executeCount(QUERY_CYCLE_4); } From 38a8216ad5637de427226e4813e044f62b391965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 06:40:49 +0200 Subject: [PATCH 21/32] more tests --- .../lmdb/LmdbLftjBoundInputParityTest.java | 229 +++++++++++++++ .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 169 ++++++++++++ .../rdf4j/sail/lmdb/LmdbLftjExecutorTest.java | 32 +++ .../lmdb/LmdbLftjFusionCorrectnessTest.java | 260 ++++++++++++++++++ .../sail/lmdb/LmdbLftjOptimizerTest.java | 192 +++++++++++++ 5 files changed, 882 insertions(+) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBoundInputParityTest.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBoundInputParityTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBoundInputParityTest.java new file mode 100644 index 0000000000..7f21946c18 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBoundInputParityTest.java @@ -0,0 +1,229 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.junit.jupiter.api.Test; + +class LmdbLftjBoundInputParityTest { + + @Test + void aliasedPlanShouldRespectPartiallyBoundInputsAcrossInterpretedAndCompiledPaths() { + LmdbLftjPlan plan = syntheticAliasedPlan(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + + List interpreted = drainRows(new InterpretedQueryAccess(), plan, bindings); + List compiled = drainRows(new CompiledQueryAccess(), plan, bindings); + + assertThat(compiled).containsExactlyElementsOf(interpreted); + assertThat(compiled).containsExactly( + "a=urn:person:1|b=urn:person:2|x=urn:person:1|y=urn:person:2|z=urn:person:3", + "a=urn:person:1|b=urn:person:2|x=urn:person:1|y=urn:person:2|z=urn:person:4"); + } + + @Test + void duplicateAliasedPlanShouldRespectPartiallyBoundInputsAcrossInterpretedAndCompiledPaths() { + LmdbLftjPlan plan = syntheticDuplicateAliasedPlan(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + + List interpreted = 
drainRows(new InterpretedQueryAccess(), plan, bindings); + List compiled = drainRows(new CompiledQueryAccess(), plan, bindings); + + assertThat(compiled).containsExactlyElementsOf(interpreted); + assertThat(compiled).containsExactly( + "a=urn:person:1|b=urn:person:2|x=urn:person:1|x2=urn:person:1|z=urn:person:3", + "a=urn:person:1|b=urn:person:2|x=urn:person:1|x2=urn:person:1|z=urn:person:4"); + } + + @Test + void aliasedPlanShouldRetainUnrelatedIncomingBindingsAcrossInterpretedAndCompiledPaths() { + LmdbLftjPlan plan = syntheticAliasedPlan(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + bindings.setBinding("seed", LmdbLftjSyntheticScenario.VF.createIRI("urn:seed:fixed")); + + List interpreted = drainRows(new InterpretedQueryAccess(), plan, bindings); + List compiled = drainRows(new CompiledQueryAccess(), plan, bindings); + + assertThat(compiled).containsExactlyElementsOf(interpreted); + assertThat(compiled).containsExactly( + "a=urn:person:1|b=urn:person:2|seed=urn:seed:fixed|x=urn:person:1|y=urn:person:2|z=urn:person:3", + "a=urn:person:1|b=urn:person:2|seed=urn:seed:fixed|x=urn:person:1|y=urn:person:2|z=urn:person:4"); + } + + @Test + void aliasedPlanShouldRejectConflictingBoundInputsAcrossInterpretedAndCompiledPaths() { + LmdbLftjPlan plan = syntheticAliasedPlan(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + + List interpreted = drainRows(new InterpretedQueryAccess(), plan, bindings); + List compiled = drainRows(new CompiledQueryAccess(), plan, bindings); + + assertThat(interpreted).isEmpty(); + assertThat(compiled).isEmpty(); + } + + @Test + void 
aliasedPlanShouldReturnEmptyForUnknownBoundSourceValuesAcrossInterpretedAndCompiledPaths() { + LmdbLftjPlan plan = syntheticAliasedPlan(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:99")); + + List interpreted = drainRows(new InterpretedQueryAccess(), plan, bindings); + List compiled = drainRows(new CompiledQueryAccess(), plan, bindings); + + assertThat(interpreted).isEmpty(); + assertThat(compiled).isEmpty(); + } + + @Test + void projectedHiddenContextPlanShouldPreserveCollapsedMultiplicityAcrossInterpretedAndCompiledPaths() { + LmdbLftjPlan plan = projectedHiddenContextPlan(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + + List interpreted = drainRows(new InterpretedQueryAccess(true), plan, bindings); + List compiled = drainRows(new CompiledQueryAccess(true), plan, bindings); + + String expectedRow = "a=urn:person:1|b=urn:person:2|x=urn:person:1|y=urn:person:2"; + assertThat(compiled).containsExactlyElementsOf(interpreted); + assertThat(compiled).hasSize(54).allMatch(expectedRow::equals); + } + + private List drainRows(LmdbQueryAccess queryAccess, LmdbLftjPlan plan, BindingSet bindings) { + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); + List rows = new ArrayList<>(); + try (CloseableIteration iteration = evaluationStep.evaluate(bindings == null + ? 
EmptyBindingSet.getInstance() + : bindings)) { + while (iteration.hasNext()) { + rows.add(render(iteration.next())); + } + } + Collections.sort(rows); + return rows; + } + + private String render(BindingSet row) { + List names = new ArrayList<>(row.getBindingNames()); + Collections.sort(names); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < names.size(); i++) { + if (i > 0) { + builder.append('|'); + } + String name = names.get(i); + builder.append(name).append('=').append(row.getValue(name).stringValue()); + } + return builder.toString(); + } + + private LmdbLftjPlan syntheticAliasedPlan() { + LmdbLftjPlan basePlan = LmdbLftjSyntheticScenario.createPlan(); + return new LmdbLftjPlan(basePlan.fallbackExpr().clone(), Set.of("x", "y", "z"), Set.of("x", "y", "z"), + basePlan.variableOrder(), basePlan.patternPlans(), + List.of( + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("y", "b"), + new LmdbLftjPlan.OutputBinding("z", "c")), + List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c"))); + } + + private LmdbLftjPlan syntheticDuplicateAliasedPlan() { + LmdbLftjPlan basePlan = LmdbLftjSyntheticScenario.createPlan(); + return new LmdbLftjPlan(basePlan.fallbackExpr().clone(), Set.of("x", "x2", "z"), Set.of("x", "x2", "z"), + basePlan.variableOrder(), basePlan.patternPlans(), + List.of( + new LmdbLftjPlan.OutputBinding("z", "c"), + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("x2", "a")), + List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c"))); + } + + private LmdbLftjPlan projectedHiddenContextPlan() { + LmdbLftjPlan basePlan = LmdbLftjSyntheticScenario.createPlanWithHiddenContexts(); + return new LmdbLftjPlan(basePlan.fallbackExpr().clone(), Set.of("x", "y"), Set.of("x", "y"), + 
basePlan.variableOrder(), basePlan.patternPlans(), + List.of( + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("y", "b")), + List.of()); + } + + private static final class InterpretedQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { + + private InterpretedQueryAccess() { + } + + private InterpretedQueryAccess(boolean duplicateContexts) { + super(duplicateContexts); + } + + @Override + public boolean lftjCodegenEnabled() { + return false; + } + } + + private static final class CompiledQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { + private final Map compiledPlans = new HashMap<>(); + + private CompiledQueryAccess() { + } + + private CompiledQueryAccess(boolean duplicateContexts) { + super(duplicateContexts); + } + + @Override + public LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { + return compiledPlans.get(executionKey); + } + + @Override + public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + compiledPlans.put(executionKey, LmdbLftjCodegenCache.CacheEntry.success(factory)); + } + + @Override + public void cacheCompiledPlanFailure(String executionKey, String message) { + compiledPlans.put(executionKey, LmdbLftjCodegenCache.CacheEntry.failure(message)); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index 6844df9a17..bdf49a0236 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -63,6 +63,22 @@ void planShouldExposeStableExecutionKeyAcrossCopies() { .isEqualTo(invokeStringGetter(plan.copy(), "executionKey")); } + @Test + void executionKeyShouldDifferentiateHiddenContextMultiplicityPlans() { + assertThat(invokeStringGetter(LmdbLftjSyntheticScenario.createPlan(), 
"executionKey")) + .isNotEqualTo(invokeStringGetter(LmdbLftjSyntheticScenario.createPlanWithHiddenContexts(), + "executionKey")); + } + + @Test + void executionKeyShouldDifferentiateProjectedAliasLayouts() { + LmdbLftjPlan aliasedPlan = syntheticAliasedPlan(); + LmdbLftjPlan duplicateAliasedPlan = syntheticDuplicateAliasedPlan(); + + assertThat(invokeStringGetter(aliasedPlan, "executionKey")) + .isNotEqualTo(invokeStringGetter(duplicateAliasedPlan, "executionKey")); + } + @Test void syntheticQueryAccessShouldEnableCodegenByDefault() { assertThat(invokeBooleanGetter(new LmdbLftjSyntheticScenario.TestQueryAccess(), "lftjCodegenEnabled")) @@ -177,6 +193,41 @@ void compiledAndInterpretedShouldMatchForHiddenContextMultiplicity() { assertThat(compiled).hasSize(648).containsExactlyElementsOf(interpreted); } + @Test + void compiledAndInterpretedShouldMatchForPartiallyBoundHiddenContextInput() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlanWithHiddenContexts(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + + List interpreted = drain( + LmdbLftjSyntheticScenario.createEvaluationStep(new InterpretedQueryAccess(true), plan), bindings); + List compiled = drain( + LmdbLftjSyntheticScenario.createEvaluationStep( + new CachingQueryAccess(true, new CountingCompiler()), plan), + bindings); + + assertThat(compiled).hasSize(54).containsExactlyElementsOf(interpreted); + } + + @Test + void compiledAndInterpretedShouldMatchForFullyBoundHiddenContextInput() { + LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlanWithHiddenContexts(); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + bindings.setBinding("c", 
LmdbLftjSyntheticScenario.VF.createIRI("urn:person:3")); + + List interpreted = drain( + LmdbLftjSyntheticScenario.createEvaluationStep(new InterpretedQueryAccess(true), plan), bindings); + List compiled = drain( + LmdbLftjSyntheticScenario.createEvaluationStep( + new CachingQueryAccess(true, new CountingCompiler()), plan), + bindings); + + assertThat(compiled).hasSize(27).containsExactlyElementsOf(interpreted); + } + @Test void compiledAndInterpretedShouldMatchForFullyBoundInput() { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); @@ -227,6 +278,27 @@ void compiledIterationShouldMaterializeAliasedBindingsLazily() { assertThat(queryAccess.resolveValueCalls).isZero(); } + @Test + void compiledIterationShouldMaterializeDuplicateAndReorderedAliasesLazily() { + LmdbLftjPlan plan = syntheticDuplicateAliasedPlan(); + CachingQueryAccess queryAccess = new CachingQueryAccess(new CountingCompiler()); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); + + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + assertThat(iteration).hasNext(); + BindingSet row = iteration.next(); + assertThat(row.getBindingNames()).containsExactlyInAnyOrder("z", "x", "x2"); + assertThat(row.getValue("z")).isEqualTo(LmdbLftjSyntheticScenario.VF.createIRI("urn:person:3")); + assertThat(row.getValue("x")).isEqualTo(LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + assertThat(row.getValue("x2")).isEqualTo(LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + assertThat(row.hasBinding("a")).isFalse(); + assertThat(row.hasBinding("b")).isFalse(); + assertThat(row.hasBinding("c")).isFalse(); + } + + assertThat(queryAccess.resolveValueCalls).isZero(); + } + @Test void evaluateInternalShouldSkipInitValueForLftjRuntimeSafeQueries() throws Exception { try (FullCodegenFixture fixture = new FullCodegenFixture()) { @@ -251,6 +323,27 @@ void 
evaluateInternalShouldSkipInitValueForLftjRuntimeSafeQueries() throws Excep } } + @Test + void lazyValuesFromSameStoreShouldCompareByIdWithoutInitialization() throws Exception { + try (FullCodegenFixture fixture = new FullCodegenFixture()) { + LmdbQueryAccess queryAccess = fixture.connection.benchmarkQueryAccess(false); + long firstId = queryAccess.resolveId(FullCodegenFixture.person(1)); + long secondId = queryAccess.resolveId(FullCodegenFixture.person(2)); + Value firstLeft = queryAccess.lazyValue(firstId); + Value firstRight = queryAccess.lazyValue(firstId); + Value second = queryAccess.lazyValue(secondId); + + assertThat(isInitializedLmdbValue(firstLeft)).isFalse(); + assertThat(isInitializedLmdbValue(firstRight)).isFalse(); + assertThat(isInitializedLmdbValue(second)).isFalse(); + assertThat(firstLeft).isEqualTo(firstRight); + assertThat(firstLeft).isNotEqualTo(second); + assertThat(isInitializedLmdbValue(firstLeft)).isFalse(); + assertThat(isInitializedLmdbValue(firstRight)).isFalse(); + assertThat(isInitializedLmdbValue(second)).isFalse(); + } + } + @Test void fullStackCompilerShouldProduceGeneratedIterationOnRealStore() throws Exception { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); @@ -455,6 +548,44 @@ void codegenCacheShouldCompileOncePerExecutionKey() { .isTrue(); } + @Test + void codegenCacheShouldReuseCompiledFactoryAcrossEquivalentPlanCopies() { + LmdbLftjPlan first = syntheticAliasedPlan(); + LmdbLftjPlan second = first.copy(); + CountingCompiler compiler = new CountingCompiler(); + CachingQueryAccess queryAccess = new CachingQueryAccess(compiler); + + List firstRows = drainGeneric(LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, first), + EmptyBindingSet.getInstance()); + List secondRows = drainGeneric(LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, second), + EmptyBindingSet.getInstance()); + + assertThat(secondRows).containsExactlyElementsOf(firstRows); + assertThat(compiler.compileCalls).isEqualTo(1); + 
assertThat(queryAccess.cachedEntry(first.executionKey())).isNotNull(); + assertThat(queryAccess.cachedEntry(second.executionKey())).isNotNull(); + } + + @Test + void codegenCacheShouldCompileSeparatelyForDistinctAliasLayouts() { + LmdbLftjPlan aliased = syntheticAliasedPlan(); + LmdbLftjPlan duplicateAliased = syntheticDuplicateAliasedPlan(); + CountingCompiler compiler = new CountingCompiler(); + CachingQueryAccess queryAccess = new CachingQueryAccess(compiler); + + List aliasedRows = drainGeneric(LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, aliased), + EmptyBindingSet.getInstance()); + List duplicateRows = drainGeneric( + LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, duplicateAliased), + EmptyBindingSet.getInstance()); + + assertThat(aliasedRows).isNotEmpty(); + assertThat(duplicateRows).isNotEmpty(); + assertThat(compiler.compileCalls).isEqualTo(2); + assertThat(queryAccess.cachedEntry(aliased.executionKey())).isNotNull(); + assertThat(queryAccess.cachedEntry(duplicateAliased.executionKey())).isNotNull(); + } + @Test void codegenCacheShouldReuseNegativeResultAfterCompileFailure() { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); @@ -514,11 +645,35 @@ private List drain(QueryEvaluationStep evaluationStep, BindingSet bindin return rows; } + private List drainGeneric(QueryEvaluationStep evaluationStep, BindingSet bindings) { + List rows = new ArrayList<>(); + try (CloseableIteration iteration = evaluationStep.evaluate(bindings)) { + while (iteration.hasNext()) { + rows.add(renderBindingSet(iteration.next())); + } + } + return rows; + } + private String render(BindingSet row) { return row.getValue("a").stringValue() + "|" + row.getValue("b").stringValue() + "|" + row.getValue("c").stringValue(); } + private String renderBindingSet(BindingSet row) { + List bindingNames = new ArrayList<>(row.getBindingNames()); + bindingNames.sort(String::compareTo); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 
bindingNames.size(); i++) { + if (i > 0) { + builder.append('|'); + } + String bindingName = bindingNames.get(i); + builder.append(bindingName).append('=').append(row.getValue(bindingName).stringValue()); + } + return builder.toString(); + } + private void assertPatchedSeekBuffer(long prefixValue, long firstTarget, long secondTarget) { ByteBuffer keyBuffer = ByteBuffer.allocate(TripleStore.MAX_KEY_LENGTH); Varint.writeUnsigned(keyBuffer, prefixValue); @@ -644,6 +799,20 @@ private LmdbLftjPlan syntheticAliasedPlan() { new LmdbLftjPlan.InequalityConstraint("b", "c"))); } + private LmdbLftjPlan syntheticDuplicateAliasedPlan() { + LmdbLftjPlan basePlan = LmdbLftjSyntheticScenario.createPlan(); + return new LmdbLftjPlan(basePlan.fallbackExpr().clone(), Set.of("x", "x2", "z"), Set.of("x", "x2", "z"), + basePlan.variableOrder(), basePlan.patternPlans(), + List.of( + new LmdbLftjPlan.OutputBinding("z", "c"), + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("x2", "a")), + List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c"))); + } + private String foafCycleAliasQuery(int size) { StringBuilder builder = new StringBuilder(); builder.append("PREFIX foaf: \n"); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java index f98f0719ba..9dff0af689 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java @@ -63,6 +63,38 @@ void evaluateShouldRespectFullyBoundInputBindings() { } } + @Test + void evaluateShouldReturnEmptyWithoutOpeningTxnForUnknownBoundInput() { + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep 
evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:99")); + + try (CloseableIteration iteration = evaluationStep.evaluate(bindings)) { + assertTrue(!iteration.hasNext(), "unknown bound ids should short-circuit to an empty result"); + } + + assertEquals(0, queryAccess.openScanCalls, "unknown bound ids should not open any LMDB scans"); + assertEquals(0, queryAccess.releaseReadTxnCalls, "unknown bound ids should not acquire a read transaction"); + } + + @Test + void evaluateShouldRetainUnrelatedIncomingBindingsInResults() { + LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); + QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); + QueryBindingSet bindings = new QueryBindingSet(); + bindings.setBinding("a", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:1")); + bindings.setBinding("b", LmdbLftjSyntheticScenario.VF.createIRI("urn:person:2")); + bindings.setBinding("seed", LmdbLftjSyntheticScenario.VF.createIRI("urn:seed:fixed")); + + try (CloseableIteration iteration = evaluationStep.evaluate(bindings)) { + assertTrue(iteration.hasNext(), "matching bindings should still produce a result"); + BindingSet row = iteration.next(); + assertEquals("urn:seed:fixed", row.getValue("seed").stringValue(), + "materialized results should keep unrelated incoming bindings"); + } + } + @Test void evaluateShouldCloseLiveScansOnEarlyClose() { LmdbLftjSyntheticScenario.TestQueryAccess queryAccess = new LmdbLftjSyntheticScenario.TestQueryAccess(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java new file mode 100644 index 0000000000..d4ce84df94 --- /dev/null +++ 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java @@ -0,0 +1,260 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.TupleQuery; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class LmdbLftjFusionCorrectnessTest { + + @Test + void aliasedCycleRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, aliasedCycleQuery()); + } + + @Test + void reorderedDuplicateProjectionRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, reorderedDuplicateAliasQuery()); + } + + @Test + void duplicateAndReversedInequalityRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, 
duplicateReversedInequalityQuery()); + } + + @Test + void unsupportedOrFilterRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, unsupportedOrFilterQuery()); + } + + @Test + void computedExtensionRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, computedExtensionQuery()); + } + + @Test + void chainedAliasExtensionRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, chainedAliasExtensionQuery()); + } + + @Test + void aliasedCycleRowsShouldMatchRegularEvaluationWithBoundSourceVariables(@TempDir java.nio.file.Path tempDir) { + assertEquals(List.of( + "x=urn:person:1|y=urn:person:2|z=urn:person:3", + "x=urn:person:1|y=urn:person:2|z=urn:person:4"), + assertRowsMatch(tempDir, aliasedCycleQuery(), + Map.of("a", person(1), "b", person(2)))); + } + + @Test + void reorderedDuplicateProjectionRowsShouldMatchRegularEvaluationWithBoundSourceVariables( + @TempDir java.nio.file.Path tempDir) { + assertEquals(List.of( + "x=urn:person:1|x2=urn:person:1|z=urn:person:3", + "x=urn:person:1|x2=urn:person:1|z=urn:person:4"), + assertRowsMatch(tempDir, reorderedDuplicateAliasQuery(), + Map.of("a", person(1), "b", person(2)))); + } + + @Test + void duplicateAndReversedInequalityRowsShouldMatchRegularEvaluationWithBoundSourceVariables( + @TempDir java.nio.file.Path tempDir) { + assertEquals(List.of( + "x=urn:person:1|y=urn:person:2|z=urn:person:3", + "x=urn:person:1|y=urn:person:2|z=urn:person:4"), + assertRowsMatch(tempDir, duplicateReversedInequalityQuery(), + Map.of("a", person(1), "b", person(2)))); + } + + @Test + void unsupportedOrFilterRowsShouldMatchRegularEvaluationWithBoundSourceVariables( + @TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, unsupportedOrFilterQuery(), Map.of("a", person(1), "b", person(2))); + } + + @Test + void aliasedCycleRowsShouldStayEmptyForConflictingBoundSourceVariables(@TempDir java.nio.file.Path 
tempDir) { + assertEquals(List.of(), assertRowsMatch(tempDir, aliasedCycleQuery(), Map.of("a", person(1), "b", person(1)))); + } + + @Test + void aliasedCycleRowsShouldStayEmptyForUnknownBoundSourceVariables(@TempDir java.nio.file.Path tempDir) { + assertEquals(List.of(), assertRowsMatch(tempDir, aliasedCycleQuery(), Map.of("a", person(99)))); + } + + private void assertRowsMatch(java.nio.file.Path tempDir, String query) { + assertRowsMatch(tempDir, query, Map.of()); + } + + private List assertRowsMatch(java.nio.file.Path tempDir, String query, Map bindings) { + Repository fallbackRepository = createRepository(tempDir.resolve("fallback").toFile(), false, false); + Repository interpretedRepository = createRepository(tempDir.resolve("interpreted").toFile(), true, false); + Repository compiledRepository = createRepository(tempDir.resolve("compiled").toFile(), true, true); + + try { + populate(fallbackRepository); + populate(interpretedRepository); + populate(compiledRepository); + + List expected = executeRows(fallbackRepository, query, bindings); + List interpreted = executeRows(interpretedRepository, query, bindings); + List compiled = executeRows(compiledRepository, query, bindings); + + assertEquals(expected, interpreted, "Interpreted LFTJ must preserve query rows"); + assertEquals(expected, compiled, "Compiled LFTJ must preserve query rows"); + return expected; + } finally { + fallbackRepository.shutDown(); + interpretedRepository.shutDown(); + compiledRepository.shutDown(); + } + } + + private Repository createRepository(File dataDir, boolean lftjEnabled, boolean lftjCodegenEnabled) { + LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); + config.setLftjEnabled(lftjEnabled); + config.setLftjCodegenEnabled(lftjCodegenEnabled); + config.setForceSync(false); + config.setValueDBSize(1_073_741_824L); + config.setTripleDBSize(config.getValueDBSize()); + + Repository repository = new SailRepository(new LmdbStore(dataDir, config)); + 
repository.init(); + return repository; + } + + private void populate(Repository repository) { + try (SailRepositoryConnection connection = (SailRepositoryConnection) repository.getConnection()) { + for (long subject = 1; subject <= 4; subject++) { + for (long object = 1; object <= 4; object++) { + if (subject != object) { + connection.add(person(subject), org.eclipse.rdf4j.model.vocabulary.FOAF.KNOWS, person(object)); + } + } + } + } + } + + private List executeRows(Repository repository, String query, Map bindings) { + List rows = new ArrayList<>(); + try (RepositoryConnection connection = repository.getConnection()) { + TupleQuery tupleQuery = connection.prepareTupleQuery(query); + for (Map.Entry binding : bindings.entrySet()) { + tupleQuery.setBinding(binding.getKey(), binding.getValue()); + } + try (TupleQueryResult result = tupleQuery.evaluate()) { + while (result.hasNext()) { + rows.add(render(result.next())); + } + } + } + Collections.sort(rows); + return rows; + } + + private String render(BindingSet row) { + List names = new ArrayList<>(row.getBindingNames()); + Collections.sort(names); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < names.size(); i++) { + if (i > 0) { + builder.append('|'); + } + String name = names.get(i); + builder.append(name).append('=').append(row.getValue(name).stringValue()); + } + return builder.toString(); + } + + private org.eclipse.rdf4j.model.IRI person(long id) { + return org.eclipse.rdf4j.model.impl.SimpleValueFactory.getInstance().createIRI("urn:person:" + id); + } + + private String aliasedCycleQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n"; + } + + private String reorderedDuplicateAliasQuery() { + return "PREFIX foaf: \n" + + "SELECT (?c AS ?z) (?a AS ?x) (?a AS ?x2) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " 
?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n"; + } + + private String duplicateReversedInequalityQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?b != ?a && ?a != ?b && ?c != ?a && ?b != ?c)\n" + + "}\n"; + } + + private String unsupportedOrFilterQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b || ?a != ?c)\n" + + "}\n"; + } + + private String computedExtensionQuery() { + return "PREFIX foaf: \n" + + "SELECT (STR(?a) AS ?x) ?b ?c WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n"; + } + + private String chainedAliasExtensionQuery() { + return "PREFIX foaf: \n" + + "SELECT ?y ?b ?c WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + " BIND(?a AS ?x)\n" + + " BIND(?x AS ?y)\n" + + "}\n"; + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java index 9626e34740..c8e0877672 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -13,6 +13,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNull; import java.util.HashMap; import java.util.List; @@ -39,12 +40,14 @@ import org.eclipse.rdf4j.query.algebra.Projection; import 
org.eclipse.rdf4j.query.algebra.ProjectionElem; import org.eclipse.rdf4j.query.algebra.ProjectionElemList; +import org.eclipse.rdf4j.query.algebra.QueryModelNode; import org.eclipse.rdf4j.query.algebra.QueryRoot; import org.eclipse.rdf4j.query.algebra.StatementPattern; import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.Var; import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; +import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; import org.eclipse.rdf4j.query.parser.ParsedTupleQuery; import org.eclipse.rdf4j.query.parser.QueryParserUtil; @@ -167,6 +170,111 @@ void optimizerPipelineShouldFuseParsedAliasProjectionQuery() throws Exception { new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); } + @Test + void optimizeShouldFuseReorderedDuplicateProjectionQuery() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot(reorderedDuplicateAliasProjectionQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = lftjNode(tupleExpr); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("z", "c"), + new LmdbLftjPlan.OutputBinding("x", "a"), + new LmdbLftjPlan.OutputBinding("x2", "a")), lftj.plan().outputBindings()); + assertEquals(List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); + } + + @Test + void optimizeShouldCanonicalizeDuplicateAndReversedInequalities() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer 
optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot(duplicateReversedInequalityQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = lftjNode(tupleExpr); + assertEquals(List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); + } + + @Test + void optimizeShouldLeaveUnsupportedOrFilterOutsideLftjNode() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot(unsupportedOrFilterQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertEquals(List.of(), lftj.plan().inequalityConstraints()); + assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertInstanceOf(Projection.class, findNode(tupleExpr, Projection.class)); + } + + @Test + void optimizeShouldLeaveRepeatedVariablePatternOutsideLftjNode() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot(repeatedVariableQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + assertNull(findNode(tupleExpr, LmdbLftjTupleExpr.class)); + } + + @Test + void optimizeShouldLeaveComputedExtensionOutsideLftjNode() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); 
+ + TupleExpr tupleExpr = parsedQueryRoot(computedExtensionQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertEquals(List.of(), lftj.plan().inequalityConstraints()); + assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertInstanceOf(Extension.class, findNode(tupleExpr, Extension.class)); + assertInstanceOf(Projection.class, findNode(tupleExpr, Projection.class)); + } + + @Test + void optimizeShouldLeaveChainedAliasExtensionOutsideFusedOutputs() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot(chainedAliasProjectionQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("a", "a"), + new LmdbLftjPlan.OutputBinding("b", "b"), + new LmdbLftjPlan.OutputBinding("c", "c")), lftj.plan().outputBindings()); + assertEquals(List.of(), lftj.plan().inequalityConstraints()); + assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertInstanceOf(Extension.class, findNode(tupleExpr, Extension.class)); + assertInstanceOf(Projection.class, findNode(tupleExpr, Projection.class)); + } + private TupleExpr cycle(String a, String b, String c) { StatementPattern pattern1 = statementPattern(a, b); StatementPattern pattern2 = statementPattern(b, c); @@ -220,10 +328,94 @@ private String aliasProjectionQuery() { + "}\n"; } + private String reorderedDuplicateAliasProjectionQuery() { + return "PREFIX foaf: \n" + + "SELECT (?c AS ?z) (?a AS ?x) (?a AS ?x2) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && 
?b != ?c)\n" + + "}\n"; + } + + private String duplicateReversedInequalityQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?b != ?a && ?a != ?b && ?c != ?a && ?b != ?c)\n" + + "}\n"; + } + + private String unsupportedOrFilterQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b || ?a != ?c)\n" + + "}\n"; + } + + private String computedExtensionQuery() { + return "PREFIX foaf: \n" + + "SELECT (STR(?a) AS ?x) ?b ?c WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n"; + } + + private String chainedAliasProjectionQuery() { + return "PREFIX foaf: \n" + + "SELECT ?y ?b ?c WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + " BIND(?a AS ?x)\n" + + " BIND(?x AS ?y)\n" + + "}\n"; + } + + private String repeatedVariableQuery() { + return "PREFIX foaf: \n" + + "SELECT * WHERE {\n" + + " ?a foaf:knows ?a .\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?a .\n" + + "}\n"; + } + + private TupleExpr parsedQueryRoot(String query) throws Exception { + ParsedTupleQuery parsed = QueryParserUtil.parseTupleQuery(QueryLanguage.SPARQL, query, null); + TupleExpr tupleExpr = parsed.getTupleExpr().clone(); + if (!(tupleExpr instanceof QueryRoot)) { + tupleExpr = new QueryRoot(tupleExpr); + } + return tupleExpr; + } + private LmdbLftjTupleExpr lftjNode(TupleExpr tupleExpr) { return assertInstanceOf(LmdbLftjTupleExpr.class, assertInstanceOf(QueryRoot.class, tupleExpr).getArg()); } + private T findNode(TupleExpr tupleExpr, Class type) { + QueryModelNode[] result = new QueryModelNode[1]; + tupleExpr.visit(new 
AbstractQueryModelVisitor() { + @Override + protected void meetNode(QueryModelNode node) { + if (result[0] == null && type.isInstance(node)) { + result[0] = node; + } + super.meetNode(node); + } + }); + return type.cast(result[0]); + } + private static final class EmptyTripleSource implements TripleSource { @Override From e110c96265a6bf93363a1b82343864924c2cdf0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 08:13:35 +0200 Subject: [PATCH 22/32] more tests and fixes --- ...actLmdbFullStackCompiledLftjIteration.java | 9 + .../sail/lmdb/LmdbLftjCodegenCompiler.java | 51 +++- .../lmdb/LmdbLftjFullCodegenCompiler.java | 46 ++-- .../sail/lmdb/LmdbPrefixFrontierProvider.java | 34 ++- .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 220 +++++++++++++++--- .../sail/lmdb/LmdbLftjOptimizerTest.java | 43 ++++ .../benchmark/FoafCliqueDataGenerator.java | 153 +++++++++++- .../FoafCliqueLftjCorrectnessTest.java | 63 ++--- .../benchmark/FoafCliqueQueryBenchmark.java | 100 ++++---- .../benchmark/FoafCliqueQueryCatalog.java | 176 ++++++++++++++ 10 files changed, 735 insertions(+), 160 deletions(-) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java index d64efafeab..32725b466f 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java @@ -68,6 +68,15 @@ protected final LmdbLftjPlan plan() { return plan; } + protected final LmdbLftjPatternPlan patternPlan(int patternOrdinal) { + return plan.patternPlans().get(patternOrdinal); + } + + protected final LmdbDerivedBinaryRelation 
loadDerivedRelation(int patternOrdinal, long predicateId) { + return LmdbPrefixFrontierProvider.loadDerivedRelation(queryAccess, state.txn(), patternPlan(patternOrdinal), + predicateId); + } + protected final LmdbLftjExecutionShape shape() { return shape; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java index 2a6d0cfbe5..5bfb11eb5a 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java @@ -11,6 +11,9 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.concurrent.atomic.AtomicLong; import org.codehaus.janino.SimpleCompiler; @@ -54,18 +57,30 @@ String sourceFor(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includ return sourceFor("GeneratedLmdbLftjSource", plan, shape, includeInferred); } + Path dumpSourceFor(Path outputFile, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) + throws IOException { + Path parent = outputFile.getParent(); + if (parent != null) { + Files.createDirectories(parent); + } + Files.writeString(outputFile, sourceFor(plan, shape, includeInferred)); + return outputFile; + } + protected String sourceFor(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { - return new SourceBuilder(simpleClassName, shape).build(); + return new SourceBuilder(simpleClassName, plan, shape).build(); } protected static final class SourceBuilder { private final String simpleClassName; + private final LmdbLftjPlan plan; private final LmdbLftjExecutionShape shape; - private SourceBuilder(String simpleClassName, LmdbLftjExecutionShape shape) { + private SourceBuilder(String simpleClassName, 
LmdbLftjPlan plan, LmdbLftjExecutionShape shape) { this.simpleClassName = simpleClassName; + this.plan = plan; this.shape = shape; } @@ -124,6 +139,10 @@ private void appendIterationClass(StringBuilder source) { source.append(" protected BindingSet computeNextElement() {\n"); source.append(" while (depth >= 0) {\n"); source.append(" if (depth == ").append(variableCount).append(") {\n"); + source.append(" if (!passesInequalityConstraints()) {\n"); + source.append(" backtrackAfterLeaf();\n"); + source.append(" continue;\n"); + source.append(" }\n"); source.append(" long multiplicity = 1L;\n"); for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { source.append(" long witnesses") @@ -163,6 +182,7 @@ private void appendIterationClass(StringBuilder source) { appendReleaseDepth(source, slot); appendPositionDepth(source, slot, shape.cursorOrdinals(slot)); } + appendInequalityHelper(source); source.append(" @Override\n"); source.append(" protected void closeCursors() {\n"); @@ -323,5 +343,32 @@ private void appendPositionDepth(StringBuilder source, int slot, int[] cursorOrd source.append(" }\n"); source.append(" }\n\n"); } + + private void appendInequalityHelper(StringBuilder source) { + source.append(" private boolean passesInequalityConstraints() {\n"); + if (plan.inequalityConstraints().isEmpty()) { + source.append(" return true;\n"); + } else { + for (LmdbLftjPlan.InequalityConstraint inequality : plan.inequalityConstraints()) { + source.append(" if (state().value(") + .append(variableSlot(inequality.leftVariable())) + .append(") == state().value(") + .append(variableSlot(inequality.rightVariable())) + .append(")) {\n"); + source.append(" return false;\n"); + source.append(" }\n"); + } + source.append(" return true;\n"); + } + source.append(" }\n\n"); + } + + private int variableSlot(String variableName) { + int slot = plan.variableOrder().indexOf(variableName); + if (slot < 0) { + throw new IllegalArgumentException("Unknown variable 
in inequality constraint: " + variableName); + } + return slot; + } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java index b22aad3484..ce91e9762d 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java @@ -597,29 +597,16 @@ private void appendRelationGroupAccessor(StringBuilder source, RelationGroup rel source.append(" return relationGroup").append(relationGroup.groupId).append(";\n"); source.append(" }\n"); source.append(" metrics().recordRelationLoad();\n"); - source.append(" LmdbDerivedBinaryRelation.Builder builder = new LmdbDerivedBinaryRelation.Builder(") - .append(sourceComponent) + source.append(" long predicateId = state().fixedIdForComponent(") + .append(patternOrdinal) .append(", ") - .append(targetComponent) + .append(TripleStore.PRED_IDX) .append(");\n"); - appendWitnessSeek(source, patternOrdinal, patternShape, "0L"); - source.append(" while (available").append(slotSuffix(patternOrdinal, -1)).append("Explicit"); - if (includeInferred) { - source.append(" || available").append(slotSuffix(patternOrdinal, -1)).append("Inferred"); - } - source.append(") {\n"); - if (includeInferred) { - appendMergedWitnessRowSelection(source, patternOrdinal, patternShape, true); - } else { - source.append(" builder.add(") - .append(componentAccessor(patternOrdinal, -1, "Explicit", sourceComponent)) - .append(", ") - .append(componentAccessor(patternOrdinal, -1, "Explicit", targetComponent)) - .append(");\n"); - source.append(" advanceWitness").append(patternOrdinal).append("Explicit();\n"); - } - source.append(" }\n"); - source.append(" relationGroup").append(relationGroup.groupId).append(" = builder.build();\n"); + source.append(" relationGroup") + .append(relationGroup.groupId) + 
.append(" = loadDerivedRelation(") + .append(patternOrdinal) + .append(", predicateId);\n"); source.append(" relationGroup") .append(relationGroup.groupId) .append("Scratch.prepare(relationGroup") @@ -1237,19 +1224,16 @@ private void appendInequalityHelper(StringBuilder source) { if (plan.inequalityConstraints().isEmpty()) { source.append(" return true;\n"); } else { - source.append(" return "); - for (int i = 0; i < plan.inequalityConstraints().size(); i++) { - LmdbLftjPlan.InequalityConstraint inequality = plan.inequalityConstraints().get(i); - if (i > 0) { - source.append("\n && "); - } - source.append("state().value(") + for (LmdbLftjPlan.InequalityConstraint inequality : plan.inequalityConstraints()) { + source.append(" if (state().value(") .append(variableSlot(inequality.leftVariable())) - .append(") != state().value(") + .append(") == state().value(") .append(variableSlot(inequality.rightVariable())) - .append(')'); + .append(")) {\n"); + source.append(" return false;\n"); + source.append(" }\n"); } - source.append(";\n"); + source.append(" return true;\n"); } source.append(" }\n\n"); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java index daea3d0fa5..20a2a58b4a 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbPrefixFrontierProvider.java @@ -148,21 +148,27 @@ private LmdbDerivedBinaryRelation relation(LmdbLftjPatternPlan patternPlan) { } metrics.recordRelationLoad(); + relation = loadDerivedRelation(queryAccess, state.txn(), patternPlan, predicateId); + relationCache.put(lookup.freeze(), relation); + return relation; + } + + static LmdbDerivedBinaryRelation loadDerivedRelation(LmdbQueryAccess queryAccess, TxnManager.Txn txn, + LmdbLftjPatternPlan patternPlan, long predicateId) { int sourceComponent 
= patternPlan.keyTerm(1).component(); int targetComponent = patternPlan.keyTerm(2).component(); LmdbDerivedBinaryRelation.Builder builder = new LmdbDerivedBinaryRelation.Builder(sourceComponent, targetComponent); - Arrays.fill(relationLowerBound, 0L); - Arrays.fill(relationUpperBound, Long.MAX_VALUE); - relationLowerBound[TripleStore.PRED_IDX] = predicateId; - relationUpperBound[TripleStore.PRED_IDX] = predicateId; + long[] lowerBound = new long[4]; + long[] upperBound = new long[4]; + Arrays.fill(upperBound, Long.MAX_VALUE); + lowerBound[TripleStore.PRED_IDX] = predicateId; + upperBound[TripleStore.PRED_IDX] = predicateId; int sourceKeyField = patternPlan.keyFieldIndexForComponent(sourceComponent); int targetKeyField = patternPlan.keyFieldIndexForComponent(targetComponent); - forEachUniqueRow(patternPlan, relationLowerBound, relationUpperBound, 1, + forEachUniqueRow(queryAccess, txn, patternPlan, lowerBound, upperBound, 1, row -> builder.add(row[sourceKeyField], row[targetKeyField])); - relation = builder.build(); - relationCache.put(lookup.freeze(), relation); - return relation; + return builder.build(); } private boolean canUseDerivedRelation(LmdbLftjPatternPlan patternPlan) { @@ -172,11 +178,17 @@ private boolean canUseDerivedRelation(LmdbLftjPatternPlan patternPlan) { private void forEachUniqueRow(LmdbLftjPatternPlan patternPlan, long[] lowerBound, long[] upperBound, int prefixLength, RowConsumer consumer) { + forEachUniqueRow(queryAccess, state.txn(), patternPlan, lowerBound, upperBound, prefixLength, consumer); + } + + private static void forEachUniqueRow(LmdbQueryAccess queryAccess, TxnManager.Txn txn, + LmdbLftjPatternPlan patternPlan, long[] lowerBound, long[] upperBound, int prefixLength, + RowConsumer consumer) { try (CursorReader explicit = new CursorReader( - queryAccess.openTrieCursor(state.txn(), patternPlan.indexName(), true), + queryAccess.openTrieCursor(txn, patternPlan.indexName(), true), lowerBound, upperBound, prefixLength); CursorReader 
inferred = queryAccess.includeInferred() - ? new CursorReader(queryAccess.openTrieCursor(state.txn(), patternPlan.indexName(), false), + ? new CursorReader(queryAccess.openTrieCursor(txn, patternPlan.indexName(), false), lowerBound, upperBound, prefixLength) : CursorReader.empty()) { while (explicit.available() || inferred.available()) { @@ -195,7 +207,7 @@ private void forEachUniqueRow(LmdbLftjPatternPlan patternPlan, long[] lowerBound } } - private int compareRows(long[] left, long[] right) { + private static int compareRows(long[] left, long[] right) { for (int i = 0; i < 4; i++) { int comparison = Long.compare(left[i], right[i]); if (comparison != 0) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index bdf49a0236..95a6bbe815 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -43,6 +43,7 @@ import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; import org.eclipse.rdf4j.query.explanation.Explanation; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; import org.eclipse.rdf4j.repository.sail.SailRepository; @@ -50,6 +51,8 @@ import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.base.SailDataset; import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryBenchmark; +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog; +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog.QueryScenario; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.Test; @@ -148,7 +151,7 @@ void 
fullStackCompilerSourceShouldInlineInequalityGuardsForAliasedPlans() { assertThat(source) .contains("passesInequalityConstraints") - .contains("state().value(0) != state().value(1)") + .contains("state().value(0) == state().value(1)") .doesNotContain("FilterIterator") .doesNotContain("ProjectionIterator"); } @@ -210,6 +213,59 @@ void compiledAndInterpretedShouldMatchForPartiallyBoundHiddenContextInput() { assertThat(compiled).hasSize(54).containsExactlyElementsOf(interpreted); } + @Test + void fullCompiledAndExecutorShouldProduceSameRowsForRealStoreCycle5() throws Exception { + try (FullCodegenFixture executorFixture = new FullCodegenFixture(5, LmdbLftjCodegenCompiler.INSTANCE); + FullCodegenFixture fullFixture = new FullCodegenFixture(5, LmdbLftjFullCodegenCompiler.INSTANCE)) { + List executorRows = executeQueryRows(executorFixture.repository, foafCycleQuery(5)); + List fullRows = executeQueryRows(fullFixture.repository, foafCycleQuery(5)); + + assertThat(executorRows).hasSize(120); + assertThat(fullRows) + .withFailMessage("missing=%s extra=%s", difference(executorRows, fullRows), + difference(fullRows, executorRows)) + .containsExactlyInAnyOrderElementsOf(executorRows); + } + } + + @Test + void fullCompiledRealStoreCycle5ShouldInlineAllInequalityGuards() throws Exception { + try (FullCodegenFixture fixture = new FullCodegenFixture(5, LmdbLftjFullCodegenCompiler.INSTANCE)) { + executeQueryRows(fixture.repository, foafCycleQuery(5)); + + String source = sourceForPreparedPlan(fixture.store, LmdbLftjFullCodegenCompiler.INSTANCE); + + assertThat(source) + .contains("state().value(0) == state().value(1)") + .contains("state().value(0) == state().value(2)") + .contains("state().value(0) == state().value(3)") + .contains("state().value(0) == state().value(4)") + .contains("state().value(1) == state().value(2)") + .contains("state().value(1) == state().value(3)") + .contains("state().value(1) == state().value(4)") + .contains("state().value(2) == state().value(3)") + 
.contains("state().value(2) == state().value(4)") + .contains("state().value(3) == state().value(4)"); + } + } + + @Test + void fullCompiledFactoryShouldProduceSameRowsAsExecutorFactoryForRealStoreCycle5() throws Exception { + try (FullCodegenFixture executorFixture = new FullCodegenFixture(5, LmdbLftjCodegenCompiler.INSTANCE); + FullCodegenFixture fullFixture = new FullCodegenFixture(5, LmdbLftjFullCodegenCompiler.INSTANCE)) { + List executorRows = executeCompiledPlanRows(executorFixture, + LmdbLftjCodegenCompiler.INSTANCE, foafCycleQuery(5)); + List fullRows = executeCompiledPlanRows(fullFixture, + LmdbLftjFullCodegenCompiler.INSTANCE, foafCycleQuery(5)); + + assertThat(executorRows).hasSize(120); + assertThat(fullRows) + .withFailMessage("missing=%s extra=%s", difference(executorRows, fullRows), + difference(fullRows, executorRows)) + .containsExactlyInAnyOrderElementsOf(executorRows); + } + } + @Test void compiledAndInterpretedShouldMatchForFullyBoundHiddenContextInput() { LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlanWithHiddenContexts(); @@ -444,23 +500,18 @@ void fullCodegenShouldFuseSupportedFilterAndProjectionIntoLftjPlan() throws Exce @Test void fullCodegenFoafBenchmarkSequentialQueriesShouldKeepUsingGeneratedFactories() throws Exception { - FoafCliqueQueryBenchmark benchmark = new FoafCliqueQueryBenchmark(); - benchmark.peopleCount = 300; - benchmark.cliquePercentage = 30; - benchmark.minCliqueSize = 3; - benchmark.maxCliqueSize = 6; - benchmark.randomKnowsEdges = 900; - benchmark.seed = 12345L; - benchmark.benchmarkMode = LmdbLftjBenchmarkMode.FULL_CODEGEN; + FoafCliqueQueryBenchmark benchmark = configuredFoafBenchmark(); benchmark.setup(); try { - assertThat(benchmark.cycle3()).isPositive(); - assertThat(benchmark.cycle4()).isPositive(); - assertThat(benchmark.cycle5()).isPositive(); + for (QueryScenario scenario : FoafCliqueQueryCatalog.allScenarios()) { + assertThat(benchmark.executeScenario(scenario)) + .withFailMessage("scenario=%s", 
scenario.benchmarkMethodName()) + .isPositive(); + } SailRepository repository = (SailRepository) readField(benchmark, "repository"); LmdbBenchmarkStore store = (LmdbBenchmarkStore) repository.getSail(); assertThat(compiledFactoryClassNames(store.codegenCache())) - .hasSizeGreaterThanOrEqualTo(3) + .hasSizeGreaterThanOrEqualTo(FoafCliqueQueryCatalog.allScenarios().size()) .allSatisfy(name -> assertThat(name).contains("GeneratedLmdbFullStackLftjFactory")); assertThat(cacheEntryDescriptions(store.codegenCache())) .allSatisfy(description -> assertThat(description).doesNotContain("Unable to compile")); @@ -469,6 +520,31 @@ void fullCodegenFoafBenchmarkSequentialQueriesShouldKeepUsingGeneratedFactories( } } + @Test + void disabledFoafBenchmarkSequentialQueriesShouldSkipLftjAndCodegen() throws Exception { + FoafCliqueQueryBenchmark benchmark = configuredFoafBenchmark(FoafCliqueQueryBenchmark.LFTJ_DISABLED); + benchmark.setup(); + try { + SailRepository repository = (SailRepository) readField(benchmark, "repository"); + try (SailRepositoryConnection connection = repository.getConnection()) { + assertThat(connection.prepareTupleQuery(FoafCliqueQueryCatalog.QueryScenario.CYCLE3.query()) + .explain(Explanation.Level.Optimized) + .toString()) + .doesNotContain("LmdbLftjTupleExpr"); + } + for (QueryScenario scenario : FoafCliqueQueryCatalog.allScenarios()) { + assertThat(benchmark.executeScenario(scenario)) + .withFailMessage("scenario=%s", scenario.benchmarkMethodName()) + .isPositive(); + } + LmdbBenchmarkStore store = (LmdbBenchmarkStore) repository.getSail(); + assertThat(compiledFactoryClassNames(store.codegenCache())).isEmpty(); + assertThat(cacheEntryDescriptions(store.codegenCache())).isEmpty(); + } finally { + benchmark.tearDown(); + } + } + @Test void defaultStoreConnectionShouldUseFullCodegenCompiler() throws Exception { try (DefaultCodegenFixture fixture = new DefaultCodegenFixture()) { @@ -490,14 +566,7 @@ void 
compileFailureShouldNotSilentlyFallbackToInterpretedIteration() { } private void assertFoafBenchmarkQueryCompilesGeneratedFactory(int cycleSize) throws Exception { - FoafCliqueQueryBenchmark benchmark = new FoafCliqueQueryBenchmark(); - benchmark.peopleCount = 300; - benchmark.cliquePercentage = 30; - benchmark.minCliqueSize = 3; - benchmark.maxCliqueSize = 6; - benchmark.randomKnowsEdges = 900; - benchmark.seed = 12345L; - benchmark.benchmarkMode = LmdbLftjBenchmarkMode.FULL_CODEGEN; + FoafCliqueQueryBenchmark benchmark = configuredFoafBenchmark(); benchmark.setup(); try { SailRepository repository = (SailRepository) readField(benchmark, "repository"); @@ -517,6 +586,22 @@ private void assertFoafBenchmarkQueryCompilesGeneratedFactory(int cycleSize) thr } } + private FoafCliqueQueryBenchmark configuredFoafBenchmark() { + return configuredFoafBenchmark(LmdbLftjBenchmarkMode.FULL_CODEGEN); + } + + private FoafCliqueQueryBenchmark configuredFoafBenchmark(String benchmarkMode) { + FoafCliqueQueryBenchmark benchmark = new FoafCliqueQueryBenchmark(); + benchmark.peopleCount = 300; + benchmark.cliquePercentage = 30; + benchmark.minCliqueSize = 3; + benchmark.maxCliqueSize = 6; + benchmark.randomKnowsEdges = 900; + benchmark.seed = 12345L; + benchmark.benchmarkMode = benchmarkMode; + return benchmark; + } + private long executeFoafBenchmarkCycle(FoafCliqueQueryBenchmark benchmark, int cycleSize) { switch (cycleSize) { case 3: @@ -849,6 +934,85 @@ private String foafCycleAliasQuery(int size) { return builder.toString(); } + private List executeQueryRows(SailRepository repository, String query) { + try (SailRepositoryConnection connection = repository.getConnection()) { + List rows = new ArrayList<>(); + try (var result = connection.prepareTupleQuery(query).evaluate()) { + result.forEach(bindingSet -> rows.add(bindingSetKey(bindingSet))); + } + rows.sort(String::compareTo); + return rows; + } + } + + private String bindingSetKey(BindingSet bindingSet) { + List names = new 
ArrayList<>(bindingSet.getBindingNames()); + names.sort(String::compareTo); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < names.size(); i++) { + if (i > 0) { + builder.append('|'); + } + String name = names.get(i); + builder.append(name).append('=').append(bindingSet.getValue(name).stringValue()); + } + return builder.toString(); + } + + private List difference(List left, List right) { + List difference = new ArrayList<>(left); + difference.removeAll(right); + return difference; + } + + private List executeCompiledPlanRows(FullCodegenFixture fixture, LmdbLftjCodegenCompiler compiler, + String query) throws Exception { + try (SailRepositoryConnection connection = fixture.repository.getConnection()) { + connection.prepareTupleQuery(query).explain(Explanation.Level.Optimized); + } + + LmdbLftjPlan plan = preparedPlan(fixture.store).copy(); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + LmdbQueryAccess queryAccess = fixture.connection.benchmarkQueryAccess(false); + LmdbLftjBindingState state = new LmdbLftjBindingState(plan, EmptyBindingSet.getInstance(), queryAccess); + assertThat(state.initialize()).isTrue(); + state.attachTxn(queryAccess.acquireReadTxn()); + QueryEvaluationContext context = new QueryEvaluationContext.Minimal((Dataset) null); + LmdbCompiledLftjFactory factory = compiler.compile(plan, shape, false); + try (CloseableIteration iteration = factory.create(plan, shape, state, context, queryAccess, + new LmdbLftjMetrics())) { + List rows = new ArrayList<>(); + while (iteration.hasNext()) { + rows.add(bindingSetKey(iteration.next())); + } + rows.sort(String::compareTo); + return rows; + } + } + + @SuppressWarnings("unchecked") + private String sourceForPreparedPlan(LmdbBenchmarkStore store, LmdbLftjCodegenCompiler compiler) throws Exception { + LmdbLftjPlan plan = preparedPlan(store); + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + Path dump = Files.createTempFile("lmdb-lftj-generated-", 
".java"); + compiler.dumpSourceFor(dump, plan, shape, false); + return Files.readString(dump); + } + + @SuppressWarnings("unchecked") + private LmdbLftjPlan preparedPlan(LmdbBenchmarkStore store) throws Exception { + Field entriesField = LmdbLftjPreparedPlanCache.class.getDeclaredField("entries"); + entriesField.setAccessible(true); + Map entries = (LinkedHashMap) entriesField + .get(store.preparedPlanCache()); + return entries.values() + .stream() + .filter(LmdbLftjPlanner.PlanningResult::planned) + .map(LmdbLftjPlanner.PlanningResult::plan) + .findFirst() + .orElseThrow(); + } + private Object readField(Object target, String name) throws Exception { Field field = target.getClass().getDeclaredField(name); field.setAccessible(true); @@ -956,6 +1120,10 @@ private static final class FullCodegenFixture implements AutoCloseable { private final File dataDir; private FullCodegenFixture() throws IOException { + this(4, LmdbLftjFullCodegenCompiler.INSTANCE); + } + + private FullCodegenFixture(int personCount, LmdbLftjCodegenCompiler compiler) throws IOException { dataDir = Files.createTempDirectory("rdf4j-lmdb-full-codegen-test").toFile(); LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); config.setLftjEnabled(true); @@ -963,10 +1131,10 @@ private FullCodegenFixture() throws IOException { config.setForceSync(false); config.setValueDBSize(64L * 1024 * 1024); config.setTripleDBSize(config.getValueDBSize()); - store = new LmdbBenchmarkStore(dataDir, config, LmdbLftjFullCodegenCompiler.INSTANCE); + store = new LmdbBenchmarkStore(dataDir, config, compiler); repository = new SailRepository(store); repository.init(); - populate(repository); + populate(repository, personCount); connection = (LmdbBenchmarkStore.BenchmarkStoreConnection) store.getConnection(); } @@ -980,10 +1148,10 @@ public void close() throws Exception { } } - private static void populate(SailRepository repository) { + private static void populate(SailRepository repository, int 
personCount) { try (SailRepositoryConnection connection = repository.getConnection()) { - for (long subject = 1; subject <= 4; subject++) { - for (long object = 1; object <= 4; object++) { + for (long subject = 1; subject <= personCount; subject++) { + for (long object = 1; object <= personCount; object++) { if (subject != object) { connection.add(person(subject), FOAF.KNOWS, person(object)); } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java index c8e0877672..2a0d6f01c8 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -32,11 +32,15 @@ import org.eclipse.rdf4j.query.QueryEvaluationException; import org.eclipse.rdf4j.query.QueryLanguage; import org.eclipse.rdf4j.query.algebra.And; +import org.eclipse.rdf4j.query.algebra.BindingSetAssignment; import org.eclipse.rdf4j.query.algebra.Compare; +import org.eclipse.rdf4j.query.algebra.Distinct; import org.eclipse.rdf4j.query.algebra.Extension; import org.eclipse.rdf4j.query.algebra.ExtensionElem; import org.eclipse.rdf4j.query.algebra.Filter; +import org.eclipse.rdf4j.query.algebra.Group; import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.Order; import org.eclipse.rdf4j.query.algebra.Projection; import org.eclipse.rdf4j.query.algebra.ProjectionElem; import org.eclipse.rdf4j.query.algebra.ProjectionElemList; @@ -51,6 +55,7 @@ import org.eclipse.rdf4j.query.impl.EmptyBindingSet; import org.eclipse.rdf4j.query.parser.ParsedTupleQuery; import org.eclipse.rdf4j.query.parser.QueryParserUtil; +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog; import org.junit.jupiter.api.Test; class LmdbLftjOptimizerTest { @@ -275,6 +280,44 @@ void optimizeShouldLeaveChainedAliasExtensionOutsideFusedOutputs() throws Except 
assertInstanceOf(Projection.class, findNode(tupleExpr, Projection.class)); } + @Test + void optimizeShouldKeepDistinctAndOrderOutsideFusedCycleCore() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot( + FoafCliqueQueryCatalog.QueryScenario.CYCLE3_DISTINCT_CITY_ORDERED.query()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertInstanceOf(BindingSetAssignment.class, findNode(tupleExpr, BindingSetAssignment.class)); + assertInstanceOf(Distinct.class, findNode(tupleExpr, Distinct.class)); + assertInstanceOf(Order.class, findNode(tupleExpr, Order.class)); + assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertEquals(List.of(), lftj.plan().inequalityConstraints()); + } + + @Test + void optimizeShouldKeepGroupingOutsideFusedCycleCore() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot( + FoafCliqueQueryCatalog.QueryScenario.CYCLE3_GROUPED_INTEREST.query()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertInstanceOf(BindingSetAssignment.class, findNode(tupleExpr, BindingSetAssignment.class)); + assertInstanceOf(Group.class, findNode(tupleExpr, Group.class)); + assertInstanceOf(Order.class, findNode(tupleExpr, Order.class)); + assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertEquals(List.of(), lftj.plan().inequalityConstraints()); + } + private TupleExpr cycle(String a, String b, String c) { StatementPattern pattern1 = 
statementPattern(a, b); StatementPattern pattern2 = statementPattern(b, c); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java index 42be1165c1..18a31d8397 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueDataGenerator.java @@ -15,20 +15,69 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Random; import java.util.Set; import org.eclipse.rdf4j.common.transaction.IsolationLevels; import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.RDFS; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; final class FoafCliqueDataGenerator { private static final String PERSON_NAMESPACE = "http://example.org/foaf/person/"; + private static final String TYPE_NAMESPACE = FoafCliqueQueryCatalog.META_NAMESPACE + "type/"; private static final int BATCH_SIZE = 10_000; + private static final String[] FIRST_NAMES = { + "Anna", "Ben", "Clara", "Daniel", "Elin", "Farah", "Gustav", "Hana", + "Ida", "Jonas", "Karin", "Lukas", "Mira", "Noah", "Oskar", "Petra" + }; + private static final String[] LAST_NAMES = { + "Berg", "Lind", "Dahl", "Nyman", "Holm", "Aasen", "Svensson", "Olsen", + "Hansen", "Lehto", "Madsen", "Jensen", "Nygaard", "Ranta", "Karlsson", "Eklund" + }; + private static final String[] ALT_LABEL_LANGS = { "sv", "no", "da", "de" }; + private static final String[][] CITY_DATA = { + { "oslo", "Oslo", "nb" }, + { "stockholm", "Stockholm", "sv" }, + { "copenhagen", "Copenhagen", "da" }, + { "helsinki", 
"Helsinki", "fi" }, + { "bergen", "Bergen", "nn" }, + { "gothenburg", "Gothenburg", "sv" }, + { "aarhus", "Aarhus", "da" }, + { "trondheim", "Trondheim", "nb" } + }; + private static final String[][] ORGANIZATION_DATA = { + { "northGraphLab", "North Graph Lab" }, + { "northDataCollective", "North Data Collective" }, + { "northSemanticsStudio", "North Semantics Studio" }, + { "harborKnowledgeWorks", "Harbor Knowledge Works" }, + { "fjordQuerySystems", "Fjord Query Systems" }, + { "balticReasoningGuild", "Baltic Reasoning Guild" }, + { "arcticOntologyOffice", "Arctic Ontology Office" }, + { "signalWeaveLabs", "Signal Weave Labs" } + }; + private static final String[][] INTEREST_DATA = { + { "rdf", "RDF" }, + { "sparql", "SPARQL" }, + { "knowledgeGraphs", "Knowledge Graphs" }, + { "queryPlanning", "Query Planning" }, + { "java", "Java" }, + { "reasoning", "Reasoning" }, + { "federation", "Federation" }, + { "analytics", "Analytics" } + }; + private static final String[][] ROLE_DATA = { + { "engineer", "Engineer" }, + { "researcher", "Researcher" }, + { "architect", "Architect" }, + { "analyst", "Analyst" } + }; private final int peopleCount; private final int cliquePercentage; @@ -38,6 +87,10 @@ final class FoafCliqueDataGenerator { private final Random random; private final List people = new ArrayList<>(); + private final List organizations = new ArrayList<>(); + private final List cities = new ArrayList<>(); + private final List interests = new ArrayList<>(); + private final List roleTypes = new ArrayList<>(); private final Set knowsEdges = new HashSet<>(); private int pendingStatements; @@ -73,6 +126,7 @@ void populate(SailRepositoryConnection connection) { pendingStatements = 0; connection.begin(IsolationLevels.NONE); try { + createReferenceData(connection, valueFactory); createPeople(connection, valueFactory); createCliques(connection); createRandomKnowsEdges(connection); @@ -85,12 +139,87 @@ void populate(SailRepositoryConnection connection) { } } + private void 
createReferenceData(SailRepositoryConnection connection, ValueFactory valueFactory) { + cities.clear(); + organizations.clear(); + interests.clear(); + roleTypes.clear(); + + IRI cityType = valueFactory.createIRI(TYPE_NAMESPACE + "City"); + IRI interestType = valueFactory.createIRI(TYPE_NAMESPACE + "Interest"); + IRI roleType = valueFactory.createIRI(TYPE_NAMESPACE + "Role"); + + for (String[] cityData : CITY_DATA) { + IRI city = valueFactory.createIRI(FoafCliqueQueryCatalog.CITY_NAMESPACE + cityData[0]); + cities.add(city); + addStatement(connection, city, RDF.TYPE, cityType); + addStatement(connection, city, RDFS.LABEL, valueFactory.createLiteral(cityData[1], "en")); + addStatement(connection, city, RDFS.LABEL, valueFactory.createLiteral(cityData[1], cityData[2])); + } + + for (String[] roleData : ROLE_DATA) { + IRI role = valueFactory.createIRI(FoafCliqueQueryCatalog.ROLE_NAMESPACE + roleData[0]); + roleTypes.add(role); + addStatement(connection, role, RDF.TYPE, roleType); + addStatement(connection, role, RDFS.LABEL, valueFactory.createLiteral(roleData[1], "en")); + } + + for (int i = 0; i < ORGANIZATION_DATA.length; i++) { + String[] organizationData = ORGANIZATION_DATA[i]; + IRI organization = valueFactory + .createIRI(FoafCliqueQueryCatalog.ORGANIZATION_NAMESPACE + organizationData[0]); + organizations.add(organization); + addStatement(connection, organization, RDF.TYPE, FOAF.ORGANIZATION); + addStatement(connection, organization, FOAF.NAME, valueFactory.createLiteral(organizationData[1])); + addStatement(connection, organization, RDFS.LABEL, valueFactory.createLiteral(organizationData[1], "en")); + addStatement(connection, organization, FOAF.HOMEPAGE, + valueFactory.createIRI("https://" + organizationData[0] + ".example.org")); + addStatement(connection, organization, FOAF.BASED_NEAR, cities.get(i % cities.size())); + } + + for (String[] interestData : INTEREST_DATA) { + IRI interest = valueFactory.createIRI(FoafCliqueQueryCatalog.INTEREST_NAMESPACE + 
interestData[0]); + interests.add(interest); + addStatement(connection, interest, RDF.TYPE, interestType); + addStatement(connection, interest, RDFS.LABEL, valueFactory.createLiteral(interestData[1], "en")); + } + } + private void createPeople(SailRepositoryConnection connection, ValueFactory valueFactory) { people.clear(); for (int i = 0; i < peopleCount; i++) { IRI person = valueFactory.createIRI(PERSON_NAMESPACE + i); + IRI city = cities.get(i % cities.size()); + IRI roleType = roleTypes.get(i % roleTypes.size()); + IRI primaryInterest = interests.get(i % interests.size()); + IRI secondaryInterest = interests.get((i + 3) % interests.size()); + IRI primaryOrganization = organizations.get(i % organizations.size()); + IRI secondaryOrganization = organizations.get((i + 5) % organizations.size()); + String fullName = composeFullName(i); + String slug = slugify(fullName) + "-" + i; + people.add(person); addStatement(connection, person, RDF.TYPE, FOAF.PERSON); + addStatement(connection, person, RDF.TYPE, roleType); + addStatement(connection, person, RDFS.LABEL, valueFactory.createLiteral(fullName, "en")); + addStatement(connection, person, RDFS.LABEL, + valueFactory.createLiteral(fullName, ALT_LABEL_LANGS[i % ALT_LABEL_LANGS.length])); + addStatement(connection, person, FOAF.NAME, valueFactory.createLiteral(fullName)); + addStatement(connection, person, FOAF.NICK, + valueFactory.createLiteral(FIRST_NAMES[i % FIRST_NAMES.length].toLowerCase(Locale.ROOT) + (i + 1))); + addStatement(connection, person, FOAF.MBOX, valueFactory.createIRI("mailto:" + slug + "@example.org")); + addStatement(connection, person, FOAF.HOMEPAGE, + valueFactory.createIRI("https://people.example.org/" + slug)); + addStatement(connection, person, FOAF.AGE, valueFactory.createLiteral(24 + (i % 35))); + addStatement(connection, person, FOAF.BASED_NEAR, city); + addStatement(connection, person, FOAF.INTEREST, primaryInterest); + if (!primaryInterest.equals(secondaryInterest)) { + 
addStatement(connection, person, FOAF.INTEREST, secondaryInterest); + } + addStatement(connection, primaryOrganization, FOAF.MEMBER, person); + if (i % 11 == 0 && !primaryOrganization.equals(secondaryOrganization)) { + addStatement(connection, secondaryOrganization, FOAF.MEMBER, person); + } } } @@ -158,7 +287,7 @@ private boolean addKnowsEdge(SailRepositoryConnection connection, int source, in return true; } - private void addStatement(SailRepositoryConnection connection, IRI subject, IRI predicate, IRI object) { + private void addStatement(SailRepositoryConnection connection, IRI subject, IRI predicate, Value object) { connection.add(subject, predicate, object); pendingStatements++; if (pendingStatements >= BATCH_SIZE) { @@ -173,4 +302,26 @@ private void flush(SailRepositoryConnection connection) { } pendingStatements = 0; } + + private String composeFullName(int index) { + String firstName = FIRST_NAMES[index % FIRST_NAMES.length]; + String lastName = LAST_NAMES[(index / FIRST_NAMES.length) % LAST_NAMES.length]; + return firstName + " " + lastName; + } + + private String slugify(String value) { + StringBuilder builder = new StringBuilder(value.length()); + for (int i = 0; i < value.length(); i++) { + char current = Character.toLowerCase(value.charAt(i)); + if ((current >= 'a' && current <= 'z') || (current >= '0' && current <= '9')) { + builder.append(current); + } else if (builder.length() > 0 && builder.charAt(builder.length() - 1) != '-') { + builder.append('-'); + } + } + if (builder.length() > 0 && builder.charAt(builder.length() - 1) == '-') { + builder.setLength(builder.length() - 1); + } + return builder.toString(); + } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java index c17ee0b84f..2f42976b5d 100644 --- 
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java @@ -15,12 +15,14 @@ import java.io.File; import java.nio.file.Path; +import java.util.List; import org.eclipse.rdf4j.repository.Repository; import org.eclipse.rdf4j.repository.RepositoryConnection; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; import org.eclipse.rdf4j.sail.lmdb.LmdbStore; +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog.QueryScenario; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -28,21 +30,16 @@ class FoafCliqueLftjCorrectnessTest { @Test - void cycle3ShouldMatchRegularJoinCount(@TempDir Path tempDir) { - assertCycleCountMatches(tempDir, 3); + void baselineCycleQueriesShouldMatchRegularJoinCount(@TempDir Path tempDir) { + assertQueriesMatch(tempDir, FoafCliqueQueryCatalog.baselineScenarios()); } @Test - void cycle4ShouldMatchRegularJoinCount(@TempDir Path tempDir) { - assertCycleCountMatches(tempDir, 4); + void mixedCycleQueriesShouldMatchRegularJoinCount(@TempDir Path tempDir) { + assertQueriesMatch(tempDir, FoafCliqueQueryCatalog.mixedScenarios()); } - @Test - void cycle5ShouldMatchRegularJoinCount(@TempDir Path tempDir) { - assertCycleCountMatches(tempDir, 5); - } - - private void assertCycleCountMatches(Path tempDir, int cycleSize) { + private void assertQueriesMatch(Path tempDir, List scenarios) { Repository fallbackRepository = createRepository(tempDir.resolve("fallback").toFile(), false, false); Repository interpretedRepository = createRepository(tempDir.resolve("interpreted").toFile(), true, false); Repository compiledRepository = createRepository(tempDir.resolve("compiled").toFile(), true, true); @@ -52,13 +49,16 @@ private void 
assertCycleCountMatches(Path tempDir, int cycleSize) { populate(interpretedRepository); populate(compiledRepository); - long expected = executeCount(fallbackRepository, cycleQuery(cycleSize)); - long interpreted = executeCount(interpretedRepository, cycleQuery(cycleSize)); - long compiled = executeCount(compiledRepository, cycleQuery(cycleSize)); + for (QueryScenario scenario : scenarios) { + long expected = executeCount(fallbackRepository, scenario.query()); + long interpreted = executeCount(interpretedRepository, scenario.query()); + long compiled = executeCount(compiledRepository, scenario.query()); - assertEquals(expected, interpreted, - "Interpreted LFTJ must preserve the cycle" + cycleSize + " result count"); - assertEquals(expected, compiled, "Compiled LFTJ must preserve the cycle" + cycleSize + " result count"); + assertEquals(expected, interpreted, + "Interpreted LFTJ must preserve the " + scenario.benchmarkMethodName() + " result count"); + assertEquals(expected, compiled, + "Compiled LFTJ must preserve the " + scenario.benchmarkMethodName() + " result count"); + } } finally { fallbackRepository.shutDown(); interpretedRepository.shutDown(); @@ -90,35 +90,4 @@ private long executeCount(Repository repository, String query) { return connection.prepareTupleQuery(query).evaluate().stream().count(); } } - - private static String cycleQuery(int size) { - StringBuilder builder = new StringBuilder(); - builder.append("PREFIX foaf: \n"); - builder.append("SELECT * WHERE {\n"); - for (int i = 0; i < size; i++) { - builder.append(" ?") - .append(variableName(i)) - .append(" foaf:knows ?") - .append(variableName((i + 1) % size)) - .append(" .\n"); - } - builder.append(" FILTER ("); - boolean first = true; - for (int i = 0; i < size; i++) { - for (int j = i + 1; j < size; j++) { - if (!first) { - builder.append(" && "); - } - builder.append("?").append(variableName(i)).append(" != ?").append(variableName(j)); - first = false; - } - } - builder.append(")\n"); - 
builder.append("}\n"); - return builder.toString(); - } - - private static char variableName(int index) { - return (char) ('a' + index); - } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index f0dd048ab8..f5e3d31cc9 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -21,6 +21,7 @@ import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; import org.eclipse.rdf4j.sail.lmdb.LmdbBenchmarkStore; import org.eclipse.rdf4j.sail.lmdb.LmdbLftjBenchmarkMode; +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog.QueryScenario; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -40,16 +41,14 @@ import org.openjdk.jmh.runner.options.OptionsBuilder; @State(Scope.Benchmark) -@Warmup(iterations = 30, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @BenchmarkMode(Mode.AverageTime) @Fork(value = 1, jvmArgs = { "-Xms2G", "-Xmx2G", "-XX:+UseG1GC" }) -@Measurement(iterations = 3, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @OutputTimeUnit(TimeUnit.MILLISECONDS) public class FoafCliqueQueryBenchmark { - private static final String QUERY_CYCLE_3 = cycleQuery(3); - private static final String QUERY_CYCLE_4 = cycleQuery(4); - private static final String QUERY_CYCLE_5 = cycleQuery(5); + public static final String LFTJ_DISABLED = "disabled"; @Param({ "5000" }) public int peopleCount; @@ -69,8 +68,7 @@ public class FoafCliqueQueryBenchmark { @Param({ "12345" }) public long seed; -// @Param({ "interpreted", 
"executor_codegen", "full_codegen" }) - @Param({ "full_codegen" }) + @Param({ "interpreted", "executor_codegen", "full_codegen", LFTJ_DISABLED }) public String benchmarkMode; private File dataDir; @@ -85,10 +83,10 @@ public static void main(String[] args) throws RunnerException { @Setup(Level.Trial) public void setup() throws IOException { - LmdbLftjBenchmarkMode.validate(benchmarkMode); + validateBenchmarkMode(benchmarkMode); dataDir = Files.createTempDirectory("rdf4j-lmdb-foaf-cliques").toFile(); repository = new SailRepository(new LmdbBenchmarkStore(dataDir, createLftjBenchmarkConfig(benchmarkMode), - LmdbLftjBenchmarkMode.compiler(benchmarkMode))); + LFTJ_DISABLED.equals(benchmarkMode) ? null : LmdbLftjBenchmarkMode.compiler(benchmarkMode))); repository.init(); try (SailRepositoryConnection connection = repository.getConnection()) { @@ -109,17 +107,58 @@ public void tearDown() throws IOException { @Benchmark public long cycle3() { - return executeCount(QUERY_CYCLE_3); + return executeCount(QueryScenario.CYCLE3.query()); } @Benchmark public long cycle4() { - return executeCount(QUERY_CYCLE_4); + return executeCount(QueryScenario.CYCLE4.query()); } @Benchmark public long cycle5() { - return executeCount(QUERY_CYCLE_5); + return executeCount(QueryScenario.CYCLE5.query()); + } + + @Benchmark + public long cycle3DistinctCityOrdered() { + return executeCount(QueryScenario.CYCLE3_DISTINCT_CITY_ORDERED.query()); + } + + @Benchmark + public long cycle4ValuesFilteredOrdered() { + return executeCount(QueryScenario.CYCLE4_VALUES_FILTERED_ORDERED.query()); + } + + @Benchmark + public long cycle3GroupedInterest() { + return executeCount(QueryScenario.CYCLE3_GROUPED_INTEREST.query()); + } + + @Benchmark + public long cycle5ValuesDistinctMailboxOrdered() { + return executeCount(QueryScenario.CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED.query()); + } + + public long executeScenario(QueryScenario scenario) { + switch (scenario) { + case CYCLE3: + return cycle3(); + case CYCLE4: + 
return cycle4(); + case CYCLE5: + return cycle5(); + case CYCLE3_DISTINCT_CITY_ORDERED: + return cycle3DistinctCityOrdered(); + case CYCLE4_VALUES_FILTERED_ORDERED: + return cycle4ValuesFilteredOrdered(); + case CYCLE3_GROUPED_INTEREST: + return cycle3GroupedInterest(); + case CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED: + return cycle5ValuesDistinctMailboxOrdered(); + default: + throw new IllegalArgumentException("Unsupported benchmark scenario: " + scenario); + } } private long executeCount(String query) { @@ -130,42 +169,19 @@ private long executeCount(String query) { private static LmdbStoreConfig createLftjBenchmarkConfig(String benchmarkMode) { LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); - config.setLftjEnabled(true); - config.setLftjCodegenEnabled(LmdbLftjBenchmarkMode.lftjCodegenEnabled(benchmarkMode)); + boolean lftjEnabled = !LFTJ_DISABLED.equals(benchmarkMode); + config.setLftjEnabled(lftjEnabled); + config.setLftjCodegenEnabled(lftjEnabled && LmdbLftjBenchmarkMode.lftjCodegenEnabled(benchmarkMode)); config.setForceSync(false); config.setValueDBSize(1_073_741_824L); config.setTripleDBSize(config.getValueDBSize()); return config; } - private static String cycleQuery(int size) { - StringBuilder builder = new StringBuilder(); - builder.append("PREFIX foaf: \n"); - builder.append("SELECT * WHERE {\n"); - for (int i = 0; i < size; i++) { - builder.append(" ?") - .append(variableName(i)) - .append(" foaf:knows ?") - .append(variableName((i + 1) % size)) - .append(" .\n"); - } - builder.append(" FILTER ("); - boolean first = true; - for (int i = 0; i < size; i++) { - for (int j = i + 1; j < size; j++) { - if (!first) { - builder.append(" && "); - } - builder.append("?").append(variableName(i)).append(" != ?").append(variableName(j)); - first = false; - } + private static void validateBenchmarkMode(String benchmarkMode) { + if (LFTJ_DISABLED.equals(benchmarkMode)) { + return; } - builder.append(")\n"); - builder.append("}\n"); - 
return builder.toString(); - } - - private static char variableName(int index) { - return (char) ('a' + index); + LmdbLftjBenchmarkMode.validate(benchmarkMode); } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java new file mode 100644 index 0000000000..7f2d255249 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java @@ -0,0 +1,176 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb.benchmark; + +import java.util.List; + +public final class FoafCliqueQueryCatalog { + + static final String META_NAMESPACE = "http://example.org/foaf/meta/"; + static final String ORGANIZATION_NAMESPACE = META_NAMESPACE + "organization/"; + static final String CITY_NAMESPACE = META_NAMESPACE + "city/"; + static final String INTEREST_NAMESPACE = META_NAMESPACE + "interest/"; + static final String ROLE_NAMESPACE = META_NAMESPACE + "role/"; + + private static final String PREFIXES = "PREFIX foaf: \n" + + "PREFIX rdfs: \n" + + "PREFIX excity: <" + CITY_NAMESPACE + ">\n" + + "PREFIX exinterest: <" + INTEREST_NAMESPACE + ">\n" + + "PREFIX exrole: <" + ROLE_NAMESPACE + ">\n"; + + public enum QueryScenario { + CYCLE3("cycle3", cycleQuery(3)), + CYCLE4("cycle4", cycleQuery(4)), + CYCLE5("cycle5", cycleQuery(5)), + 
CYCLE3_DISTINCT_CITY_ORDERED("cycle3DistinctCityOrdered", cycle3DistinctCityOrderedQuery()), + CYCLE4_VALUES_FILTERED_ORDERED("cycle4ValuesFilteredOrdered", cycle4ValuesFilteredOrderedQuery()), + CYCLE3_GROUPED_INTEREST("cycle3GroupedInterest", cycle3GroupedInterestQuery()), + CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED("cycle5ValuesDistinctMailboxOrdered", + cycle5ValuesDistinctMailboxOrderedQuery()); + + private final String benchmarkMethodName; + private final String query; + + QueryScenario(String benchmarkMethodName, String query) { + this.benchmarkMethodName = benchmarkMethodName; + this.query = query; + } + + public String benchmarkMethodName() { + return benchmarkMethodName; + } + + public String query() { + return query; + } + } + + private FoafCliqueQueryCatalog() { + } + + public static List baselineScenarios() { + return List.of( + QueryScenario.CYCLE3, + QueryScenario.CYCLE4, + QueryScenario.CYCLE5); + } + + public static List mixedScenarios() { + return List.of( + QueryScenario.CYCLE3_DISTINCT_CITY_ORDERED, + QueryScenario.CYCLE4_VALUES_FILTERED_ORDERED, + QueryScenario.CYCLE3_GROUPED_INTEREST, + QueryScenario.CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED); + } + + public static List allScenarios() { + return List.of(QueryScenario.values()); + } + + public static String cycleQuery(int size) { + return PREFIXES + + "SELECT * WHERE {\n" + + cyclePattern(size) + + inequalityFilter(size) + + "}\n"; + } + + private static String cycle3DistinctCityOrderedQuery() { + return PREFIXES + + "SELECT DISTINCT ?a ?aLabel ?cityLabel WHERE {\n" + + " VALUES ?interest { exinterest:rdf exinterest:sparql exinterest:queryPlanning }\n" + + cyclePattern(3) + + inequalityFilter(3) + + " ?a foaf:interest ?interest ;\n" + + " foaf:based_near ?city ;\n" + + " rdfs:label ?aLabel .\n" + + " ?city rdfs:label ?cityLabel .\n" + + " FILTER (lang(?aLabel) = \"en\" && lang(?cityLabel) = \"en\")\n" + + "}\n" + + "ORDER BY ?cityLabel ?aLabel\n"; + } + + private static String 
cycle4ValuesFilteredOrderedQuery() { + return PREFIXES + + "SELECT DISTINCT ?a ?age ?homepage WHERE {\n" + + " VALUES ?role { exrole:engineer exrole:researcher }\n" + + cyclePattern(4) + + inequalityFilter(4) + + " ?a a ?role ;\n" + + " foaf:age ?age ;\n" + + " foaf:homepage ?homepage .\n" + + " FILTER (?age >= 30 && CONTAINS(LCASE(STR(?homepage)), \"/people.example.org/\"))\n" + + "}\n" + + "ORDER BY DESC(?age) ?a\n"; + } + + private static String cycle3GroupedInterestQuery() { + return PREFIXES + + "SELECT ?interest (COUNT(DISTINCT ?a) AS ?memberCount) WHERE {\n" + + " VALUES ?interest { exinterest:rdf exinterest:sparql exinterest:knowledgeGraphs }\n" + + cyclePattern(3) + + inequalityFilter(3) + + " ?a foaf:interest ?interest .\n" + + "}\n" + + "GROUP BY ?interest\n" + + "HAVING (COUNT(DISTINCT ?a) >= 2)\n" + + "ORDER BY DESC(?memberCount) ?interest\n"; + } + + private static String cycle5ValuesDistinctMailboxOrderedQuery() { + return PREFIXES + + "SELECT DISTINCT ?a ?aLabel ?homepage WHERE {\n" + + " VALUES ?city { excity:oslo excity:stockholm excity:copenhagen excity:helsinki }\n" + + cyclePattern(5) + + inequalityFilter(5) + + " ?a foaf:based_near ?city ;\n" + + " rdfs:label ?aLabel ;\n" + + " foaf:mbox ?mbox ;\n" + + " foaf:homepage ?homepage .\n" + + " FILTER (lang(?aLabel) = \"en\" && CONTAINS(LCASE(STR(?mbox)), \"@example.org\"))\n" + + "}\n" + + "ORDER BY ?aLabel\n"; + } + + private static String cyclePattern(int size) { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < size; i++) { + builder.append(" ?") + .append(variableName(i)) + .append(" foaf:knows ?") + .append(variableName((i + 1) % size)) + .append(" .\n"); + } + return builder.toString(); + } + + private static String inequalityFilter(int size) { + StringBuilder builder = new StringBuilder(); + builder.append(" FILTER ("); + boolean first = true; + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (!first) { + builder.append(" && "); + } + 
builder.append("?").append(variableName(i)).append(" != ?").append(variableName(j)); + first = false; + } + } + builder.append(")\n"); + return builder.toString(); + } + + private static char variableName(int index) { + return (char) ('a' + index); + } +} From ebbe896152538f972d390b9a6d05cec024220b7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 08:24:02 +0200 Subject: [PATCH 23/32] updated results --- .../FoafCliqueQueryBenchmarkResults.md | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md index 49cdf1950c..228050acdf 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -17,3 +17,35 @@ FoafCliqueQueryBenchmark.cycle5 interpreted 30 FoafCliqueQueryBenchmark.cycle5 executor_codegen 30 8 3 5000 15000 12345 avgt 3 663.095 ± 236.201 ms/op FoafCliqueQueryBenchmark.cycle5 full_codegen 30 8 3 5000 15000 12345 avgt 3 520.210 ± 120.747 ms/op ``` + +``` +Benchmark (benchmarkMode) (cliquePercentage) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units +FoafCliqueQueryBenchmark.cycle3 interpreted 30 8 3 5000 15000 12345 avgt 5 21.555 ± 3.110 ms/op +FoafCliqueQueryBenchmark.cycle3 executor_codegen 30 8 3 5000 15000 12345 avgt 5 18.401 ± 1.086 ms/op +FoafCliqueQueryBenchmark.cycle3 full_codegen 30 8 3 5000 15000 12345 avgt 5 14.880 ± 1.150 ms/op +FoafCliqueQueryBenchmark.cycle3 disabled 30 8 3 5000 15000 12345 avgt 5 101.933 ± 4.997 ms/op +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 146.240 ± 9.376 ms/op 
+FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 144.603 ± 27.824 ms/op +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 110.024 ± 13.207 ms/op +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered disabled 30 8 3 5000 15000 12345 avgt 5 102.194 ± 7.423 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest interpreted 30 8 3 5000 15000 12345 avgt 5 54.924 ± 6.028 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest executor_codegen 30 8 3 5000 15000 12345 avgt 5 55.310 ± 5.366 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest full_codegen 30 8 3 5000 15000 12345 avgt 5 43.784 ± 0.896 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest disabled 30 8 3 5000 15000 12345 avgt 5 75.099 ± 1.030 ms/op +FoafCliqueQueryBenchmark.cycle4 interpreted 30 8 3 5000 15000 12345 avgt 5 88.792 ± 4.729 ms/op +FoafCliqueQueryBenchmark.cycle4 executor_codegen 30 8 3 5000 15000 12345 avgt 5 64.010 ± 1.059 ms/op +FoafCliqueQueryBenchmark.cycle4 full_codegen 30 8 3 5000 15000 12345 avgt 5 60.457 ± 2.042 ms/op +FoafCliqueQueryBenchmark.cycle4 disabled 30 8 3 5000 15000 12345 avgt 5 670.650 ± 7.958 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 247.547 ± 20.613 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 224.290 ± 31.904 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 188.459 ± 29.016 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered disabled 30 8 3 5000 15000 12345 avgt 5 298.370 ± 15.842 ms/op +FoafCliqueQueryBenchmark.cycle5 interpreted 30 8 3 5000 15000 12345 avgt 5 481.559 ± 33.314 ms/op +FoafCliqueQueryBenchmark.cycle5 executor_codegen 30 8 3 5000 15000 12345 avgt 5 324.419 ± 51.822 ms/op +FoafCliqueQueryBenchmark.cycle5 full_codegen 30 8 3 5000 15000 12345 avgt 5 268.049 ± 4.831 ms/op 
+FoafCliqueQueryBenchmark.cycle5 disabled 30 8 3 5000 15000 12345 avgt 5 4189.973 ± 33.778 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 3545.949 ± 193.672 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 2919.335 ± 32.839 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 2135.299 ± 45.432 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered disabled 30 8 3 5000 15000 12345 avgt 5 2163.481 ± 82.503 ms/op +``` From 3d4fb7949d10ae8a467edeaaee3a58b294dcc28f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 08:31:58 +0200 Subject: [PATCH 24/32] more tests --- .../FoafCliqueLftjCorrectnessTest.java | 194 ++++++++++++++---- .../benchmark/FoafCliqueQueryBenchmark.java | 23 ++- 2 files changed, 173 insertions(+), 44 deletions(-) diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java index 2f42976b5d..54619d2c0a 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueLftjCorrectnessTest.java @@ -14,74 +14,129 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import 
org.apache.commons.io.FileUtils; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.repository.Repository; import org.eclipse.rdf4j.repository.RepositoryConnection; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; -import org.eclipse.rdf4j.sail.lmdb.LmdbStore; +import org.eclipse.rdf4j.rio.helpers.NTriplesUtil; +import org.eclipse.rdf4j.sail.lmdb.LmdbLftjBenchmarkMode; import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog.QueryScenario; -import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +@TestInstance(TestInstance.Lifecycle.PER_CLASS) class FoafCliqueLftjCorrectnessTest { + private static final int PEOPLE_COUNT = 300; + private static final int CLIQUE_PERCENTAGE = 30; + private static final int MIN_CLIQUE_SIZE = 3; + private static final int MAX_CLIQUE_SIZE = 6; + private static final int RANDOM_KNOWS_EDGES = 900; + private static final long SEED = 12345L; + + private final Map repositories = new LinkedHashMap<>(); + private final Map> queryHashes = new EnumMap<>(QueryScenario.class); + + private Path tempDir; + + @BeforeAll + void setUp() throws IOException { + tempDir = Files.createTempDirectory("rdf4j-lmdb-foaf-clique-correctness"); + for (String benchmarkMode : FoafCliqueQueryBenchmark.benchmarkModes()) { + Repository repository = createRepository(tempDir.resolve(benchmarkMode).toFile(), benchmarkMode); + populate(repository); + repositories.put(benchmarkMode, repository); + } + for (QueryScenario scenario : FoafCliqueQueryCatalog.allScenarios()) { + 
Map hashesByMode = new LinkedHashMap<>(); + for (String benchmarkMode : FoafCliqueQueryBenchmark.benchmarkModes()) { + hashesByMode.put(benchmarkMode, executeResultSetHash(repositoryFor(benchmarkMode), scenario.query())); + } + queryHashes.put(scenario, hashesByMode); + } + } + + @AfterAll + void tearDown() throws IOException { + for (Repository repository : repositories.values()) { + repository.shutDown(); + } + if (tempDir != null) { + FileUtils.deleteDirectory(tempDir.toFile()); + } + } + @Test - void baselineCycleQueriesShouldMatchRegularJoinCount(@TempDir Path tempDir) { - assertQueriesMatch(tempDir, FoafCliqueQueryCatalog.baselineScenarios()); + void baselineCycleQueriesShouldMatchRegularJoinCount() { + assertQueriesMatch(FoafCliqueQueryCatalog.baselineScenarios()); } @Test - void mixedCycleQueriesShouldMatchRegularJoinCount(@TempDir Path tempDir) { - assertQueriesMatch(tempDir, FoafCliqueQueryCatalog.mixedScenarios()); + void mixedCycleQueriesShouldMatchRegularJoinCount() { + assertQueriesMatch(FoafCliqueQueryCatalog.mixedScenarios()); } - private void assertQueriesMatch(Path tempDir, List scenarios) { - Repository fallbackRepository = createRepository(tempDir.resolve("fallback").toFile(), false, false); - Repository interpretedRepository = createRepository(tempDir.resolve("interpreted").toFile(), true, false); - Repository compiledRepository = createRepository(tempDir.resolve("compiled").toFile(), true, true); + @ParameterizedTest(name = "{0} / {1}") + @MethodSource("queryAndModeArguments") + void eachQueryAndModeShouldProduceTheSameResultHash(QueryScenario scenario, String benchmarkMode) { + assertEquals(hashFor(scenario, FoafCliqueQueryBenchmark.LFTJ_DISABLED), hashFor(scenario, benchmarkMode), + benchmarkMode + " must preserve the full result set for " + scenario.benchmarkMethodName()); + } - try { - populate(fallbackRepository); - populate(interpretedRepository); - populate(compiledRepository); - - for (QueryScenario scenario : scenarios) { - long 
expected = executeCount(fallbackRepository, scenario.query()); - long interpreted = executeCount(interpretedRepository, scenario.query()); - long compiled = executeCount(compiledRepository, scenario.query()); - - assertEquals(expected, interpreted, - "Interpreted LFTJ must preserve the " + scenario.benchmarkMethodName() + " result count"); - assertEquals(expected, compiled, - "Compiled LFTJ must preserve the " + scenario.benchmarkMethodName() + " result count"); + static Stream queryAndModeArguments() { + return FoafCliqueQueryCatalog.allScenarios() + .stream() + .flatMap(scenario -> FoafCliqueQueryBenchmark.benchmarkModes() + .stream() + .map(benchmarkMode -> Arguments.of(scenario, benchmarkMode))); + } + + private void assertQueriesMatch(List scenarios) { + Repository fallbackRepository = repositoryFor(FoafCliqueQueryBenchmark.LFTJ_DISABLED); + for (QueryScenario scenario : scenarios) { + long expected = executeCount(fallbackRepository, scenario.query()); + for (String benchmarkMode : List.of( + LmdbLftjBenchmarkMode.INTERPRETED, + LmdbLftjBenchmarkMode.EXECUTOR_CODEGEN, + LmdbLftjBenchmarkMode.FULL_CODEGEN)) { + assertEquals(expected, executeCount(repositoryFor(benchmarkMode), scenario.query()), + benchmarkMode + " must preserve the " + scenario.benchmarkMethodName() + " result count"); } - } finally { - fallbackRepository.shutDown(); - interpretedRepository.shutDown(); - compiledRepository.shutDown(); } } - private Repository createRepository(File dataDir, boolean lftjEnabled, boolean lftjCodegenEnabled) { - LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); - config.setLftjEnabled(lftjEnabled); - config.setLftjCodegenEnabled(lftjCodegenEnabled); - config.setForceSync(false); - config.setValueDBSize(1_073_741_824L); - config.setTripleDBSize(config.getValueDBSize()); - - Repository repository = new SailRepository(new LmdbStore(dataDir, config)); + private Repository createRepository(File dataDir, String benchmarkMode) { + 
SailRepository repository = FoafCliqueQueryBenchmark.createRepository(dataDir, benchmarkMode); repository.init(); return repository; } private void populate(Repository repository) { try (SailRepositoryConnection connection = (SailRepositoryConnection) repository.getConnection()) { - new FoafCliqueDataGenerator(300, 30, 3, 6, 900, 12345L).populate(connection); + new FoafCliqueDataGenerator(PEOPLE_COUNT, CLIQUE_PERCENTAGE, MIN_CLIQUE_SIZE, MAX_CLIQUE_SIZE, + RANDOM_KNOWS_EDGES, SEED).populate(connection); } } @@ -90,4 +145,65 @@ private long executeCount(Repository repository, String query) { return connection.prepareTupleQuery(query).evaluate().stream().count(); } } + + private String executeResultSetHash(Repository repository, String query) { + List rows = new ArrayList<>(); + try (RepositoryConnection connection = repository.getConnection()) { + connection.prepareTupleQuery(query) + .evaluate() + .forEach(bindingSet -> rows.add(bindingSetToString(bindingSet))); + } + rows.sort(String::compareTo); + return hash(rows); + } + + private String bindingSetToString(BindingSet bindingSet) { + return bindingSet.getBindingNames() + .stream() + .sorted() + .map(name -> name + "=" + valueToString(bindingSet.getValue(name))) + .collect(Collectors.joining("|")); + } + + private String valueToString(Value value) { + return NTriplesUtil.toNTriplesString(value); + } + + private String hash(List rows) { + MessageDigest digest = newSha256Digest(); + for (String row : rows) { + digest.update(row.getBytes(StandardCharsets.UTF_8)); + digest.update((byte) '\n'); + } + return toHex(digest.digest()); + } + + private MessageDigest newSha256Digest() { + try { + return MessageDigest.getInstance("SHA-256"); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException("Missing SHA-256 digest", e); + } + } + + private String toHex(byte[] bytes) { + StringBuilder builder = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + builder.append(Character.forDigit((b >>> 4) & 
0x0F, 16)); + builder.append(Character.forDigit(b & 0x0F, 16)); + } + return builder.toString(); + } + + private Repository repositoryFor(String benchmarkMode) { + Repository repository = repositories.get(benchmarkMode); + if (repository == null) { + throw new IllegalArgumentException("Unknown benchmark mode: " + benchmarkMode); + } + return repository; + } + + private String hashFor(QueryScenario scenario, String benchmarkMode) { + return queryHashes.get(scenario).get(benchmarkMode); + } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index f5e3d31cc9..ffac0df70e 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -14,6 +14,7 @@ import java.io.File; import java.io.IOException; import java.nio.file.Files; +import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; @@ -81,12 +82,24 @@ public static void main(String[] args) throws RunnerException { .build()).run(); } + static List<String> benchmarkModes() { + return List.of( + LmdbLftjBenchmarkMode.INTERPRETED, + LmdbLftjBenchmarkMode.EXECUTOR_CODEGEN, + LmdbLftjBenchmarkMode.FULL_CODEGEN, + LFTJ_DISABLED); + } + + static SailRepository createRepository(File dataDir, String benchmarkMode) { + validateBenchmarkMode(benchmarkMode); + return new SailRepository(new LmdbBenchmarkStore(dataDir, createLftjBenchmarkConfig(benchmarkMode), + LFTJ_DISABLED.equals(benchmarkMode) ? 
null : LmdbLftjBenchmarkMode.compiler(benchmarkMode))); + } + @Setup(Level.Trial) public void setup() throws IOException { - validateBenchmarkMode(benchmarkMode); dataDir = Files.createTempDirectory("rdf4j-lmdb-foaf-cliques").toFile(); - repository = new SailRepository(new LmdbBenchmarkStore(dataDir, createLftjBenchmarkConfig(benchmarkMode), - LFTJ_DISABLED.equals(benchmarkMode) ? null : LmdbLftjBenchmarkMode.compiler(benchmarkMode))); + repository = createRepository(dataDir, benchmarkMode); repository.init(); try (SailRepositoryConnection connection = repository.getConnection()) { @@ -167,7 +180,7 @@ private long executeCount(String query) { } } - private static LmdbStoreConfig createLftjBenchmarkConfig(String benchmarkMode) { + static LmdbStoreConfig createLftjBenchmarkConfig(String benchmarkMode) { LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); boolean lftjEnabled = !LFTJ_DISABLED.equals(benchmarkMode); config.setLftjEnabled(lftjEnabled); @@ -178,7 +191,7 @@ private static LmdbStoreConfig createLftjBenchmarkConfig(String benchmarkMode) { return config; } - private static void validateBenchmarkMode(String benchmarkMode) { + static void validateBenchmarkMode(String benchmarkMode) { if (LFTJ_DISABLED.equals(benchmarkMode)) { return; } From 02fce3c72bb15b3783fdebea0082bdbc8126eadd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 09:06:27 +0200 Subject: [PATCH 25/32] improve skill --- .codex/skills/high-performance-java/SKILL.md | 63 ++++- .../high-performance-java/agents/openai.yaml | 4 +- .../references/advanced-coding-techniques.md | 220 ++++++++++++++++++ .../references/algorithms-data-structures.md | 181 ++++++++++++++ .../high-performance-java-libraries.md | 198 ++++++++++++++++ 5 files changed, 655 insertions(+), 11 deletions(-) create mode 100644 .codex/skills/high-performance-java/references/advanced-coding-techniques.md create mode 100644 
.codex/skills/high-performance-java/references/algorithms-data-structures.md create mode 100644 .codex/skills/high-performance-java/references/high-performance-java-libraries.md diff --git a/.codex/skills/high-performance-java/SKILL.md b/.codex/skills/high-performance-java/SKILL.md index 501990d804..cae97aba2b 100644 --- a/.codex/skills/high-performance-java/SKILL.md +++ b/.codex/skills/high-performance-java/SKILL.md @@ -1,28 +1,33 @@ --- name: high-performance-java -description: Use when writing, reviewing, or reshaping HotSpot Java where throughput, latency, allocation rate, zero-copy, lazy evaluation, non-materialization, intrinsics, SuperWord auto-vectorization, or C2 assembly matter. Bias toward specialized hot-path code, then ground claims in benchmarks and JIT evidence. +description: Use when writing, reviewing, or reshaping HotSpot Java where algorithmic complexity, data-structure choice, throughput, latency, allocation rate, zero-copy, lazy evaluation, non-materialization, primitive collections, performance libraries, intrinsics, SuperWord auto-vectorization, or C2 assembly matter. Also use for advanced algorithmic problem solving in Java, including dynamic programming, graph/range techniques, and cache-aware code shape. Bias toward asymptotic wins first, then specialized hot-path code, then benchmark and JIT evidence. --- # High-Performance Java -Use this skill for Java hot paths. Default bias: fewer allocations, fewer copies, less polymorphism, narrower code shape, stronger evidence. +Use this skill for Java hot paths and algorithm-heavy Java. Default bias: asymptotic win first, then fewer allocations, fewer copies, less polymorphism, narrower code shape, stronger evidence. HotSpot-only v1. 
Baseline assumptions: - repo baseline: JDK 21 - current local runtime may be newer - low-level claims stay provisional until benchmark + JIT evidence agree +- algorithm/data-structure claims stay provisional until they match the actual workload constraints ## Core loop -1. Identify the workload shape. -2. Find the hot loop or hot call chain. -3. Write the narrow fast path first. -4. Push generic abstraction, materialization, and dispatch out of the loop. -5. Benchmark before claiming improvement. -6. Inspect HotSpot decisions before claiming JVM-level reasons. +1. Identify the workload shape and constraints. +2. Pick the algorithm and data structure that change the slope. +3. Find the hot loop or hot call chain. +4. Write the narrow fast path first. +5. Push generic abstraction, materialization, and dispatch out of the loop. +6. Benchmark before claiming improvement. +7. Inspect HotSpot decisions before claiming JVM-level reasons. ## Default coding bias +- Prefer an algorithmic win over a micro win. +- Prefer data structures that fit the operation mix, memory budget, and key domain. +- Prefer primitive-friendly layouts before boxed object graphs. - Prefer zero-copy over copy-transform-copy. - Prefer reuse over per-item allocation. - Prefer lazy traversal over full materialization. @@ -33,15 +38,23 @@ HotSpot-only v1. Baseline assumptions: ## Hard rules +- Do not micro-optimize a fundamentally wrong algorithm. - Do not defend a perf change with style arguments alone. - Do not claim “faster” without a measurement path. - Do not claim “JIT will optimize this” without checking inlining / compilation evidence. +- Do not add a specialized library until you know what property it buys: fewer allocations, fewer copies, lower contention, off-heap layout, better primitive support, or a stronger algorithm. - Do not keep elegant-but-generic stream pipelines in verified hot loops. 
- Do not pay interface / visitor / wrapper overhead inside the hottest loop unless evidence shows it disappears. +- Do not default to boxed `Map` / `Set` / `List` shapes when primitive collections or flat arrays better fit the dominant path. ## Design checklist Ask these first: +- What are `N`, `Q`, the update/query ratio, and the memory budget? +- Is the main problem asymptotic complexity, cache locality, allocation pressure, branchiness, contention, or I/O? +- What operation dominates: membership, counting, top-k, range query, join, shortest path, DP transition, parsing, encoding? +- Can the key/value/state space stay primitive or bit-packed? +- Can the workload become offline, batched, sorted, prefix-based, or compressed? - What allocates on the steady-state path? - What copies bytes, chars, arrays, or collections? - What materializes intermediate state that could stay streamed or cursor-based? @@ -51,6 +64,18 @@ Ask these first: ## Workflow +### 0) Pick the algorithmic shape + +- Estimate the real workload: input size, query count, mutation pattern, latency target, and memory ceiling. +- Choose the algorithm and data structure before tuning loop syntax. +- Favor contiguous, cache-friendly, primitive-heavy representations when semantics allow. +- For dynamic programming, define state, transition cost, base case, iteration order, and whether state compression is possible. +- For graph/range/string problems, look for offline transforms, prefix structures, monotonic structures, or specialized search before hand-tuning. + +Read these only when relevant: +- [references/algorithms-data-structures.md](references/algorithms-data-structures.md) for algorithm and data-structure selection. +- [references/advanced-coding-techniques.md](references/advanced-coding-techniques.md) for dynamic programming and advanced problem-solving patterns. + ### 1) Shape the code for HotSpot - Split hot and cold paths. 
@@ -84,10 +109,20 @@ When a benchmark moves, inspect what HotSpot actually did: Use sibling skill [hotspot-jit-forensics](../hotspot-jit-forensics/SKILL.md) for method-scoped C2 evidence. Use `async-profiler-java-macos` when wall/cpu/alloc evidence is needed on macOS. -### 4) Report honestly +### 4) Use libraries intentionally + +- Prefer the JDK first when it is close enough and operationally simpler. +- Reach for specialized libraries when they remove boxing, copies, parser overhead, contention, or off-heap indirection the JDK cannot. +- Check dependency health before adding a new library. +- Benchmark the library choice against the simplest credible in-repo baseline. + +Library reference: [references/high-performance-java-libraries.md](references/high-performance-java-libraries.md). + +### 5) Report honestly Frame conclusions as: - hypothesis +- algorithm/data-structure choice - benchmark result - JIT/profile evidence - confidence @@ -99,12 +134,19 @@ If assembly is unavailable, say so and fall back to compilation logs, inlining d Use this skill when the user asks to: - remove allocation pressure from a parser, iterator, encoder, decoder, or query loop - make a Java path zero-copy or lazy +- choose the right data structure for a Java workload +- solve a dynamic programming, graph, interval, ranking, or range-query problem in Java under performance constraints +- replace boxed collections with primitive or cache-friendly structures +- choose between the JDK and specialized Java performance libraries - specialize code for one workload instead of many - explain whether a HotSpot optimization actually happened - ground a Java perf change in benchmark + C2 evidence ## Reference map +- Algorithms and data structures: [references/algorithms-data-structures.md](references/algorithms-data-structures.md) +- Advanced coding techniques: [references/advanced-coding-techniques.md](references/advanced-coding-techniques.md) +- High-performance Java libraries: 
[references/high-performance-java-libraries.md](references/high-performance-java-libraries.md) - Coding rules: [references/coding-rules.md](references/coding-rules.md) - Evidence workflow: [references/evidence-workflow.md](references/evidence-workflow.md) - JDK version guardrails: [references/jdk-21-26-notes.md](references/jdk-21-26-notes.md) @@ -112,8 +154,11 @@ Use this skill when the user asks to: ## Output contract When you use this skill, the answer should usually include: +- workload model and asymptotic bottleneck +- algorithm and data-structure recommendation - hot-path hypothesis - concrete code-shape recommendation +- library recommendation when a library meaningfully changes the design - benchmark command or benchmark evidence - JIT/profile evidence or the missing prerequisite - a confidence statement tied to the active JDK diff --git a/.codex/skills/high-performance-java/agents/openai.yaml b/.codex/skills/high-performance-java/agents/openai.yaml index 1cea978b4d..d14661da03 100644 --- a/.codex/skills/high-performance-java/agents/openai.yaml +++ b/.codex/skills/high-performance-java/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "High-Performance Java" - short_description: "Concise hot-path Java coding skill" - default_prompt: "Use $high-performance-java to write or review a Java hot path with benchmark and HotSpot evidence." + short_description: "Hot-path Java plus algorithm/perf-library guidance" + default_prompt: "Use $high-performance-java to choose the right algorithm, data structure, library, and HotSpot-friendly code shape for a high-performance Java task." 
diff --git a/.codex/skills/high-performance-java/references/advanced-coding-techniques.md b/.codex/skills/high-performance-java/references/advanced-coding-techniques.md new file mode 100644 index 0000000000..3b20bb09f2 --- /dev/null +++ b/.codex/skills/high-performance-java/references/advanced-coding-techniques.md @@ -0,0 +1,220 @@ +# Advanced Coding Techniques + +Use this reference when the problem needs more than basic loops and collections: dynamic programming, advanced search, state compression, offline transforms, or optimization patterns that materially change runtime. + +## Dynamic programming checklist + +Before writing code, define: +- state: the minimum information needed to continue +- transition: how one state moves to the next +- base case: the smallest solved states +- order: top-down memoization or bottom-up tabulation +- objective: min, max, count, feasibility, reconstruction +- memory plan: full table, rolling rows, bitset, or sparse map + +If any of those are fuzzy, the DP is not ready. + +## DP implementation bias in Java + +- Prefer flat primitive arrays over nested object graphs. +- Flatten `dp[row][col]` into one array when locality matters. +- Use sentinel values (`INF`, `-1`, impossible masks) instead of wrapper objects. +- Compress dimensions aggressively when a transition only needs prior rows or prior prefixes. +- Use iterative tabulation when recursion depth or call overhead is risky. +- Use memoization when the reachable state space is sparse or pruning is strong. + +## Common DP families + +### 1D DP + +Use for: +- linear decisions +- prefix optimization +- classic knapsack-style transitions + +Java notes: +- Often compresses to one array. +- Direction matters: reverse iterate for 0/1 knapsack; forward iterate for unbounded knapsack. + +### 2D grid / sequence DP + +Use for: +- edit distance +- LCS variants +- path counting +- interval composition + +Java notes: +- Two rolling rows often replace the full matrix. 
+- Keep row-major iteration consistent with memory layout. + +### Interval DP + +Use for: +- merge cost +- matrix chain multiplication +- optimal parenthesization +- palindrome partitioning + +Heuristic: +- Try increasing interval length order. +- Precompute reusable range costs. + +### Tree DP + +Use for: +- subtree aggregation +- rerooting +- independent set / matching variants on trees + +Java notes: +- Iterative traversal can avoid stack overflow. +- Store parent/index arrays once; reuse buffers for passes. + +### DAG DP + +Use for: +- longest path in DAG +- path counts +- dependency-ordered optimization + +Heuristic: +- Topological order first, transitions second. + +### Bitmask DP + +Use for: +- small `n` subset problems +- travelling-salesman-style state +- assignment and partition variants + +Java notes: +- Use `int` masks up to 31 bits, `long` masks up to 63. +- Precompute subset transitions when reused heavily. +- Beware exponential memory growth; consider meet-in-the-middle. + +### Digit DP + +Use for: +- counting numbers with digit constraints +- lexicographic numeric constraints + +State usually includes: +- position +- tight/limited flag +- started/leading-zero flag +- problem-specific accumulator + +## DP optimization patterns + +### Prefix/suffix acceleration + +If a transition scans prior states, ask whether prefix minima/maxima/sums can reduce it from `O(n^2)` to `O(n)`. + +### Monotonic queue optimization + +Use when transitions need min/max over a sliding window. + +### Divide-and-conquer DP optimization + +Use when the optimal split point is monotonic across rows or columns. + +### Convex hull trick / Li Chao tree + +Use when transitions are of the form: +- `dp[i] = min_j(m[j] * x[i] + b[j])` +- `max` variant of the same + +Only use when the algebra really matches. + +### Bitset DP + +Use when boolean subset transitions can become word-parallel bit operations. 
+ +Examples: +- subset sum +- knapsack feasibility +- reachability layers + +### State compression + +Reduce dimensions by: +- keeping only prior row/column +- encoding booleans into bits +- coordinate-compressing sparse values +- using ids instead of objects + +## Search and optimization patterns + +### Binary search on answer + +Use when: +- feasibility is monotonic +- exact objective is hard but checking a threshold is easier + +### Meet-in-the-middle + +Use when: +- brute force is `2^n` +- `n` is small enough to split into two `2^(n/2)` halves + +### Branch and bound + +Use when: +- you can compute tight upper/lower bounds +- a good heuristic ordering prunes much of the tree + +### Iterative deepening + +Use when: +- memory is tight +- solution depth is unknown but usually shallow + +### Offline query processing + +Use when: +- query order is irrelevant +- sorting queries/events lets you reuse structure updates + +## Greedy and exchange-thinking + +Before building DP or search, test whether a greedy proof exists: +- local choice stays globally optimal +- exchange argument repairs any non-greedy optimal solution +- matroid-like or interval-scheduling structure is present + +If greedy works, it often beats DP both asymptotically and operationally. + +## Range and sequence patterns + +- Sliding window: monotonic boundary expansion or contraction. +- Two pointers: sorted arrays, pair/triple sums, dedup, partitioning. +- Monotonic stack: next greater/smaller, histogram, span problems. +- Difference arrays: batch range updates. +- Prefix sums / xor / hashes: cheap repeated range queries. + +## Java-specific implementation notes + +- Avoid recursion for deep graphs, trees, or DP unless the depth bound is small. +- Replace tuple objects with parallel arrays or packed longs in hot paths. +- Pre-size arrays and reusable buffers for repeated test cases. +- Be explicit about overflow; use `long` for counts/costs unless `int` is proven safe. 
+- Separate correctness code from hot code paths once the algorithm is clear. + +## Problem-solving ladder + +When stuck, try this order: +1. Can I sort or batch the work? +2. Can I precompute prefix, suffix, or compressed state? +3. Can a different data structure remove a nested loop? +4. Is the problem actually graph, interval, or DP in disguise? +5. Can the state shrink to primitives or bits? +6. Can I prove greedy, monotonicity, or convexity? + +## Red flags + +- DP state includes fields that do not affect future transitions. +- Memoization key is a heavyweight object when a few ints suffice. +- Full `O(n^2)` table retained even though only one frontier is used. +- Search explores symmetric states repeatedly. +- A library data structure is used where a flat array plus sort is enough. diff --git a/.codex/skills/high-performance-java/references/algorithms-data-structures.md b/.codex/skills/high-performance-java/references/algorithms-data-structures.md new file mode 100644 index 0000000000..96dc2481c5 --- /dev/null +++ b/.codex/skills/high-performance-java/references/algorithms-data-structures.md @@ -0,0 +1,181 @@ +# Algorithms and Data Structures + +Use this reference when the main question is algorithmic shape, data-structure choice, or whether a complexity change dominates any JVM-level tuning. Biggest wins usually come from changing the slope before shaving cycles. + +## Triage first + +Before choosing a structure, answer these: +- Is the workload one-shot, batched, or online? +- Do you need insertion order, sorted order, or just membership? +- Are keys dense integers, sparse integers, strings, tuples, or custom objects? +- Are queries point lookups, range queries, top-k queries, path queries, or aggregate queries? +- Is the structure static after build, append-only, or heavily mutable? +- Can the state stay primitive, bit-packed, or index-based? 
+ +## Default data-structure bias + +- `int[]`, `long[]`, `byte[]`: best starting point when size is known or can grow geometrically. +- `ArrayList`: good general dynamic array when boxing is acceptable and traversal dominates. +- `ArrayDeque`: default queue/stack/deque. Better cache shape than `LinkedList`. +- `HashMap` / `HashSet`: baseline for sparse membership and counting when boxing cost is acceptable. +- `TreeMap` / `TreeSet`: only when ordered updates and queries are intrinsic. Do not pay `O(log n)` if sort-once plus scan works. +- `BitSet`: excellent for dense integer domains, set algebra, visited flags, and some DP/state compression. + +## Primitive-first guidance + +If keys/values are primitive and the path is hot: +- Prefer flat arrays when bounds are manageable. +- Prefer primitive maps/sets/heaps over boxed collections when boxing dominates time or memory. +- Use coordinate compression when raw keys are large but the distinct key count is moderate. +- Represent relations as integer ids plus parallel arrays instead of object graphs when traversal dominates. + +## Membership, dedup, counting + +- Hash table: default for sparse exact membership and frequency counting. +- Sort plus scan: strong when the data is batch-oriented, read-mostly, or you also need grouping/order. +- BitSet / boolean array: best for dense bounded integer keys. +- Bloom filter: prefilter only. Use when false positives are acceptable but false negatives are not. + +Red flags: +- Nested membership scans over lists. +- Repeated `contains` on `ArrayList` in hot code. +- Boxing primitive keys when the domain can be compressed. + +## Top-k, ranking, scheduling + +- Binary heap / priority queue: streaming top-k, best-first search, event scheduling. +- Quickselect: one-shot kth element or top-k partition when full sort is wasteful. +- Bucket/counting approach: when values live in a small bounded domain. +- Monotonic deque: sliding-window min/max in linear time. 
+ +Java notes: +- JDK `PriorityQueue` is fine for many cases but boxes primitives. +- For tiny fixed `k`, a sorted small array can beat a heap. + +## Prefix, range, and interval workloads + +- Prefix sum: immutable range-sum/count queries. +- Difference array: batched range updates with one final sweep. +- Fenwick tree: point updates plus prefix/range aggregates in `O(log n)` with low constants. +- Segment tree: more flexible range updates/queries, but heavier than Fenwick. +- Sparse table: immutable idempotent range queries such as min/max/gcd. +- Sweep line: interval overlap, event merging, skyline, booking, and geometry-style event problems. + +Decision rule: +- Static data: prefer prefix sums or sparse tables. +- Dynamic point updates: Fenwick first. +- Complex dynamic range operations: segment tree only when simpler structures fail. + +## Graph workloads + +- BFS: unweighted shortest path, level order, flood fill. +- 0-1 BFS: edge weights only `0` or `1`. +- Dijkstra: non-negative weighted shortest path. +- Topological sort plus DP: DAG path/count problems. +- Union-find (disjoint set union): connectivity under merges, Kruskal, component grouping. +- Tarjan/Kosaraju: strongly connected components. + +Java notes: +- Prefer adjacency as primitive arrays or compact edge lists when the graph is large. +- Avoid per-edge objects on hot traversals. +- Beware recursion depth on DFS; iterative stacks are often safer. + +## String and sequence workloads + +- Sliding window / two pointers: substring/segment constraints with monotonic boundaries. +- KMP or Z-function: repeated pattern matching in linear time. +- Rolling hash: fast substring comparisons with collision caveat. +- Trie: prefix queries and dictionary walks when the alphabet is manageable. +- Aho-Corasick: multiple pattern matching. +- Patience sorting / tails array: `O(n log n)` LIS. + +Java notes: +- Avoid repeated substring materialization in tight loops. 
+- Work on `byte[]`, `char[]`, offsets, or integer ids when possible. + +## Ordered search and offline transforms + +- Sort plus binary search: often simpler and faster than maintaining ordered trees. +- Coordinate compression: map large sparse keys into `[0..m)` for arrays, Fenwick trees, and bitsets. +- Offline queries: sort events/queries once, then answer in a sweep. +- Meet-in-the-middle: split exponential search into two half-enumerations. + +## Data-structure atlas + +### Arrays and flat buffers + +Use when: +- Traversal dominates. +- Keys can be mapped to integer indexes. +- You need maximum locality and minimum allocation. + +Avoid when: +- Sparse domains would explode memory. +- Mutation semantics need expensive shifting and you cannot batch. + +### Hash tables + +Use when: +- Exact membership/counting dominates. +- Order is irrelevant or can be restored later. + +Avoid when: +- Dense bounded keys fit a bitset or direct array. +- You only need one batch query and sort plus scan is cheaper. + +### Heaps + +Use when: +- You need repeated access to min/max with incremental updates. +- Best-first exploration or top-k streaming dominates. + +Avoid when: +- You only need one final order; sort once instead. +- `k` is tiny and a fixed small array is cheaper. + +### Bitsets + +Use when: +- The domain is dense or compressible. +- Boolean DP, visited state, set algebra, or fast intersections matter. + +Avoid when: +- The domain is too sparse after compression. + +### Fenwick and segment trees + +Use when: +- Simple arrays are too static. +- Query/update interleaving matters. + +Avoid when: +- Prefix sums or difference arrays solve the same problem. +- The workload is too small to justify structural overhead. + +### Union-find + +Use when: +- The workload is merge-only connectivity. +- You need amortized near-constant component unions/finds. + +Avoid when: +- You need deletions or rich path queries. 
+ +## Algorithmic red flags + +- `O(n^2)` nested scans hidden inside "simple" collection code. +- Re-sorting on every query. +- `LinkedList` for queue/stack workloads. +- Object-per-node or object-per-edge layouts in large graphs or DP tables. +- Recomputing prefix information instead of caching it. +- Dense DP stored as `Map` when a flat array works. +- Maintaining balanced trees when sort-once plus array search is enough. + +## Escalation rule + +If you can change: +- `O(n^2)` to `O(n log n)` or `O(n)`, +- boxed/object-heavy state to primitive/flat state, +- online mutable work to offline batched work, + +do that before micro-tuning loop syntax or arguing about JIT trivia. diff --git a/.codex/skills/high-performance-java/references/high-performance-java-libraries.md b/.codex/skills/high-performance-java/references/high-performance-java-libraries.md new file mode 100644 index 0000000000..affbb97d55 --- /dev/null +++ b/.codex/skills/high-performance-java/references/high-performance-java-libraries.md @@ -0,0 +1,198 @@ +# High-Performance Java Libraries + +Use this reference when the JDK baseline is known and you need to decide whether a library meaningfully improves layout, primitive support, concurrency, serialization, caching, or observability. + +## Selection rule + +Do not add a library because it is "fast" in the abstract. Add it only when it buys at least one concrete property: +- primitive collections without boxing +- better buffer or off-heap control +- lower-contention queues or caches +- tighter binary encoding +- observability or benchmarking you cannot credibly replace + +Always compare against the simplest viable JDK baseline first. 
+ +## JDK first choices + +Start here before adding dependencies: +- `ArrayDeque`: queue/stack/deque default +- `BitSet`: dense boolean/set algebra and bit-parallel state +- `PriorityQueue`: heap baseline +- `ConcurrentHashMap`: baseline concurrent map +- `LongAdder` / `LongAccumulator`: striped counters under contention +- `VarHandle`: low-level atomic/ordered field access +- `ByteBuffer`: baseline direct or heap buffer abstraction +- JMH, JFR, and `jcmd`: measurement and runtime evidence + +If these solve the problem with acceptable cost, stop. + +## Primitive collections + +### fastutil + +Use when: +- primitive maps, sets, lists, heaps, or big arrays are needed +- boxing in JDK collections is visible in memory or CPU profiles + +Good fit: +- `int -> int`, `long -> long`, and similar dense/sparse maps +- adjacency lists, frequency maps, index maps + +Caution: +- still benchmark against flat arrays when keys can be compressed + +### HPPC + +Use when: +- you want lean primitive collections with a smaller API surface +- hot loops need primitive containers without a broad framework + +### Eclipse Collections primitive containers + +Use when: +- you already use Eclipse Collections +- you need richer collection operations but want primitive variants + +## Buffers, off-heap, and low-latency plumbing + +### Agrona + +Use when: +- you need direct buffers, ring buffers, counters, or low-latency transport helpers +- you want explicit control over memory layout and flyweight-style access + +### Chronicle Bytes / Chronicle Queue / Chronicle Map + +Use when: +- off-heap or memory-mapped storage is intrinsic to the design +- inter-process communication or persisted queue semantics matter + +Caution: +- operational complexity is much higher than plain on-heap structures + +### Netty `ByteBuf` + +Use when: +- the stack already uses Netty +- pooled buffers and zero-copy byte handling matter + +Avoid when: +- pulling in Netty only for a small standalone buffer need + +## 
Concurrency and queues + +### JCTools + +Use when: +- single-producer/single-consumer or MPSC queue semantics are well defined +- `java.util.concurrent` queues show contention or allocation issues + +### LMAX Disruptor + +Use when: +- you have a staged event-processing pipeline +- extremely low latency and mechanical sympathy matter more than API simplicity + +Caution: +- only a fit for specific architectures; not a general queue replacement + +### Caffeine + +Use when: +- you need a production cache with strong hit-rate behavior and concurrency +- cache eviction policy quality matters, not just raw map speed + +## Bitmaps and compressed sets + +### RoaringBitmap + +Use when: +- integer sets are sparse-to-medium density +- you need fast unions, intersections, or membership with lower memory than plain bitsets + +Good fit: +- analytics filters +- posting lists +- visited/frontier sets with large sparse ids + +## Serialization, parsing, and wire formats + +### Jackson + +Use when: +- interoperability and ecosystem support matter more than max throughput + +Tune before replacing: +- reuse `ObjectMapper` +- avoid tree model on hot paths +- stream when full materialization is unnecessary + +### DSL-JSON / jsoniter / specialized parsers + +Use when: +- JSON remains required but generic reflection-heavy parsing is too expensive + +### Protocol Buffers + +Use when: +- schema evolution and interoperability matter + +### FlatBuffers / SBE / Chronicle Wire + +Use when: +- binary layout, lower-copy reads, or ultra-low latency wire handling matter more than generality + +Caution: +- these choices affect interfaces and tooling, not just speed + +## Numerics and vector-style work + +### JDK Vector API + +Use when: +- the workload is data parallel +- you can express operations as bulk lane-wise math + +Caution: +- JDK-version-sensitive; validate on the active runtime + +### EJML and similar numerics libraries + +Use when: +- matrix or numeric kernels dominate and bespoke loops are 
not the business value + +## Benchmarking and profiling + +### JMH + +Use when: +- you need trustworthy microbenchmarks + +### JFR + +Use when: +- you need low-overhead production-friendly profiling + +### async-profiler + +Use when: +- you need CPU, wall, alloc, or lock evidence with low overhead + +## Practical defaults + +If the bottleneck is: +- boxing in maps/sets: try `fastutil` first +- queue contention: compare JDK queues with `JCTools` +- cache behavior: use `Caffeine` +- sparse integer set algebra: use `RoaringBitmap` +- direct/off-heap buffer control: look at `Agrona` +- serious binary wire efficiency: compare Protobuf with FlatBuffers or SBE + +## Library red flags + +- Adding a library before a JDK baseline exists +- Replacing a simple array algorithm with a complex dependency +- Using a concurrency library without matching the actual producer/consumer pattern +- Choosing off-heap because it sounds faster, not because GC or sharing semantics require it +- Adopting a serialization stack without accounting for ecosystem, tooling, and evolution constraints From 9d28525b5e204520f95fe66d66c694a451871a74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 20:06:05 +0200 Subject: [PATCH 26/32] even faster --- .../AbstractLmdbCompiledLftjIteration.java | 2 + ...actLmdbFullStackCompiledLftjIteration.java | 2 + .../sail/lmdb/LmdbCompiledLftjFactory.java | 2 + .../rdf4j/sail/lmdb/LmdbLftjBindingState.java | 2 + .../sail/lmdb/LmdbLftjCodegenCompiler.java | 27 +- .../rdf4j/sail/lmdb/LmdbLftjCursor.java | 3 + .../sail/lmdb/LmdbLftjExecutionShape.java | 3 + .../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 2 +- .../lmdb/LmdbLftjFullCodegenCompiler.java | 67 ++-- .../rdf4j/sail/lmdb/LmdbLftjMetrics.java | 3 + .../rdf4j/sail/lmdb/LmdbLftjOptimizer.java | 356 +++++++++++++++--- .../eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java | 2 + .../rdf4j/sail/lmdb/LmdbLftjPlanner.java | 145 ++++++- .../sail/lmdb/LmdbLftjPlanningHints.java | 51 +++ 
.../lmdb/LmdbLftjTieredCodegenCompiler.java | 6 + .../rdf4j/sail/lmdb/LmdbLftjTupleExpr.java | 11 +- .../rdf4j/sail/lmdb/LmdbQueryAccess.java | 2 + .../sail/lmdb/config/LmdbStoreConfig.java | 5 + .../sail/lmdb/config/LmdbStoreSchema.java | 3 + .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 115 ++++++ .../sail/lmdb/LmdbLftjOptimizerTest.java | 88 ++++- .../rdf4j/sail/lmdb/LmdbLftjPlannerTest.java | 108 ++++++ .../rdf4j/sail/lmdb/LmdbSailStoreTest.java | 21 ++ .../benchmark/FoafCliqueQueryBenchmark.java | 28 ++ .../FoafCliqueQueryBenchmarkResults.md | 54 +++ .../benchmark/FoafCliqueQueryCatalog.java | 24 ++ .../benchmark/FoafCliqueQueryCatalogTest.java | 63 ++++ 27 files changed, 1110 insertions(+), 85 deletions(-) create mode 100644 core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanningHints.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlannerTest.java create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalogTest.java diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java index ecfd434ecc..d5f15cae96 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbCompiledLftjIteration.java @@ -11,11 +11,13 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.LookAheadIteration; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryEvaluationException; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; +@Experimental public abstract class AbstractLmdbCompiledLftjIteration extends LookAheadIteration { private final 
LmdbLftjPlan plan; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java index 32725b466f..de43a426b5 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/AbstractLmdbFullStackCompiledLftjIteration.java @@ -11,11 +11,13 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.LookAheadIteration; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryEvaluationException; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; +@Experimental public abstract class AbstractLmdbFullStackCompiledLftjIteration extends LookAheadIteration { private final LmdbLftjPlan plan; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java index 07c3085bc7..48d95ffa31 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCompiledLftjFactory.java @@ -11,10 +11,12 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; +@Experimental public interface LmdbCompiledLftjFactory { CloseableIteration create(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java index a4472a8d48..0a33c04670 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjBindingState.java @@ -17,12 +17,14 @@ import java.util.Map; import java.util.function.BiConsumer; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.MutableBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; +@Experimental public final class LmdbLftjBindingState { private final LmdbLftjPlan plan; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java index 5bfb11eb5a..50b1f9e973 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenCompiler.java @@ -31,7 +31,7 @@ String cacheKey(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean include LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape) { String simpleClassName = "GeneratedLmdbLftjFactory" + CLASS_COUNTER.incrementAndGet(); - String source = sourceFor(simpleClassName, plan, shape, false); + String source = sourceFor(simpleClassName, plan, shape, false, null); return compileSource(plan.executionKey(), simpleClassName, source); } @@ -39,6 +39,11 @@ LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, return compile(plan, shape); } + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred, + 
LmdbQueryAccess queryAccess) { + return compile(plan, shape, includeInferred); + } + protected final LmdbCompiledLftjFactory compileSource(String executionKey, String simpleClassName, String source) { try { SimpleCompiler compiler = new SimpleCompiler(); @@ -54,21 +59,37 @@ protected final LmdbCompiledLftjFactory compileSource(String executionKey, Strin } String sourceFor(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { - return sourceFor("GeneratedLmdbLftjSource", plan, shape, includeInferred); + return sourceFor(plan, shape, includeInferred, null); + } + + String sourceFor(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred, + LmdbQueryAccess queryAccess) { + return sourceFor("GeneratedLmdbLftjSource", plan, shape, includeInferred, queryAccess); } Path dumpSourceFor(Path outputFile, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) throws IOException { + return dumpSourceFor(outputFile, plan, shape, includeInferred, null); + } + + Path dumpSourceFor(Path outputFile, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred, + LmdbQueryAccess queryAccess) + throws IOException { Path parent = outputFile.getParent(); if (parent != null) { Files.createDirectories(parent); } - Files.writeString(outputFile, sourceFor(plan, shape, includeInferred)); + Files.writeString(outputFile, sourceFor(plan, shape, includeInferred, queryAccess)); return outputFile; } protected String sourceFor(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return sourceFor(simpleClassName, plan, shape, includeInferred, null); + } + + protected String sourceFor(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, + boolean includeInferred, LmdbQueryAccess queryAccess) { return new SourceBuilder(simpleClassName, plan, shape).build(); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java 
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java index 7d1ce00391..cee141f703 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCursor.java @@ -11,6 +11,9 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import org.eclipse.rdf4j.common.annotation.Experimental; + +@Experimental public interface LmdbLftjCursor extends AutoCloseable { boolean open(int bindingSlot); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java index e1ad4f05c7..38bd92671e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutionShape.java @@ -17,6 +17,9 @@ import java.util.List; import java.util.Map; +import org.eclipse.rdf4j.common.annotation.Experimental; + +@Experimental public final class LmdbLftjExecutionShape { private static final int FULL_STACK_TEMPLATE_VERSION = 4; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index e0684bcc05..7321a7081d 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -80,7 +80,7 @@ private LmdbCompiledLftjFactory compiledFactory(LmdbQueryAccess queryAccess, Lmd } try { - LmdbCompiledLftjFactory factory = compiler.compile(plan, shape, queryAccess.includeInferred()); + LmdbCompiledLftjFactory factory = compiler.compile(plan, shape, queryAccess.includeInferred(), queryAccess); queryAccess.cacheCompiledPlanSuccess(cacheKey, factory); return factory; } catch (RuntimeException e) { diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java index ce91e9762d..f2fda00e84 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java @@ -28,14 +28,20 @@ String cacheKey(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean include @Override protected String sourceFor(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, - boolean includeInferred) { - return new SourceBuilder(simpleClassName, plan, shape, includeInferred).build(); + boolean includeInferred, LmdbQueryAccess queryAccess) { + return new SourceBuilder(simpleClassName, plan, shape, includeInferred, queryAccess).build(); } @Override LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { + return compile(plan, shape, includeInferred, null); + } + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred, + LmdbQueryAccess queryAccess) { String simpleClassName = "GeneratedLmdbFullStackLftjFactory" + System.nanoTime(); - String source = sourceFor(simpleClassName, plan, shape, includeInferred); + String source = sourceFor(simpleClassName, plan, shape, includeInferred, queryAccess); return compileSource(cacheKey(plan, shape, includeInferred), simpleClassName, source); } @@ -47,15 +53,17 @@ private static final class SourceBuilder { private final LmdbLftjPlan plan; private final LmdbLftjExecutionShape shape; private final boolean includeInferred; + private final long[][] constantIds; private final int[] relationGroupByPattern; private final RelationGroup[] relationGroups; private SourceBuilder(String simpleClassName, LmdbLftjPlan plan, LmdbLftjExecutionShape shape, - boolean includeInferred) { + boolean 
includeInferred, LmdbQueryAccess queryAccess) { this.simpleClassName = simpleClassName; this.plan = plan; this.shape = shape; this.includeInferred = includeInferred; + this.constantIds = resolveConstantIds(queryAccess); this.relationGroupByPattern = new int[shape.patternCount()]; for (int i = 0; i < relationGroupByPattern.length; i++) { relationGroupByPattern[i] = -1; @@ -597,11 +605,9 @@ private void appendRelationGroupAccessor(StringBuilder source, RelationGroup rel source.append(" return relationGroup").append(relationGroup.groupId).append(";\n"); source.append(" }\n"); source.append(" metrics().recordRelationLoad();\n"); - source.append(" long predicateId = state().fixedIdForComponent(") - .append(patternOrdinal) - .append(", ") - .append(TripleStore.PRED_IDX) - .append(");\n"); + source.append(" long predicateId = ") + .append(fixedIdExpression(patternOrdinal, TripleStore.PRED_IDX)) + .append(";\n"); source.append(" relationGroup") .append(relationGroup.groupId) .append(" = loadDerivedRelation(") @@ -1466,18 +1472,7 @@ private String componentFieldName(int component) { private String componentValueExpression(int patternOrdinal, LmdbLftjExecutionShape.PatternShape patternShape, int component) { if (patternShape.isConstantComponent(component)) { - switch (component) { - case TripleStore.SUBJ_IDX: - return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.SUBJ_IDX + ")"; - case TripleStore.PRED_IDX: - return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.PRED_IDX + ")"; - case TripleStore.OBJ_IDX: - return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.OBJ_IDX + ")"; - case TripleStore.CONTEXT_IDX: - return "state().fixedIdForComponent(" + patternOrdinal + ", " + TripleStore.CONTEXT_IDX + ")"; - default: - throw new IllegalArgumentException("Unknown LMDB component: " + component); - } + return fixedIdExpression(patternOrdinal, component); } int slot = patternShape.slotForComponent(component); if (slot 
>= 0) { @@ -1486,6 +1481,14 @@ private String componentValueExpression(int patternOrdinal, LmdbLftjExecutionSha return "0L"; } + private String fixedIdExpression(int patternOrdinal, int component) { + long resolvedId = constantIds[patternOrdinal][component]; + if (resolvedId > 0L) { + return resolvedId + "L"; + } + return "state().fixedIdForComponent(" + patternOrdinal + ", " + component + ")"; + } + private String upperBoundExpression(int patternOrdinal, LmdbLftjExecutionShape.PatternShape patternShape, int slot, int component) { @@ -1526,6 +1529,28 @@ private RelationGroup[] collectRelationGroups() { return ordered.toArray(new RelationGroup[0]); } + private long[][] resolveConstantIds(LmdbQueryAccess queryAccess) { + long[][] resolved = new long[shape.patternCount()][4]; + if (queryAccess == null) { + return resolved; + } + for (int patternOrdinal = 0; patternOrdinal < shape.patternCount(); patternOrdinal++) { + LmdbLftjPatternPlan patternPlan = plan.patternPlans().get(patternOrdinal); + resolveConstantId(resolved[patternOrdinal], patternPlan.subjectTerm(), queryAccess); + resolveConstantId(resolved[patternOrdinal], patternPlan.predicateTerm(), queryAccess); + resolveConstantId(resolved[patternOrdinal], patternPlan.objectTerm(), queryAccess); + resolveConstantId(resolved[patternOrdinal], patternPlan.termForComponent(TripleStore.CONTEXT_IDX), + queryAccess); + } + return resolved; + } + + private void resolveConstantId(long[] resolved, LmdbLftjPatternPlan.TermRef term, LmdbQueryAccess queryAccess) { + if (term.isConstant()) { + resolved[term.component()] = queryAccess.resolveId(term.constantValue()); + } + } + private static final class RelationGroup { private final int groupId; private final int representativePatternOrdinal; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java index d441412261..98a7b0e70d 100644 --- 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjMetrics.java @@ -11,6 +11,9 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb; +import org.eclipse.rdf4j.common.annotation.Experimental; + +@Experimental public final class LmdbLftjMetrics { private long candidateScans; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java index 92ef00f5ff..0a85bd3536 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java @@ -21,11 +21,14 @@ import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.Dataset; import org.eclipse.rdf4j.query.algebra.And; +import org.eclipse.rdf4j.query.algebra.BindingSetAssignment; import org.eclipse.rdf4j.query.algebra.Compare; +import org.eclipse.rdf4j.query.algebra.Distinct; import org.eclipse.rdf4j.query.algebra.Extension; import org.eclipse.rdf4j.query.algebra.ExtensionElem; import org.eclipse.rdf4j.query.algebra.Filter; import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.Order; import org.eclipse.rdf4j.query.algebra.Projection; import org.eclipse.rdf4j.query.algebra.ProjectionElem; import org.eclipse.rdf4j.query.algebra.QueryModelNode; @@ -77,8 +80,8 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { List operands = new ArrayList<>(); collectOperands(node, operands); - FusionTarget fusionTarget = tryExtractFusionTarget(node, operands); - List patterns = (fusionTarget != null ? fusionTarget.patterns() + PlanningTarget planningTarget = tryExtractPlanningTarget(node, operands); + List patterns = (planningTarget != null ? 
planningTarget.patterns() : operands.stream() .filter(StatementPattern.class::isInstance) .map(StatementPattern.class::cast) @@ -88,18 +91,22 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { } Set configuredIndexes = queryAccess.configuredIndexes(); - TupleExpr fallbackExpr = fusionTarget != null ? fusionTarget.root().clone() + TupleExpr fallbackExpr = planningTarget != null && planningTarget.root() != null && planningTarget.fullyFused() + ? planningTarget.root().clone() : rebuildJoin(patterns.stream().map(TupleExpr::clone).toList()); - List outputBindings = fusionTarget != null ? fusionTarget.outputBindings() + List outputBindings = planningTarget != null ? planningTarget.outputBindings() : List.of(); - List inequalityConstraints = fusionTarget != null - ? fusionTarget.inequalityConstraints() + List inequalityConstraints = planningTarget != null + ? planningTarget.inequalityConstraints() : List.of(); + LmdbLftjPlanningHints planningHints = planningTarget != null ? 
planningTarget.planningHints() + : collectPlanningHints(operands, List.of()); String cacheKey = LmdbLftjPreparedPlanCache.normalizedKey(patterns, configuredIndexes, outputBindings, inequalityConstraints); LmdbLftjPlanner.PlanningResult plan = queryAccess.cachedPlanningResult(cacheKey); if (plan == null) { - plan = planner.plan(fallbackExpr, patterns, configuredIndexes, outputBindings, inequalityConstraints); + plan = planner.plan(fallbackExpr, patterns, configuredIndexes, outputBindings, inequalityConstraints, + planningHints); queryAccess.cachePlanningResult(cacheKey, plan); } if (!plan.planned()) { @@ -108,10 +115,13 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { } LmdbLftjTupleExpr lftjNode = new LmdbLftjTupleExpr(plan.plan()); - if (fusionTarget != null) { - fusionTarget.root().replaceWith(lftjNode); + if (planningTarget != null && planningTarget.root() != null) { + planningTarget.root().replaceWith(planningTarget.rebuild(lftjNode)); return true; } + if (planningTarget != null) { + planningTarget.rewriteFiltersInPlace(); + } List rebuiltOperands = new ArrayList<>(); boolean inserted = false; for (TupleExpr operand : operands) { @@ -132,39 +142,60 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { return true; } - private FusionTarget tryExtractFusionTarget(Join node, List operands) { + private PlanningTarget tryExtractPlanningTarget(Join node, List operands) { List extractedPatterns = new ArrayList<>(operands.size()); + boolean hasNonPatternOperands = false; for (TupleExpr operand : operands) { ExtractedPattern extractedPattern = extractFilteredPattern(operand); if (extractedPattern == null) { - return null; + hasNonPatternOperands = true; + continue; } extractedPatterns.add(extractedPattern); } + if (extractedPatterns.size() < 3) { + return null; + } List patterns = extractedPatterns.stream().map(ExtractedPattern::pattern).toList(); List visibleVariables = collectVisibleVariables(patterns); - QueryModelNode current = 
node; + QueryModelNode traversal = node; + QueryModelNode replacementRoot = node; + boolean rootReplaceable = !hasNonPatternOperands; List filters = new ArrayList<>(); Extension extension = null; Projection projection = null; - while (current.getParentNode() instanceof UnaryTupleOperator - && ((UnaryTupleOperator) current.getParentNode()).getArg() == current) { - QueryModelNode parent = current.getParentNode(); + while (traversal.getParentNode() instanceof UnaryTupleOperator + && ((UnaryTupleOperator) traversal.getParentNode()).getArg() == traversal) { + QueryModelNode parent = traversal.getParentNode(); if (parent instanceof Filter) { filters.add((Filter) parent); - current = parent; + traversal = parent; + if (rootReplaceable) { + replacementRoot = parent; + } continue; } if (parent instanceof Extension && extension == null) { extension = (Extension) parent; - current = parent; + traversal = parent; + if (rootReplaceable) { + replacementRoot = parent; + } continue; } if (parent instanceof Projection && projection == null) { projection = (Projection) parent; - current = parent; + traversal = parent; + if (rootReplaceable) { + replacementRoot = parent; + } + continue; + } + if (parent instanceof Distinct || parent instanceof Order) { + rootReplaceable = false; + traversal = parent; continue; } break; @@ -178,22 +209,43 @@ private FusionTarget tryExtractFusionTarget(Join node, List operands) return null; } - List outerInequalities = collectInequalities(filters, visibleVariables); - if (outerInequalities == null) { - return null; - } - inequalities.addAll(outerInequalities); + FilterPartition filterPartition = partitionFilters(filters, Set.copyOf(visibleVariables)); + inequalities.addAll(filterPartition.inequalityConstraints()); if (!supportsVisibleVariables(inequalities, Set.copyOf(visibleVariables))) { return null; } - List outputBindings = collectOutputBindings(projection, extension, - visibleVariables); + List outputBindings = collectOutputBindings(projection, 
extension, visibleVariables, + filterPartition.requiredVariables()); if (outputBindings == null) { return null; } + LmdbLftjPlanningHints planningHints = collectPlanningHints(operands, filterPartition.requiredVariables()); + + return new PlanningTarget(rootReplaceable ? (TupleExpr) replacementRoot : null, patterns, outputBindings, + inequalities, + filterPartition.filterRewrites(), extension, projection, planningHints); + } + + private LmdbLftjPlanningHints collectPlanningHints(List operands, List residualFilterVariables) { + int firstPatternIndex = -1; + for (int i = 0; i < operands.size(); i++) { + if (extractFilteredPattern(operands.get(i)) != null) { + firstPatternIndex = i; + break; + } + } + if (firstPatternIndex < 0) { + return LmdbLftjPlanningHints.of(List.of(), residualFilterVariables); + } - return new FusionTarget((TupleExpr) current, patterns, outputBindings, inequalities); + LinkedHashSet inputBoundVariables = new LinkedHashSet<>(); + for (int i = 0; i < firstPatternIndex; i++) { + if (operands.get(i) instanceof BindingSetAssignment) { + inputBoundVariables.addAll(((BindingSetAssignment) operands.get(i)).getBindingNames()); + } + } + return LmdbLftjPlanningHints.of(List.copyOf(inputBoundVariables), residualFilterVariables); } private ExtractedPattern extractFilteredPattern(TupleExpr operand) { @@ -223,23 +275,6 @@ private List collectVisibleVariables(List patterns) { return List.copyOf(variableNames); } - private List collectInequalities(List filters, - List visibleVariables) { - if (filters.isEmpty()) { - return List.of(); - } - List inequalities = new ArrayList<>(); - for (Filter filter : filters) { - if (!appendInequalities(filter.getCondition(), inequalities)) { - return null; - } - } - if (!supportsVisibleVariables(inequalities, Set.copyOf(visibleVariables))) { - return null; - } - return inequalities; - } - private boolean appendInequalities(ValueExpr condition, List inequalities) { if (condition instanceof And) { And and = (And) condition; @@ 
-277,7 +312,7 @@ private boolean isNamedVariable(Var var) { } private List collectOutputBindings(Projection projection, Extension extension, - List visibleVariables) { + List visibleVariables, List requiredVariables) { if (projection == null) { return extension == null ? List.of() : null; } @@ -287,6 +322,7 @@ private List collectOutputBindings(Projection projec } Set visible = Set.copyOf(visibleVariables); List outputBindings = new ArrayList<>(); + Set coveredSources = new LinkedHashSet<>(); for (ProjectionElem projectionElem : projection.getProjectionElemList().getElements()) { String sourceVariable = resolveProjectedSource(projectionElem, extensionBindings, visible); if (sourceVariable == null) { @@ -295,10 +331,111 @@ private List collectOutputBindings(Projection projec outputBindings.add(new LmdbLftjPlan.OutputBinding( projectionElem.getProjectionAlias().orElse(projectionElem.getName()), sourceVariable)); + coveredSources.add(sourceVariable); + } + for (String requiredVariable : requiredVariables) { + if (visible.contains(requiredVariable) && coveredSources.add(requiredVariable)) { + outputBindings.add(new LmdbLftjPlan.OutputBinding(requiredVariable, requiredVariable)); + } } return outputBindings; } + private FilterPartition partitionFilters(List filters, Set visibleVariables) { + if (filters.isEmpty()) { + return FilterPartition.empty(); + } + List inequalities = new ArrayList<>(); + List filterRewrites = new ArrayList<>(); + LinkedHashSet requiredVariables = new LinkedHashSet<>(); + for (Filter filter : filters) { + SingleFilterPartition partition = partitionFilterCondition(filter.getCondition(), visibleVariables); + inequalities.addAll(partition.inequalityConstraints()); + filterRewrites.add(new FilterRewrite(filter, partition.residualCondition())); + if (partition.residualCondition() != null) { + requiredVariables.addAll(partition.requiredVariables()); + } + } + return new FilterPartition(inequalities, filterRewrites, List.copyOf(requiredVariables)); + } 
+ + private SingleFilterPartition partitionFilterCondition(ValueExpr condition, Set visibleVariables) { + List inequalities = new ArrayList<>(); + List residualConjuncts = new ArrayList<>(); + LinkedHashSet requiredVariables = new LinkedHashSet<>(); + partitionFilterCondition(condition, visibleVariables, inequalities, residualConjuncts, requiredVariables); + return new SingleFilterPartition(inequalities, combineConjuncts(residualConjuncts), + List.copyOf(requiredVariables)); + } + + private void partitionFilterCondition(ValueExpr condition, Set visibleVariables, + List inequalities, + List residualConjuncts, Set requiredVariables) { + if (condition instanceof And) { + And and = (And) condition; + partitionFilterCondition(and.getLeftArg(), visibleVariables, inequalities, residualConjuncts, + requiredVariables); + partitionFilterCondition(and.getRightArg(), visibleVariables, inequalities, residualConjuncts, + requiredVariables); + return; + } + + LmdbLftjPlan.InequalityConstraint inequality = extractSupportedInequality(condition, visibleVariables); + if (inequality != null) { + inequalities.add(inequality); + return; + } + + residualConjuncts.add(condition.clone()); + collectReferencedVariables(condition, visibleVariables, requiredVariables); + } + + private ValueExpr combineConjuncts(List residualConjuncts) { + if (residualConjuncts.isEmpty()) { + return null; + } + ValueExpr combined = residualConjuncts.get(0); + for (int i = 1; i < residualConjuncts.size(); i++) { + combined = new And(combined, residualConjuncts.get(i)); + } + return combined; + } + + private LmdbLftjPlan.InequalityConstraint extractSupportedInequality(ValueExpr condition, + Set visibleVariables) { + if (!(condition instanceof Compare)) { + return null; + } + Compare compare = (Compare) condition; + if (compare.getOperator() != Compare.CompareOp.NE) { + return null; + } + if (!(compare.getLeftArg() instanceof Var) || !(compare.getRightArg() instanceof Var)) { + return null; + } + Var left = (Var) 
compare.getLeftArg(); + Var right = (Var) compare.getRightArg(); + if (!isNamedVariable(left) || !isNamedVariable(right)) { + return null; + } + if (!visibleVariables.contains(left.getName()) || !visibleVariables.contains(right.getName())) { + return null; + } + return new LmdbLftjPlan.InequalityConstraint(left.getName(), right.getName()); + } + + private void collectReferencedVariables(ValueExpr condition, Set visibleVariables, + Set requiredVariables) { + condition.visit(new AbstractQueryModelVisitor() { + @Override + public void meet(Var node) { + if (isNamedVariable(node) && visibleVariables.contains(node.getName())) { + requiredVariables.add(node.getName()); + } + } + }); + } + private Map collectExtensionBindings(Extension extension, List visibleVariables) { if (extension == null) { return Map.of(); @@ -369,19 +506,29 @@ private TupleExpr rebuildJoin(List operands) { return rebuilt; } - private static final class FusionTarget { + private static final class PlanningTarget { private final TupleExpr root; private final List patterns; private final List outputBindings; private final List inequalityConstraints; + private final List filterRewrites; + private final Extension extension; + private final Projection projection; + private final LmdbLftjPlanningHints planningHints; - private FusionTarget(TupleExpr root, List patterns, + private PlanningTarget(TupleExpr root, List patterns, List outputBindings, - List inequalityConstraints) { + List inequalityConstraints, + List filterRewrites, Extension extension, Projection projection, + LmdbLftjPlanningHints planningHints) { this.root = root; this.patterns = List.copyOf(patterns); this.outputBindings = List.copyOf(outputBindings); this.inequalityConstraints = List.copyOf(inequalityConstraints); + this.filterRewrites = List.copyOf(filterRewrites); + this.extension = extension; + this.projection = projection; + this.planningHints = planningHints; } private TupleExpr root() { @@ -399,6 +546,47 @@ private List 
outputBindings() { private List inequalityConstraints() { return inequalityConstraints; } + + private LmdbLftjPlanningHints planningHints() { + return planningHints; + } + + private boolean fullyFused() { + return filterRewrites.stream().allMatch(filterRewrite -> filterRewrite.residualCondition() == null); + } + + private void rewriteFiltersInPlace() { + for (FilterRewrite filterRewrite : filterRewrites) { + if (filterRewrite.residualCondition() == null) { + filterRewrite.filter().replaceWith(filterRewrite.filter().getArg()); + continue; + } + filterRewrite.filter().setCondition(filterRewrite.residualCondition().clone()); + } + } + + private TupleExpr rebuild(LmdbLftjTupleExpr lftjNode) { + if (fullyFused()) { + return lftjNode; + } + TupleExpr rebuilt = lftjNode; + for (FilterRewrite filterRewrite : filterRewrites) { + if (filterRewrite.residualCondition() != null) { + rebuilt = new Filter(rebuilt, filterRewrite.residualCondition().clone()); + } + } + if (extension != null) { + Extension rebuiltExtension = extension.clone(); + rebuiltExtension.setArg(rebuilt); + rebuilt = rebuiltExtension; + } + if (projection != null) { + Projection rebuiltProjection = projection.clone(); + rebuiltProjection.setArg(rebuilt); + rebuilt = rebuiltProjection; + } + return rebuilt; + } } private static final class ExtractedPattern { @@ -419,4 +607,78 @@ private List inequalityConstraints() { return inequalityConstraints; } } + + private static final class FilterPartition { + private static final FilterPartition EMPTY = new FilterPartition(List.of(), List.of(), List.of()); + + private final List inequalityConstraints; + private final List filterRewrites; + private final List requiredVariables; + + private FilterPartition(List inequalityConstraints, + List filterRewrites, List requiredVariables) { + this.inequalityConstraints = List.copyOf(inequalityConstraints); + this.filterRewrites = List.copyOf(filterRewrites); + this.requiredVariables = List.copyOf(requiredVariables); + } + + 
private static FilterPartition empty() { + return EMPTY; + } + + private List inequalityConstraints() { + return inequalityConstraints; + } + + private List filterRewrites() { + return filterRewrites; + } + + private List requiredVariables() { + return requiredVariables; + } + } + + private static final class SingleFilterPartition { + private final List inequalityConstraints; + private final ValueExpr residualCondition; + private final List requiredVariables; + + private SingleFilterPartition(List inequalityConstraints, + ValueExpr residualCondition, List requiredVariables) { + this.inequalityConstraints = List.copyOf(inequalityConstraints); + this.residualCondition = residualCondition; + this.requiredVariables = List.copyOf(requiredVariables); + } + + private List inequalityConstraints() { + return inequalityConstraints; + } + + private ValueExpr residualCondition() { + return residualCondition; + } + + private List requiredVariables() { + return requiredVariables; + } + } + + private static final class FilterRewrite { + private final Filter filter; + private final ValueExpr residualCondition; + + private FilterRewrite(Filter filter, ValueExpr residualCondition) { + this.filter = filter; + this.residualCondition = residualCondition; + } + + private Filter filter() { + return filter; + } + + private ValueExpr residualCondition() { + return residualCondition; + } + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java index cba2b275bf..3ab59cf65f 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlan.java @@ -17,8 +17,10 @@ import java.util.Set; import java.util.stream.Collectors; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.query.algebra.TupleExpr; +@Experimental public final class LmdbLftjPlan { private 
final TupleExpr fallbackExpr; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java index f2592274bd..700d8de11a 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanner.java @@ -15,6 +15,7 @@ import java.util.Collection; import java.util.Comparator; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -40,6 +41,14 @@ PlanningResult plan(TupleExpr fallbackExpr, Collection pattern PlanningResult plan(TupleExpr fallbackExpr, Collection patterns, Set configuredIndexes, List outputBindings, List inequalityConstraints) { + return plan(fallbackExpr, patterns, configuredIndexes, outputBindings, inequalityConstraints, + LmdbLftjPlanningHints.empty()); + } + + PlanningResult plan(TupleExpr fallbackExpr, Collection patterns, Set configuredIndexes, + List outputBindings, + List inequalityConstraints, + LmdbLftjPlanningHints planningHints) { if (patterns.size() < 3) { return PlanningResult.rejected("too-few-patterns"); } @@ -70,7 +79,7 @@ PlanningResult plan(TupleExpr fallbackExpr, Collection pattern if (!supportsInequalities(inequalityConstraints, visibleVariables)) { return PlanningResult.rejected("unsupported-inequality"); } - PlanningCandidate candidate = chooseCandidate(patternList, configuredIndexes, visibleVariables); + PlanningCandidate candidate = chooseCandidate(patternList, configuredIndexes, visibleVariables, planningHints); if (candidate == null) { return PlanningResult.rejected("incompatible-index-order"); } @@ -228,7 +237,7 @@ private List collectVisibleVariables(List patterns) { } private PlanningCandidate chooseCandidate(List patterns, Set configuredIndexes, - List visibleVariables) { + List visibleVariables, LmdbLftjPlanningHints planningHints) { 
List indexes = configuredIndexes.stream().sorted().toList(); if (visibleVariables.size() <= 8) { List current = new ArrayList<>(visibleVariables.size()); @@ -236,7 +245,7 @@ private PlanningCandidate chooseCandidate(List patterns, Set greedyOrder = greedyVariableOrder(patterns, visibleVariables); + List greedyOrder = greedyVariableOrder(patterns, visibleVariables, planningHints); return evaluateCandidate(patterns, indexes, greedyOrder); } @@ -257,21 +266,141 @@ private PlanningCandidate permute(List patterns, List return best; } - private List greedyVariableOrder(List patterns, List visibleVariables) { + private List greedyVariableOrder(List patterns, List visibleVariables, + LmdbLftjPlanningHints planningHints) { Map occurrences = visibleVariables.stream() .collect(java.util.stream.Collectors.toMap(name -> name, name -> patterns.stream() .filter(pattern -> pattern.getVarList() .stream() - .anyMatch(var -> var != null && !var.hasValue() && !var.isAnonymous() - && name.equals(var.getName()))) + .anyMatch(var -> isVisibleVariable(var) && name.equals(var.getName()))) .count())); + Map> adjacency = collectAdjacency(patterns, visibleVariables); + Map degrees = new LinkedHashMap<>(); + for (String variable : visibleVariables) { + degrees.put(variable, adjacency.getOrDefault(variable, Set.of()).size()); + } + Set fixedInputVariables = new LinkedHashSet<>(planningHints.inputBoundVariables()); + fixedInputVariables.retainAll(Set.copyOf(visibleVariables)); + Set residualFilterVariables = new LinkedHashSet<>(planningHints.residualFilterVariables()); + residualFilterVariables.retainAll(Set.copyOf(visibleVariables)); + Set leafVariables = collectConstantPredicateLeafVariables(patterns, degrees); + Set residualLeafVariables = new LinkedHashSet<>(leafVariables); + residualLeafVariables.retainAll(residualFilterVariables); + Set otherLeafVariables = new LinkedHashSet<>(leafVariables); + otherLeafVariables.removeAll(residualLeafVariables); + int maxDegree = 
degrees.values().stream().mapToInt(Integer::intValue).max().orElse(0); + Set anchorVariables = new LinkedHashSet<>(); + for (String variable : visibleVariables) { + if (fixedInputVariables.contains(variable)) { + continue; + } + if (degrees.getOrDefault(variable, 0) == maxDegree + || isAdjacentToAny(variable, fixedInputVariables, adjacency)) { + anchorVariables.add(variable); + } + } + Map fixedInputRank = toRankMap(planningHints.inputBoundVariables()); List ordered = new ArrayList<>(visibleVariables); - ordered.sort(Comparator.comparing(occurrences::get) - .reversed() + ordered.sort(Comparator.comparingInt(variable -> variableBucket(variable, fixedInputVariables, + anchorVariables, residualLeafVariables, otherLeafVariables)) + .thenComparingInt(variable -> fixedInputRank.getOrDefault(variable, Integer.MAX_VALUE)) + .thenComparing(Comparator.comparing(degrees::get).reversed()) + .thenComparing(Comparator.comparing(occurrences::get).reversed()) .thenComparing(Comparator.naturalOrder())); return ordered; } + private int variableBucket(String variable, Set fixedInputVariables, Set anchorVariables, + Set residualLeafVariables, Set otherLeafVariables) { + if (fixedInputVariables.contains(variable)) { + return 0; + } + if (anchorVariables.contains(variable)) { + return 1; + } + if (residualLeafVariables.contains(variable)) { + return 2; + } + if (otherLeafVariables.contains(variable)) { + return 3; + } + return 4; + } + + private Map> collectAdjacency(List patterns, List visibleVariables) { + Map> adjacency = new LinkedHashMap<>(); + for (String visibleVariable : visibleVariables) { + adjacency.put(visibleVariable, new LinkedHashSet<>()); + } + for (StatementPattern pattern : patterns) { + List patternVariables = collectPatternVariables(pattern); + for (int i = 0; i < patternVariables.size(); i++) { + for (int j = i + 1; j < patternVariables.size(); j++) { + adjacency.get(patternVariables.get(i)).add(patternVariables.get(j)); + 
adjacency.get(patternVariables.get(j)).add(patternVariables.get(i)); + } + } + } + return adjacency; + } + + private Set collectConstantPredicateLeafVariables(List patterns, + Map degrees) { + Set leafVariables = new LinkedHashSet<>(); + for (StatementPattern pattern : patterns) { + if (!pattern.getPredicateVar().hasValue()) { + continue; + } + List patternVariables = collectPatternVariables(pattern); + if (patternVariables.size() != 2) { + continue; + } + String left = patternVariables.get(0); + String right = patternVariables.get(1); + int leftDegree = degrees.getOrDefault(left, 0); + int rightDegree = degrees.getOrDefault(right, 0); + if (leftDegree == 1 && rightDegree > leftDegree) { + leafVariables.add(left); + } + if (rightDegree == 1 && leftDegree > rightDegree) { + leafVariables.add(right); + } + } + return leafVariables; + } + + private List collectPatternVariables(StatementPattern pattern) { + LinkedHashSet variables = new LinkedHashSet<>(); + for (Var var : pattern.getVarList()) { + if (isVisibleVariable(var)) { + variables.add(var.getName()); + } + } + return List.copyOf(variables); + } + + private boolean isVisibleVariable(Var var) { + return var != null && !var.hasValue() && !var.isAnonymous() && var.getName() != null; + } + + private boolean isAdjacentToAny(String variable, Set neighbors, Map> adjacency) { + Set variableNeighbors = adjacency.getOrDefault(variable, Set.of()); + for (String neighbor : neighbors) { + if (variableNeighbors.contains(neighbor)) { + return true; + } + } + return false; + } + + private Map toRankMap(List variables) { + Map ranks = new LinkedHashMap<>(); + for (int i = 0; i < variables.size(); i++) { + ranks.putIfAbsent(variables.get(i), i); + } + return ranks; + } + private PlanningCandidate evaluateCandidate(List patterns, List indexes, List variableOrder) { List indexNames = new ArrayList<>(patterns.size()); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanningHints.java 
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanningHints.java new file mode 100644 index 0000000000..9e9716c904 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlanningHints.java @@ -0,0 +1,51 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import java.util.LinkedHashSet; +import java.util.List; + +final class LmdbLftjPlanningHints { + + private static final LmdbLftjPlanningHints EMPTY = new LmdbLftjPlanningHints(List.of(), List.of()); + + private final List inputBoundVariables; + private final List residualFilterVariables; + + private LmdbLftjPlanningHints(List inputBoundVariables, List residualFilterVariables) { + this.inputBoundVariables = dedupe(inputBoundVariables); + this.residualFilterVariables = dedupe(residualFilterVariables); + } + + static LmdbLftjPlanningHints empty() { + return EMPTY; + } + + static LmdbLftjPlanningHints of(List inputBoundVariables, List residualFilterVariables) { + if (inputBoundVariables.isEmpty() && residualFilterVariables.isEmpty()) { + return EMPTY; + } + return new LmdbLftjPlanningHints(inputBoundVariables, residualFilterVariables); + } + + List inputBoundVariables() { + return inputBoundVariables; + } + + List residualFilterVariables() { + return residualFilterVariables; + } + + private static List dedupe(List variables) { + return List.copyOf(new LinkedHashSet<>(variables)); + } +} diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java index d27f67e79a..e279c0de81 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTieredCodegenCompiler.java @@ -26,4 +26,10 @@ String cacheKey(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean include LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred) { return full.compile(plan, shape, includeInferred); } + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred, + LmdbQueryAccess queryAccess) { + return full.compile(plan, shape, includeInferred, queryAccess); + } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java index 62943bed16..1028ac3bbd 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjTupleExpr.java @@ -13,6 +13,7 @@ import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import org.eclipse.rdf4j.query.algebra.AbstractQueryModelNode; import org.eclipse.rdf4j.query.algebra.QueryModelNode; @@ -60,7 +61,15 @@ public void replaceChildNode(QueryModelNode current, QueryModelNode replacement) public String getSignature() { return super.getSignature() + "[varOrder=" + String.join(",", plan.variableOrder()) + "; patterns=" + plan.patternCount() - + "; indexes=" + String.join(",", plan.indexNames()) + "]"; + + "; indexes=" + String.join(",", plan.indexNames()) + + "; inequalities=" + formatInequalities() + "]"; + } + + private String formatInequalities() { + return 
plan.inequalityConstraints() + .stream() + .map(inequality -> inequality.leftVariable() + "!=" + inequality.rightVariable()) + .collect(Collectors.joining(",", "[", "]")); } @Override diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java index 198680d391..20b51eeac1 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbQueryAccess.java @@ -13,8 +13,10 @@ import java.util.Set; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.model.Value; +@Experimental public interface LmdbQueryAccess { TripleStore tripleStore(); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java index 5d9d467904..f9a61f3a69 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java @@ -13,6 +13,7 @@ import java.time.Duration; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.ValueFactory; @@ -217,19 +218,23 @@ public LmdbStoreConfig setPageCardinalityEstimator(boolean pageCardinalityEstima return this; } + @Experimental public boolean isLftjEnabled() { return lftjEnabled; } + @Experimental public LmdbStoreConfig setLftjEnabled(boolean lftjEnabled) { this.lftjEnabled = lftjEnabled; return this; } + @Experimental public boolean isLftjCodegenEnabled() { return lftjCodegenEnabled; } + @Experimental public LmdbStoreConfig setLftjCodegenEnabled(boolean lftjCodegenEnabled) { this.lftjCodegenEnabled = lftjCodegenEnabled; return this; diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java index 50c08e3af9..ca347243e8 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java @@ -11,6 +11,7 @@ // Some portions generated by Codex package org.eclipse.rdf4j.sail.lmdb.config; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; @@ -85,11 +86,13 @@ public class LmdbStoreSchema { /** * http://rdf4j.org/config/sail/lmdb#lftjEnabled */ + @Experimental public final static IRI LFTJ_ENABLED; /** * http://rdf4j.org/config/sail/lmdb#lftjCodegenEnabled */ + @Experimental public final static IRI LFTJ_CODEGEN_ENABLED; /** diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index 95a6bbe815..902a76e81e 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -34,18 +34,30 @@ import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; +import org.eclipse.rdf4j.common.iteration.EmptyIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.FOAF; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.Dataset; +import 
org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.QueryLanguage; +import org.eclipse.rdf4j.query.algebra.QueryModelNode; +import org.eclipse.rdf4j.query.algebra.QueryRoot; import org.eclipse.rdf4j.query.algebra.SingletonSet; import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; +import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor; import org.eclipse.rdf4j.query.explanation.Explanation; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; +import org.eclipse.rdf4j.query.parser.ParsedTupleQuery; +import org.eclipse.rdf4j.query.parser.QueryParserUtil; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; import org.eclipse.rdf4j.sail.SailException; @@ -249,6 +261,36 @@ void fullCompiledRealStoreCycle5ShouldInlineAllInequalityGuards() throws Excepti } } + @Test + void fullCompiledRealStoreShouldInlineResolvedIdsForQueryConstants() throws Exception { + try (FullCodegenFixture fixture = new FullCodegenFixture(5, LmdbLftjFullCodegenCompiler.INSTANCE)) { + String query = "PREFIX foaf: \n" + + "SELECT * WHERE {\n" + + " foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?d .\n" + + " ?d foaf:knows ?b .\n" + + " ?c foaf:knows .\n" + + " FILTER (?b != ?c && ?b != ?d && ?c != ?d)\n" + + "}\n"; + LmdbQueryAccess queryAccess = fixture.connection.benchmarkQueryAccess(false); + LmdbLftjPlan plan = optimizedPlan(query, queryAccess); + long person1Id = queryAccess.resolveId(FullCodegenFixture.person(1)); + long person2Id = queryAccess.resolveId(FullCodegenFixture.person(2)); + long knowsId = queryAccess.resolveId(FOAF.KNOWS); + String source = sourceForPlan(plan, 
queryAccess, LmdbLftjFullCodegenCompiler.INSTANCE); + + assertThat(person1Id).isPositive(); + assertThat(person2Id).isPositive(); + assertThat(knowsId).isPositive(); + assertThat(source) + .contains(person1Id + "L") + .contains(person2Id + "L") + .contains(knowsId + "L") + .doesNotContain("state().fixedIdForComponent("); + } + } + @Test void fullCompiledFactoryShouldProduceSameRowsAsExecutorFactoryForRealStoreCycle5() throws Exception { try (FullCodegenFixture executorFixture = new FullCodegenFixture(5, LmdbLftjCodegenCompiler.INSTANCE); @@ -990,6 +1032,37 @@ private List executeCompiledPlanRows(FullCodegenFixture fixture, LmdbLft } } + private LmdbLftjPlan optimizedPlan(String query, LmdbQueryAccess queryAccess) throws Exception { + TupleExpr tupleExpr = parsedQueryRoot(query); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertThat(lftj).isNotNull(); + return lftj.plan().copy(); + } + + private TupleExpr parsedQueryRoot(String query) throws Exception { + ParsedTupleQuery parsed = QueryParserUtil.parseTupleQuery(QueryLanguage.SPARQL, query, null); + TupleExpr tupleExpr = parsed.getTupleExpr().clone(); + if (!(tupleExpr instanceof QueryRoot)) { + tupleExpr = new QueryRoot(tupleExpr); + } + return tupleExpr; + } + + private String sourceForPlan(LmdbLftjPlan plan, LmdbQueryAccess queryAccess, LmdbLftjCodegenCompiler compiler) + throws Exception { + LmdbLftjExecutionShape shape = new LmdbLftjExecutionShape(plan); + Method sourceFor = findMethod(compiler.getClass(), "sourceFor", LmdbLftjPlan.class, + LmdbLftjExecutionShape.class, + boolean.class, LmdbQueryAccess.class); + if (sourceFor != null) { + return (String) sourceFor.invoke(compiler, plan, shape, false, queryAccess); + } + return compiler.sourceFor(plan, shape, false); + } + 
@SuppressWarnings("unchecked") private String sourceForPreparedPlan(LmdbBenchmarkStore store, LmdbLftjCodegenCompiler compiler) throws Exception { LmdbLftjPlan plan = preparedPlan(store); @@ -1013,6 +1086,34 @@ private LmdbLftjPlan preparedPlan(LmdbBenchmarkStore store) throws Exception { .orElseThrow(); } + private Method findMethod(Class type, String name, Class... parameterTypes) { + Class current = type; + while (current != null) { + try { + Method method = current.getDeclaredMethod(name, parameterTypes); + method.setAccessible(true); + return method; + } catch (NoSuchMethodException e) { + current = current.getSuperclass(); + } + } + return null; + } + + private T findNode(TupleExpr tupleExpr, Class type) { + QueryModelNode[] result = new QueryModelNode[1]; + tupleExpr.visit(new AbstractQueryModelVisitor() { + @Override + protected void meetNode(QueryModelNode node) { + if (result[0] == null && type.isInstance(node)) { + result[0] = node; + } + super.meetNode(node); + } + }); + return type.cast(result[0]); + } + private Object readField(Object target, String name) throws Exception { Field field = target.getClass().getDeclaredField(name); field.setAccessible(true); @@ -1025,6 +1126,20 @@ private boolean isInitializedLmdbValue(Value value) throws Exception { return field.getBoolean(value); } + private static final class EmptyTripleSource implements TripleSource { + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... 
contexts) throws QueryEvaluationException { + return new EmptyIteration<>(); + } + + @Override + public org.eclipse.rdf4j.model.ValueFactory getValueFactory() { + return SimpleValueFactory.getInstance(); + } + } + private static final class InterpretedQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { private InterpretedQueryAccess() { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java index 2a0d6f01c8..9e6446a006 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -14,8 +14,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -281,7 +283,7 @@ void optimizeShouldLeaveChainedAliasExtensionOutsideFusedOutputs() throws Except } @Test - void optimizeShouldKeepDistinctAndOrderOutsideFusedCycleCore() throws Exception { + void optimizeShouldFuseSupportedInequalitiesFromMixedOuterFilterForCycle3DistinctCityOrdered() throws Exception { TestQueryAccess queryAccess = new TestQueryAccess(); LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); @@ -295,8 +297,13 @@ void optimizeShouldKeepDistinctAndOrderOutsideFusedCycleCore() throws Exception assertInstanceOf(BindingSetAssignment.class, findNode(tupleExpr, BindingSetAssignment.class)); assertInstanceOf(Distinct.class, findNode(tupleExpr, Distinct.class)); assertInstanceOf(Order.class, findNode(tupleExpr, Order.class)); - assertInstanceOf(Filter.class, 
findNode(tupleExpr, Filter.class)); - assertEquals(List.of(), lftj.plan().inequalityConstraints()); + Filter residualFilter = assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertFilterDoesNotContainInequalities(residualFilter); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("a", "a"), + new LmdbLftjPlan.OutputBinding("aLabel", "aLabel"), + new LmdbLftjPlan.OutputBinding("cityLabel", "cityLabel")), lftj.plan().outputBindings()); + assertEquals(completeInequalities(3), lftj.plan().inequalityConstraints()); } @Test @@ -315,7 +322,37 @@ void optimizeShouldKeepGroupingOutsideFusedCycleCore() throws Exception { assertInstanceOf(Group.class, findNode(tupleExpr, Group.class)); assertInstanceOf(Order.class, findNode(tupleExpr, Order.class)); assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); - assertEquals(List.of(), lftj.plan().inequalityConstraints()); + assertEquals(completeInequalities(3), lftj.plan().inequalityConstraints()); + } + + @Test + void optimizeShouldPreferValuesAndLeafVarsForCycle5ValuesDistinctMailboxOrdered() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot( + FoafCliqueQueryCatalog.QueryScenario.CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED.query()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertInstanceOf(BindingSetAssignment.class, findNode(tupleExpr, BindingSetAssignment.class)); + assertInstanceOf(Distinct.class, findNode(tupleExpr, Distinct.class)); + assertInstanceOf(Order.class, findNode(tupleExpr, Order.class)); + Filter residualFilter = assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertFilterDoesNotContainInequalities(residualFilter); + assertEquals(List.of( + new 
LmdbLftjPlan.OutputBinding("a", "a"), + new LmdbLftjPlan.OutputBinding("aLabel", "aLabel"), + new LmdbLftjPlan.OutputBinding("homepage", "homepage"), + new LmdbLftjPlan.OutputBinding("mbox", "mbox")), lftj.plan().outputBindings()); + assertEquals(completeInequalities(5), lftj.plan().inequalityConstraints()); + + List variableOrder = lftj.plan().variableOrder(); + assertEquals(List.of("city", "a"), variableOrder.subList(0, 2)); + assertLeafVarsPrecedeCycleTail(variableOrder, List.of("mbox", "aLabel", "homepage"), + List.of("b", "c", "d", "e")); } private TupleExpr cycle(String a, String b, String c) { @@ -459,6 +496,49 @@ protected void meetNode(QueryModelNode node) { return type.cast(result[0]); } + private List completeInequalities(int size) { + List inequalities = new java.util.ArrayList<>(); + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + inequalities.add(new LmdbLftjPlan.InequalityConstraint(variableName(i), variableName(j))); + } + } + return inequalities; + } + + private String variableName(int index) { + return String.valueOf((char) ('a' + index)); + } + + private void assertLeafVarsPrecedeCycleTail(List variableOrder, List leafVars, + List cycleTail) { + Map positions = new LinkedHashMap<>(); + for (int i = 0; i < variableOrder.size(); i++) { + positions.put(variableOrder.get(i), i); + } + for (String leafVar : leafVars) { + for (String cycleVar : cycleTail) { + assertTrue(positions.get(leafVar) < positions.get(cycleVar), + () -> "expected " + leafVar + " before " + cycleVar + " in " + variableOrder); + } + } + } + + private void assertFilterDoesNotContainInequalities(Filter filter) { + List inequalities = new java.util.ArrayList<>(); + filter.getCondition().visit(new AbstractQueryModelVisitor() { + @Override + public void meet(Compare node) { + if (node.getOperator() == Compare.CompareOp.NE) { + inequalities.add(node); + } + super.meet(node); + } + }); + assertEquals(List.of(), inequalities, + "supported != conjuncts should be 
removed from the residual outer filter"); + } + private static final class EmptyTripleSource implements TripleSource { @Override diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlannerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlannerTest.java new file mode 100644 index 0000000000..da8c92447f --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPlannerTest.java @@ -0,0 +1,108 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.query.algebra.Join; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; +import org.eclipse.rdf4j.query.algebra.Var; +import org.junit.jupiter.api.Test; + +class LmdbLftjPlannerTest { + + private static final SimpleValueFactory VF = SimpleValueFactory.getInstance(); + private static final Set CONFIGURED_INDEXES = Set.of("spoc", "sopc", "psoc", "posc", "ospc", "opsc"); + + @Test + void planShouldPrioritizeBoundVarsAnchorsAndLeafPredicatesForLargeQueries() { + LmdbLftjPlanner planner = new LmdbLftjPlanner(); + List patterns = cycle5WithAnchoredLeaves(); + TupleExpr 
fallbackExpr = rebuildJoin(new ArrayList<>(patterns)); + + LmdbLftjPlanner.PlanningResult result = planner.plan(fallbackExpr, patterns, CONFIGURED_INDEXES, + List.of( + new LmdbLftjPlan.OutputBinding("a", "a"), + new LmdbLftjPlan.OutputBinding("aLabel", "aLabel"), + new LmdbLftjPlan.OutputBinding("homepage", "homepage"), + new LmdbLftjPlan.OutputBinding("mbox", "mbox")), + completeInequalities(5), + LmdbLftjPlanningHints.of(List.of("city"), List.of("aLabel", "mbox"))); + + assertTrue(result.planned()); + List variableOrder = result.plan().variableOrder(); + assertEquals(List.of("city", "a"), variableOrder.subList(0, 2)); + assertLeafVarsPrecedeCycleTail(variableOrder, List.of("mbox", "aLabel", "homepage"), + List.of("b", "c", "d", "e")); + } + + private List cycle5WithAnchoredLeaves() { + return List.of( + statementPattern("a", "knows", "b"), + statementPattern("b", "knows", "c"), + statementPattern("c", "knows", "d"), + statementPattern("d", "knows", "e"), + statementPattern("e", "knows", "a"), + statementPattern("a", "basedNear", "city"), + statementPattern("a", "label", "aLabel"), + statementPattern("a", "mbox", "mbox"), + statementPattern("a", "homepage", "homepage")); + } + + private StatementPattern statementPattern(String subjectName, String predicateLocalName, String objectName) { + return new StatementPattern(new Var(subjectName), new Var("p", VF.createIRI("urn:test:" + predicateLocalName)), + new Var(objectName)); + } + + private TupleExpr rebuildJoin(List operands) { + TupleExpr rebuilt = operands.get(0); + for (int i = 1; i < operands.size(); i++) { + rebuilt = new Join(rebuilt, operands.get(i)); + } + return rebuilt; + } + + private List completeInequalities(int size) { + List inequalities = new ArrayList<>(); + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + inequalities.add(new LmdbLftjPlan.InequalityConstraint(variableName(i), variableName(j))); + } + } + return inequalities; + } + + private String variableName(int index) { 
+ return String.valueOf((char) ('a' + index)); + } + + private void assertLeafVarsPrecedeCycleTail(List variableOrder, List leafVars, + List cycleTail) { + int latestLeafIndex = leafVars.stream() + .mapToInt(variableOrder::indexOf) + .max() + .orElseThrow(); + int earliestCycleTailIndex = cycleTail.stream() + .mapToInt(variableOrder::indexOf) + .min() + .orElseThrow(); + assertTrue(latestLeafIndex < earliestCycleTailIndex, + () -> "expected leaf vars before cycle tail but got " + variableOrder); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java index 8b15e0bef0..9f27cbf9c2 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java @@ -37,6 +37,7 @@ import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.base.SailDataset; +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -343,6 +344,26 @@ public void testExplainOptimizedUsesStableCycleIndexOrder(@TempDir File dataDir) } } + @Test + public void testExplainOptimizedShowsMixedCycleInequalitiesAndValuesAwareOrder(@TempDir File dataDir) { + LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); + Repository repository = createRepository(dataDir, config, conn -> { + }); + + try (RepositoryConnection connection = repository.getConnection()) { + String actualPlan = connection.prepareTupleQuery( + FoafCliqueQueryCatalog.QueryScenario.CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED.query()) + .explain(Explanation.Level.Optimized) + .toString(); + assertTrue(actualPlan, actualPlan.contains("LmdbLftjTupleExpr")); + 
assertTrue(actualPlan, actualPlan.contains("varOrder=city,a,")); + assertTrue(actualPlan, actualPlan.contains( + "inequalities=[a!=b,a!=c,a!=d,a!=e,b!=c,b!=d,b!=e,c!=d,c!=e,d!=e]")); + } finally { + repository.shutDown(); + } + } + @Test public void testCyclicQueryMatchesResultsWhenLftjActivates(@TempDir File disabledDir, @TempDir File enabledDir) { LmdbStoreConfig disabled = new LmdbStoreConfig("spoc,posc"); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index ffac0df70e..572d339e60 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -18,6 +18,7 @@ import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; +import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; import org.eclipse.rdf4j.sail.lmdb.LmdbBenchmarkStore; @@ -50,6 +51,7 @@ public class FoafCliqueQueryBenchmark { public static final String LFTJ_DISABLED = "disabled"; + private static final String RESULT_COUNT_BINDING = "resultCount"; @Param({ "5000" }) public int peopleCount; @@ -133,6 +135,11 @@ public long cycle5() { return executeCount(QueryScenario.CYCLE5.query()); } + @Benchmark + public long cycle3CountCityInterest() { + return executeAggregateCount(QueryScenario.CYCLE3_COUNT_CITY_INTEREST.query()); + } + @Benchmark public long cycle3DistinctCityOrdered() { return executeCount(QueryScenario.CYCLE3_DISTINCT_CITY_ORDERED.query()); @@ -148,6 +155,11 @@ public long cycle3GroupedInterest() { return executeCount(QueryScenario.CYCLE3_GROUPED_INTEREST.query()); } + @Benchmark + public long cycle5ValuesCountMailboxHomepage() { + return 
executeAggregateCount(QueryScenario.CYCLE5_VALUES_COUNT_MAILBOX_HOMEPAGE.query()); + } + @Benchmark public long cycle5ValuesDistinctMailboxOrdered() { return executeCount(QueryScenario.CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED.query()); @@ -161,12 +173,16 @@ public long executeScenario(QueryScenario scenario) { return cycle4(); case CYCLE5: return cycle5(); + case CYCLE3_COUNT_CITY_INTEREST: + return cycle3CountCityInterest(); case CYCLE3_DISTINCT_CITY_ORDERED: return cycle3DistinctCityOrdered(); case CYCLE4_VALUES_FILTERED_ORDERED: return cycle4ValuesFilteredOrdered(); case CYCLE3_GROUPED_INTEREST: return cycle3GroupedInterest(); + case CYCLE5_VALUES_COUNT_MAILBOX_HOMEPAGE: + return cycle5ValuesCountMailboxHomepage(); case CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED: return cycle5ValuesDistinctMailboxOrdered(); default: @@ -180,6 +196,18 @@ private long executeCount(String query) { } } + private long executeAggregateCount(String query) { + try (SailRepositoryConnection connection = repository.getConnection()) { + return connection.prepareTupleQuery(query) + .evaluate() + .stream() + .findFirst() + .map(bindingSet -> (Literal) bindingSet.getValue(RESULT_COUNT_BINDING)) + .map(Literal::longValue) + .orElseThrow(() -> new IllegalStateException("Missing aggregate count result")); + } + } + static LmdbStoreConfig createLftjBenchmarkConfig(String benchmarkMode) { LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); boolean lftjEnabled = !LFTJ_DISABLED.equals(benchmarkMode); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md index 228050acdf..9df93e6c2e 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -24,28 +24,82 
@@ FoafCliqueQueryBenchmark.cycle3 interpreted FoafCliqueQueryBenchmark.cycle3 executor_codegen 30 8 3 5000 15000 12345 avgt 5 18.401 ± 1.086 ms/op FoafCliqueQueryBenchmark.cycle3 full_codegen 30 8 3 5000 15000 12345 avgt 5 14.880 ± 1.150 ms/op FoafCliqueQueryBenchmark.cycle3 disabled 30 8 3 5000 15000 12345 avgt 5 101.933 ± 4.997 ms/op + FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 146.240 ± 9.376 ms/op FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 144.603 ± 27.824 ms/op FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 110.024 ± 13.207 ms/op FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered disabled 30 8 3 5000 15000 12345 avgt 5 102.194 ± 7.423 ms/op + FoafCliqueQueryBenchmark.cycle3GroupedInterest interpreted 30 8 3 5000 15000 12345 avgt 5 54.924 ± 6.028 ms/op FoafCliqueQueryBenchmark.cycle3GroupedInterest executor_codegen 30 8 3 5000 15000 12345 avgt 5 55.310 ± 5.366 ms/op FoafCliqueQueryBenchmark.cycle3GroupedInterest full_codegen 30 8 3 5000 15000 12345 avgt 5 43.784 ± 0.896 ms/op FoafCliqueQueryBenchmark.cycle3GroupedInterest disabled 30 8 3 5000 15000 12345 avgt 5 75.099 ± 1.030 ms/op + FoafCliqueQueryBenchmark.cycle4 interpreted 30 8 3 5000 15000 12345 avgt 5 88.792 ± 4.729 ms/op FoafCliqueQueryBenchmark.cycle4 executor_codegen 30 8 3 5000 15000 12345 avgt 5 64.010 ± 1.059 ms/op FoafCliqueQueryBenchmark.cycle4 full_codegen 30 8 3 5000 15000 12345 avgt 5 60.457 ± 2.042 ms/op FoafCliqueQueryBenchmark.cycle4 disabled 30 8 3 5000 15000 12345 avgt 5 670.650 ± 7.958 ms/op + FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 247.547 ± 20.613 ms/op FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 224.290 ± 31.904 ms/op FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 188.459 
± 29.016 ms/op FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered disabled 30 8 3 5000 15000 12345 avgt 5 298.370 ± 15.842 ms/op + FoafCliqueQueryBenchmark.cycle5 interpreted 30 8 3 5000 15000 12345 avgt 5 481.559 ± 33.314 ms/op FoafCliqueQueryBenchmark.cycle5 executor_codegen 30 8 3 5000 15000 12345 avgt 5 324.419 ± 51.822 ms/op FoafCliqueQueryBenchmark.cycle5 full_codegen 30 8 3 5000 15000 12345 avgt 5 268.049 ± 4.831 ms/op FoafCliqueQueryBenchmark.cycle5 disabled 30 8 3 5000 15000 12345 avgt 5 4189.973 ± 33.778 ms/op + FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 3545.949 ± 193.672 ms/op FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 2919.335 ± 32.839 ms/op FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 2135.299 ± 45.432 ms/op FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered disabled 30 8 3 5000 15000 12345 avgt 5 2163.481 ± 82.503 ms/op ``` + +``` +Benchmark (benchmarkMode) (cliquePercentage) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units +FoafCliqueQueryBenchmark.cycle3 interpreted 30 8 3 5000 15000 12345 avgt 5 19.139 ± 0.249 ms/op +FoafCliqueQueryBenchmark.cycle3 executor_codegen 30 8 3 5000 15000 12345 avgt 5 18.083 ± 1.050 ms/op +FoafCliqueQueryBenchmark.cycle3 full_codegen 30 8 3 5000 15000 12345 avgt 5 14.350 ± 0.234 ms/op +FoafCliqueQueryBenchmark.cycle3 disabled 30 8 3 5000 15000 12345 avgt 5 101.270 ± 2.698 ms/op + +FoafCliqueQueryBenchmark.cycle3CountCityInterest interpreted 30 8 3 5000 15000 12345 avgt 5 63.494 ± 5.929 ms/op +FoafCliqueQueryBenchmark.cycle3CountCityInterest executor_codegen 30 8 3 5000 15000 12345 avgt 5 56.843 ± 4.770 ms/op +FoafCliqueQueryBenchmark.cycle3CountCityInterest full_codegen 30 8 3 5000 15000 12345 avgt 5 49.831 ± 1.542 ms/op +FoafCliqueQueryBenchmark.cycle3CountCityInterest disabled 
30 8 3 5000 15000 12345 avgt 5 76.904 ± 1.780 ms/op + +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 150.576 ± 5.361 ms/op +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 128.802 ± 5.240 ms/op +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 96.154 ± 4.793 ms/op +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered disabled 30 8 3 5000 15000 12345 avgt 5 100.160 ± 2.329 ms/op + +FoafCliqueQueryBenchmark.cycle3GroupedInterest interpreted 30 8 3 5000 15000 12345 avgt 5 51.695 ± 3.045 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest executor_codegen 30 8 3 5000 15000 12345 avgt 5 50.501 ± 2.451 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest full_codegen 30 8 3 5000 15000 12345 avgt 5 40.283 ± 2.956 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest disabled 30 8 3 5000 15000 12345 avgt 5 75.723 ± 2.881 ms/op + +FoafCliqueQueryBenchmark.cycle4 interpreted 30 8 3 5000 15000 12345 avgt 5 83.653 ± 3.626 ms/op +FoafCliqueQueryBenchmark.cycle4 executor_codegen 30 8 3 5000 15000 12345 avgt 5 76.264 ± 13.538 ms/op +FoafCliqueQueryBenchmark.cycle4 full_codegen 30 8 3 5000 15000 12345 avgt 5 60.321 ± 3.252 ms/op +FoafCliqueQueryBenchmark.cycle4 disabled 30 8 3 5000 15000 12345 avgt 5 685.196 ± 26.388 ms/op + +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 242.550 ± 8.657 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 209.008 ± 12.205 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 159.240 ± 15.824 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered disabled 30 8 3 5000 15000 12345 avgt 5 294.365 ± 5.560 ms/op + +FoafCliqueQueryBenchmark.cycle5 interpreted 30 8 3 5000 15000 12345 avgt 5 419.527 ± 33.089 ms/op +FoafCliqueQueryBenchmark.cycle5 
executor_codegen 30 8 3 5000 15000 12345 avgt 5 286.429 ± 8.196 ms/op +FoafCliqueQueryBenchmark.cycle5 full_codegen 30 8 3 5000 15000 12345 avgt 5 277.975 ± 17.327 ms/op +FoafCliqueQueryBenchmark.cycle5 disabled 30 8 3 5000 15000 12345 avgt 5 4272.863 ± 439.501 ms/op + +FoafCliqueQueryBenchmark.cycle5ValuesCountMailboxHomepage interpreted 30 8 3 5000 15000 12345 avgt 5 1875.367 ± 77.732 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesCountMailboxHomepage executor_codegen 30 8 3 5000 15000 12345 avgt 5 1450.226 ± 82.264 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesCountMailboxHomepage full_codegen 30 8 3 5000 15000 12345 avgt 5 1129.560 ± 18.134 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesCountMailboxHomepage disabled 30 8 3 5000 15000 12345 avgt 5 1808.042 ± 27.328 ms/op + +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered interpreted 30 8 3 5000 15000 12345 avgt 5 703.544 ± 35.325 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered executor_codegen 30 8 3 5000 15000 12345 avgt 5 556.799 ± 29.910 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 455.503 ± 20.033 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered disabled 30 8 3 5000 15000 12345 avgt 5 2165.313 ± 74.562 ms/op +``` diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java index 7f2d255249..e2fd15ce49 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalog.java @@ -31,9 +31,12 @@ public enum QueryScenario { CYCLE3("cycle3", cycleQuery(3)), CYCLE4("cycle4", cycleQuery(4)), CYCLE5("cycle5", cycleQuery(5)), + CYCLE3_COUNT_CITY_INTEREST("cycle3CountCityInterest", cycle3CountCityInterestQuery()), 
CYCLE3_DISTINCT_CITY_ORDERED("cycle3DistinctCityOrdered", cycle3DistinctCityOrderedQuery()), CYCLE4_VALUES_FILTERED_ORDERED("cycle4ValuesFilteredOrdered", cycle4ValuesFilteredOrderedQuery()), CYCLE3_GROUPED_INTEREST("cycle3GroupedInterest", cycle3GroupedInterestQuery()), + CYCLE5_VALUES_COUNT_MAILBOX_HOMEPAGE("cycle5ValuesCountMailboxHomepage", + cycle5ValuesCountMailboxHomepageQuery()), CYCLE5_VALUES_DISTINCT_MAILBOX_ORDERED("cycle5ValuesDistinctMailboxOrdered", cycle5ValuesDistinctMailboxOrderedQuery()); @@ -99,6 +102,16 @@ private static String cycle3DistinctCityOrderedQuery() { + "ORDER BY ?cityLabel ?aLabel\n"; } + private static String cycle3CountCityInterestQuery() { + return PREFIXES + + "SELECT (COUNT(*) AS ?resultCount) WHERE {\n" + + " VALUES ?interest { exinterest:rdf exinterest:sparql exinterest:queryPlanning }\n" + + cyclePattern(3) + + " ?a foaf:interest ?interest ;\n" + + " foaf:based_near ?city .\n" + + "}\n"; + } + private static String cycle4ValuesFilteredOrderedQuery() { return PREFIXES + "SELECT DISTINCT ?a ?age ?homepage WHERE {\n" @@ -141,6 +154,17 @@ private static String cycle5ValuesDistinctMailboxOrderedQuery() { + "ORDER BY ?aLabel\n"; } + private static String cycle5ValuesCountMailboxHomepageQuery() { + return PREFIXES + + "SELECT (COUNT(*) AS ?resultCount) WHERE {\n" + + " VALUES ?city { excity:oslo excity:stockholm excity:copenhagen excity:helsinki }\n" + + cyclePattern(5) + + " ?a foaf:based_near ?city ;\n" + + " foaf:mbox ?mbox ;\n" + + " foaf:homepage ?homepage .\n" + + "}\n"; + } + private static String cyclePattern(int size) { StringBuilder builder = new StringBuilder(); for (int i = 0; i < size; i++) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalogTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalogTest.java new file mode 100644 index 0000000000..b8c0479e23 --- /dev/null +++ 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryCatalogTest.java @@ -0,0 +1,63 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb.benchmark; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.lang.reflect.Method; + +import org.eclipse.rdf4j.sail.lmdb.benchmark.FoafCliqueQueryCatalog.QueryScenario; +import org.junit.jupiter.api.Test; + +class FoafCliqueQueryCatalogTest { + + @Test + void countOnlyScenariosShouldBeRegisteredWithoutFilters() { + assertCountOnlyScenario("cycle3CountCityInterest"); + assertCountOnlyScenario("cycle5ValuesCountMailboxHomepage"); + } + + @Test + void everyScenarioShouldHaveMatchingBenchmarkMethod() throws Exception { + for (QueryScenario scenario : FoafCliqueQueryCatalog.allScenarios()) { + Method method = FoafCliqueQueryBenchmark.class.getDeclaredMethod(scenario.benchmarkMethodName()); + assertNotNull(method, "Missing benchmark method for " + scenario.benchmarkMethodName()); + assertTrue(method.getReturnType().equals(long.class), + "Benchmark method must return long for " + scenario.benchmarkMethodName()); + assertTrue(method.getParameterCount() == 0, + "Benchmark method must be zero-arg for " + scenario.benchmarkMethodName()); + } + } + + private static void assertCountOnlyScenario(String benchmarkMethodName) 
{ + QueryScenario scenario = FoafCliqueQueryCatalog.allScenarios() + .stream() + .filter(candidate -> candidate.benchmarkMethodName().equals(benchmarkMethodName)) + .findFirst() + .orElse(null); + assertNotNull(scenario, "Missing count-only scenario " + benchmarkMethodName); + assertTrue(scenario.query().contains("COUNT(*) AS ?resultCount"), + "Count-only scenario must use a COUNT(*) aggregate: " + benchmarkMethodName); + assertFalse(scenario.query().contains("FILTER ("), + "Count-only scenario must not use FILTER: " + benchmarkMethodName); + assertFalse(scenario.query().contains("ORDER BY"), + "Count-only scenario must not use ORDER BY: " + benchmarkMethodName); + assertFalse(scenario.query().contains("DISTINCT"), + "Count-only scenario must not use DISTINCT: " + benchmarkMethodName); + assertFalse(scenario.query().contains("GROUP BY"), + "Count-only scenario must not use GROUP BY: " + benchmarkMethodName); + assertFalse(scenario.query().contains("HAVING"), + "Count-only scenario must not use HAVING: " + benchmarkMethodName); + } +} From 584a3d1e03346a34942e19a0779568954c7a9643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 20:16:21 +0200 Subject: [PATCH 27/32] even faster --- .../benchmark/FoafCliqueQueryBenchmark.java | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java index 572d339e60..b5e0e914d0 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmark.java @@ -19,6 +19,7 @@ import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.query.TupleQueryResult; import 
org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; import org.eclipse.rdf4j.sail.lmdb.LmdbBenchmarkStore; @@ -71,7 +72,8 @@ public class FoafCliqueQueryBenchmark { @Param({ "12345" }) public long seed; - @Param({ "interpreted", "executor_codegen", "full_codegen", LFTJ_DISABLED }) +// @Param({ "interpreted", "executor_codegen", "full_codegen", LFTJ_DISABLED }) + @Param({ "full_codegen" }) public String benchmarkMode; private File dataDir; @@ -80,7 +82,7 @@ public class FoafCliqueQueryBenchmark { public static void main(String[] args) throws RunnerException { new Runner(new OptionsBuilder() .include("FoafCliqueQueryBenchmark") - .forks(1) + .forks(0) .build()).run(); } @@ -192,19 +194,23 @@ public long executeScenario(QueryScenario scenario) { private long executeCount(String query) { try (SailRepositoryConnection connection = repository.getConnection()) { - return connection.prepareTupleQuery(query).evaluate().stream().count(); + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query).evaluate()) { + return evaluate.stream().count(); + } } } private long executeAggregateCount(String query) { try (SailRepositoryConnection connection = repository.getConnection()) { - return connection.prepareTupleQuery(query) - .evaluate() - .stream() - .findFirst() - .map(bindingSet -> (Literal) bindingSet.getValue(RESULT_COUNT_BINDING)) - .map(Literal::longValue) - .orElseThrow(() -> new IllegalStateException("Missing aggregate count result")); + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query) + .evaluate()) { + return evaluate + .stream() + .findFirst() + .map(bindingSet -> (Literal) bindingSet.getValue(RESULT_COUNT_BINDING)) + .map(Literal::longValue) + .orElseThrow(() -> new IllegalStateException("Missing aggregate count result")); + } } } From c63b138e6ec50ca198bc8a79a4fd6e972a33b5ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 20:35:26 
+0200 Subject: [PATCH 28/32] even faster --- .../rdf4j/sail/lmdb/LmdbCachedFrontier.java | 47 ++++++++++++++++--- .../rdf4j/sail/lmdb/LmdbCachedTrieCursor.java | 2 +- .../lmdb/LmdbLftjFullCodegenCompiler.java | 43 +++++++++++++++-- .../FoafCliqueQueryBenchmarkResults.md | 13 +++++ 4 files changed, 95 insertions(+), 10 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java index 229a124af1..6ee5c8015e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedFrontier.java @@ -47,17 +47,52 @@ long countAt(int index) { } int seek(long target) { - int index = Arrays.binarySearch(values, target); - return index >= 0 ? index : -index - 1; + return lowerBound(values, target, -1); + } + + int seek(long target, int hintIndex) { + return lowerBound(values, target, hintIndex); } long countFor(long value) { + int index = lowerBound(values, value, -1); + if (index >= values.length || values[index] != value) { + return 0L; + } if (counts == null) { - int index = Arrays.binarySearch(values, value); - return index >= 0 ? 1L : 0L; + return 1L; + } + return counts[index]; + } + + static int lowerBound(long[] values, long target, int hintIndex) { + if (hintIndex < 0 || hintIndex >= values.length) { + int index = Arrays.binarySearch(values, target); + return index >= 0 ? index : -index - 1; } - int index = Arrays.binarySearch(values, value); - return index >= 0 ? 
counts[index] : 0L; + long hintValue = values[hintIndex]; + if (hintValue == target) { + return hintIndex; + } + + if (hintValue < target) { + int nextIndex = hintIndex + 1; + if (nextIndex >= values.length) { + return values.length; + } + if (values[nextIndex] >= target) { + return nextIndex; + } + int index = Arrays.binarySearch(values, nextIndex + 1, values.length, target); + return index >= 0 ? index : -index - 1; + } + + int previousIndex = hintIndex - 1; + if (previousIndex >= 0 && values[previousIndex] < target) { + return hintIndex; + } + int index = Arrays.binarySearch(values, 0, hintIndex, target); + return index >= 0 ? index : -index - 1; } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java index 05c3d0ef47..907f8ff286 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbCachedTrieCursor.java @@ -48,7 +48,7 @@ public boolean seek(long target) { } Frame frame = currentFrame(); - int position = frame.frontier.seek(target); + int position = frame.frontier.seek(target, frame.position); if (position >= frame.frontier.size()) { frame.position = -1; return false; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java index f2fda00e84..123330da0c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFullCodegenCompiler.java @@ -635,6 +635,11 @@ private void appendRelationFrontierMethods(StringBuilder source, int patternOrdi source.append(" return seekSlot").append(suffix).append("(0L);\n"); source.append(" }\n\n"); source.append(" private boolean 
seekSlot").append(suffix).append("(long target) {\n"); + source.append(" long[] previousFrontierValues") + .append(suffix) + .append(" = frontierValues") + .append(suffix) + .append(";\n"); if (component == patternShape.derivedSourceComponent()) { source.append(" frontierValues") .append(suffix) @@ -653,7 +658,15 @@ private void appendRelationFrontierMethods(StringBuilder source, int patternOrdi .append(suffix) .append(" = seekFrontier(frontierValues") .append(suffix) - .append(", target);\n"); + .append(", target, frontierAvailable") + .append(suffix) + .append(" && frontierValues") + .append(suffix) + .append(" == previousFrontierValues") + .append(suffix) + .append(" ? frontierIndex") + .append(suffix) + .append(" : -1);\n"); source.append(" if (frontierIndex") .append(suffix) .append(" >= frontierValues") @@ -1163,8 +1176,32 @@ private void appendCloseCursorResources(StringBuilder source, int patternOrdinal private void appendHelpers(StringBuilder source) { source.append(" private static final long[] EMPTY_FRONTIER_VALUES = new long[0];\n\n"); if (relationGroups.length > 0) { - source.append(" private static int seekFrontier(long[] values, long target) {\n"); - source.append(" int index = java.util.Arrays.binarySearch(values, target);\n"); + source.append(" private static int seekFrontier(long[] values, long target, int hintIndex) {\n"); + source.append(" if (hintIndex < 0 || hintIndex >= values.length) {\n"); + source.append(" int index = java.util.Arrays.binarySearch(values, target);\n"); + source.append(" return index >= 0 ? 
index : -index - 1;\n"); + source.append(" }\n"); + source.append(" long hintValue = values[hintIndex];\n"); + source.append(" if (hintValue == target) {\n"); + source.append(" return hintIndex;\n"); + source.append(" }\n"); + source.append(" if (hintValue < target) {\n"); + source.append(" int nextIndex = hintIndex + 1;\n"); + source.append(" if (nextIndex >= values.length) {\n"); + source.append(" return values.length;\n"); + source.append(" }\n"); + source.append(" if (values[nextIndex] >= target) {\n"); + source.append(" return nextIndex;\n"); + source.append(" }\n"); + source.append( + " int index = java.util.Arrays.binarySearch(values, nextIndex + 1, values.length, target);\n"); + source.append(" return index >= 0 ? index : -index - 1;\n"); + source.append(" }\n"); + source.append(" int previousIndex = hintIndex - 1;\n"); + source.append(" if (previousIndex >= 0 && values[previousIndex] < target) {\n"); + source.append(" return hintIndex;\n"); + source.append(" }\n"); + source.append(" int index = java.util.Arrays.binarySearch(values, 0, hintIndex, target);\n"); source.append(" return index >= 0 ? 
index : -index - 1;\n"); source.append(" }\n\n"); source.append(" private static int mixFrontierCacheKey(long value) {\n"); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md index 9df93e6c2e..e483850f61 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/FoafCliqueQueryBenchmarkResults.md @@ -103,3 +103,16 @@ FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered executor_codegen FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 455.503 ± 20.033 ms/op FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered disabled 30 8 3 5000 15000 12345 avgt 5 2165.313 ± 74.562 ms/op ``` + +``` +Benchmark (benchmarkMode) (cliquePercentage) (maxCliqueSize) (minCliqueSize) (peopleCount) (randomKnowsEdges) (seed) Mode Cnt Score Error Units +FoafCliqueQueryBenchmark.cycle3 full_codegen 30 8 3 5000 15000 12345 avgt 5 12.145 ± 0.697 ms/op +FoafCliqueQueryBenchmark.cycle3CountCityInterest full_codegen 30 8 3 5000 15000 12345 avgt 5 36.130 ± 2.730 ms/op +FoafCliqueQueryBenchmark.cycle3DistinctCityOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 90.575 ± 8.376 ms/op +FoafCliqueQueryBenchmark.cycle3GroupedInterest full_codegen 30 8 3 5000 15000 12345 avgt 5 33.409 ± 2.409 ms/op +FoafCliqueQueryBenchmark.cycle4 full_codegen 30 8 3 5000 15000 12345 avgt 5 54.210 ± 3.619 ms/op +FoafCliqueQueryBenchmark.cycle4ValuesFilteredOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 145.352 ± 11.220 ms/op +FoafCliqueQueryBenchmark.cycle5 full_codegen 30 8 3 5000 15000 12345 avgt 5 255.649 ± 11.363 ms/op +FoafCliqueQueryBenchmark.cycle5ValuesCountMailboxHomepage full_codegen 30 8 3 5000 15000 12345 avgt 5 1088.888 ± 24.557 ms/op 
+FoafCliqueQueryBenchmark.cycle5ValuesDistinctMailboxOrdered full_codegen 30 8 3 5000 15000 12345 avgt 5 437.191 ± 53.469 ms/op +``` From 47cb10b0aaefc15adce7203e83113e384ea152ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Mon, 6 Apr 2026 23:29:07 +0200 Subject: [PATCH 29/32] fix bugs --- .../rdf4j/sail/lmdb/LmdbLftjOptimizer.java | 110 ++++++++++++++++-- .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 46 +++++++- .../lmdb/LmdbLftjFusionCorrectnessTest.java | 15 +++ .../sail/lmdb/LmdbLftjOptimizerTest.java | 34 ++++++ 4 files changed, 194 insertions(+), 11 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java index 0a85bd3536..ce199eb4d5 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java @@ -13,6 +13,8 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -215,8 +217,11 @@ private PlanningTarget tryExtractPlanningTarget(Join node, List opera return null; } + boolean preserveOuterOperators = filterPartition.filterRewrites() + .stream() + .anyMatch(filterRewrite -> filterRewrite.residualCondition() != null); List outputBindings = collectOutputBindings(projection, extension, visibleVariables, - filterPartition.requiredVariables()); + filterPartition.requiredVariables(), preserveOuterOperators); if (outputBindings == null) { return null; } @@ -312,7 +317,10 @@ private boolean isNamedVariable(Var var) { } private List collectOutputBindings(Projection projection, Extension extension, - List visibleVariables, List requiredVariables) { + List visibleVariables, List requiredVariables, boolean preserveOuterOperators) { + if (preserveOuterOperators) { 
+ return collectVisibleInputBindings(projection, extension, visibleVariables, requiredVariables); + } if (projection == null) { return extension == null ? List.of() : null; } @@ -322,20 +330,53 @@ private List collectOutputBindings(Projection projec } Set visible = Set.copyOf(visibleVariables); List outputBindings = new ArrayList<>(); - Set coveredSources = new LinkedHashSet<>(); + Map outputSourcesByName = new LinkedHashMap<>(); for (ProjectionElem projectionElem : projection.getProjectionElemList().getElements()) { String sourceVariable = resolveProjectedSource(projectionElem, extensionBindings, visible); if (sourceVariable == null) { return null; } - outputBindings.add(new LmdbLftjPlan.OutputBinding( - projectionElem.getProjectionAlias().orElse(projectionElem.getName()), - sourceVariable)); - coveredSources.add(sourceVariable); + String outputName = projectionElem.getProjectionAlias().orElse(projectionElem.getName()); + String previousSource = outputSourcesByName.putIfAbsent(outputName, sourceVariable); + if (previousSource != null && !previousSource.equals(sourceVariable)) { + return null; + } + outputBindings.add(new LmdbLftjPlan.OutputBinding(outputName, sourceVariable)); } for (String requiredVariable : requiredVariables) { - if (visible.contains(requiredVariable) && coveredSources.add(requiredVariable)) { + if (!visible.contains(requiredVariable)) { + continue; + } + String previousSource = outputSourcesByName.get(requiredVariable); + if (previousSource == null) { outputBindings.add(new LmdbLftjPlan.OutputBinding(requiredVariable, requiredVariable)); + outputSourcesByName.put(requiredVariable, requiredVariable); + } else if (!previousSource.equals(requiredVariable)) { + return null; + } + } + return outputBindings; + } + + private List collectVisibleInputBindings(Projection projection, Extension extension, + List visibleVariables, List requiredVariables) { + LinkedHashSet neededInputs = new LinkedHashSet<>(requiredVariables); + if (projection != null) { + 
for (ProjectionElem projectionElem : projection.getProjectionElemList().getElements()) { + neededInputs.add(projectionElem.getName()); + } + } else if (extension != null) { + neededInputs.addAll(extension.getBindingNames()); + } else { + neededInputs.addAll(visibleVariables); + } + if (extension != null) { + neededInputs = resolveExtensionInputs(neededInputs, extension, Set.copyOf(visibleVariables)); + } + List outputBindings = new ArrayList<>(); + for (String visibleVariable : visibleVariables) { + if (neededInputs.contains(visibleVariable)) { + outputBindings.add(new LmdbLftjPlan.OutputBinding(visibleVariable, visibleVariable)); } } return outputBindings; @@ -436,6 +477,59 @@ public void meet(Var node) { }); } + private LinkedHashSet resolveExtensionInputs(Set requiredNames, Extension extension, + Set visibleVariables) { + Map expressionsByName = new LinkedHashMap<>(); + for (ExtensionElem element : extension.getElements()) { + expressionsByName.put(element.getName(), element.getExpr()); + } + LinkedHashSet resolvedInputs = new LinkedHashSet<>(); + for (String requiredName : requiredNames) { + collectExtensionInputs(requiredName, visibleVariables, expressionsByName, resolvedInputs, new HashSet<>()); + } + return resolvedInputs; + } + + private void collectExtensionInputs(String variableName, Set visibleVariables, + Map expressionsByName, Set resolvedInputs, Set visiting) { + if (visibleVariables.contains(variableName)) { + resolvedInputs.add(variableName); + return; + } + ValueExpr expression = expressionsByName.get(variableName); + if (expression == null || !visiting.add(variableName)) { + return; + } + collectExtensionInputs(expression, visibleVariables, expressionsByName, resolvedInputs, visiting); + visiting.remove(variableName); + } + + private void collectExtensionInputs(ValueExpr expression, Set visibleVariables, + Map expressionsByName, Set resolvedInputs, Set visiting) { + expression.visit(new AbstractQueryModelVisitor() { + @Override + public void 
meet(Var node) { + if (!isNamedVariable(node)) { + return; + } + String variableName = node.getName(); + if (visibleVariables.contains(variableName)) { + resolvedInputs.add(variableName); + return; + } + ValueExpr nestedExpression = expressionsByName.get(variableName); + if (nestedExpression != null && visiting.add(variableName)) { + try { + collectExtensionInputs(nestedExpression, visibleVariables, expressionsByName, resolvedInputs, + visiting); + } finally { + visiting.remove(variableName); + } + } + } + }); + } + private Map collectExtensionBindings(Extension extension, List visibleVariables) { if (extension == null) { return Map.of(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index 902a76e81e..74a75f7410 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -540,6 +540,31 @@ void fullCodegenShouldFuseSupportedFilterAndProjectionIntoLftjPlan() throws Exce } } + @Test + void fullCodegenShouldMatchFallbackForAliasedProjectionWithResidualFilter() throws Exception { + String query = foafCycleAliasQueryWithResidualFilter(); + + try (FullCodegenFixture fallbackFixture = new FullCodegenFixture(4, false, false, null); + FullCodegenFixture fullFixture = new FullCodegenFixture(4, true, true, + LmdbLftjFullCodegenCompiler.INSTANCE)) { + List fallbackRows = executeQueryRows(fallbackFixture.repository, query); + try (SailRepositoryConnection connection = fullFixture.repository.getConnection()) { + assertThat(connection.prepareTupleQuery(query) + .explain(Explanation.Level.Optimized) + .toString()) + .contains("LmdbLftjTupleExpr") + .contains("Filter") + .contains("Projection"); + } + List fullRows = executeQueryRows(fullFixture.repository, query); + + assertThat(fallbackRows).hasSize(6); + assertThat(fullRows) 
+ .withFailMessage("fallback=%s full=%s", fallbackRows, fullRows) + .containsExactlyElementsOf(fallbackRows); + } + } + @Test void fullCodegenFoafBenchmarkSequentialQueriesShouldKeepUsingGeneratedFactories() throws Exception { FoafCliqueQueryBenchmark benchmark = configuredFoafBenchmark(); @@ -976,6 +1001,16 @@ private String foafCycleAliasQuery(int size) { return builder.toString(); } + private String foafCycleAliasQueryWithResidualFilter() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c && STRSTARTS(STR(?a), \"urn:person:1\"))\n" + + "}\n"; + } + private List executeQueryRows(SailRepository repository, String query) { try (SailRepositoryConnection connection = repository.getConnection()) { List rows = new ArrayList<>(); @@ -1235,14 +1270,19 @@ private static final class FullCodegenFixture implements AutoCloseable { private final File dataDir; private FullCodegenFixture() throws IOException { - this(4, LmdbLftjFullCodegenCompiler.INSTANCE); + this(4, true, true, LmdbLftjFullCodegenCompiler.INSTANCE); } private FullCodegenFixture(int personCount, LmdbLftjCodegenCompiler compiler) throws IOException { + this(personCount, true, true, compiler); + } + + private FullCodegenFixture(int personCount, boolean lftjEnabled, boolean lftjCodegenEnabled, + LmdbLftjCodegenCompiler compiler) throws IOException { dataDir = Files.createTempDirectory("rdf4j-lmdb-full-codegen-test").toFile(); LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); - config.setLftjEnabled(true); - config.setLftjCodegenEnabled(true); + config.setLftjEnabled(lftjEnabled); + config.setLftjCodegenEnabled(lftjCodegenEnabled); config.setForceSync(false); config.setValueDBSize(64L * 1024 * 1024); config.setTripleDBSize(config.getValueDBSize()); diff --git 
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java index d4ce84df94..e98a14fab0 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java @@ -63,6 +63,11 @@ void chainedAliasExtensionRowsShouldMatchRegularEvaluation(@TempDir java.nio.fil assertRowsMatch(tempDir, chainedAliasExtensionQuery()); } + @Test + void aliasedProjectionWithResidualFilterRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, aliasedProjectionWithResidualFilterQuery()); + } + @Test void aliasedCycleRowsShouldMatchRegularEvaluationWithBoundSourceVariables(@TempDir java.nio.file.Path tempDir) { assertEquals(List.of( @@ -257,4 +262,14 @@ private String chainedAliasExtensionQuery() { + " BIND(?x AS ?y)\n" + "}\n"; } + + private String aliasedProjectionWithResidualFilterQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c && STRSTARTS(STR(?a), \"urn:person:1\"))\n" + + "}\n"; + } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java index 9e6446a006..82da3c88ab 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -149,6 +149,30 @@ void optimizeShouldFuseParsedAliasProjectionQuery() throws Exception { new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); } + @Test + void 
optimizeShouldExposeSourceBindingsWhenResidualFilterKeepsProjectionOutside() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot(aliasProjectionWithResidualFilterQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + LmdbLftjTupleExpr lftj = assertInstanceOf(LmdbLftjTupleExpr.class, + findNode(tupleExpr, LmdbLftjTupleExpr.class)); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("a", "a"), + new LmdbLftjPlan.OutputBinding("b", "b"), + new LmdbLftjPlan.OutputBinding("c", "c")), lftj.plan().outputBindings()); + assertEquals(List.of( + new LmdbLftjPlan.InequalityConstraint("a", "b"), + new LmdbLftjPlan.InequalityConstraint("a", "c"), + new LmdbLftjPlan.InequalityConstraint("b", "c")), lftj.plan().inequalityConstraints()); + assertInstanceOf(Filter.class, findNode(tupleExpr, Filter.class)); + assertInstanceOf(Projection.class, findNode(tupleExpr, Projection.class)); + } + @Test void optimizerPipelineShouldFuseParsedAliasProjectionQuery() throws Exception { TestQueryAccess queryAccess = new TestQueryAccess(); @@ -408,6 +432,16 @@ private String aliasProjectionQuery() { + "}\n"; } + private String aliasProjectionWithResidualFilterQuery() { + return "PREFIX foaf: \n" + + "SELECT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c && STRSTARTS(STR(?a), \"urn:person:1\"))\n" + + "}\n"; + } + private String reorderedDuplicateAliasProjectionQuery() { return "PREFIX foaf: \n" + "SELECT (?c AS ?z) (?a AS ?x) (?a AS ?x2) WHERE {\n" From 13db1a1d2477d33a6e255a02a19e4353e02a1cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Tue, 7 Apr 2026 07:27:18 +0200 Subject: [PATCH 30/32] fix bugs --- 
.../rdf4j/sail/lmdb/LmdbLftjExecutor.java | 15 +- .../rdf4j/sail/lmdb/LmdbLftjOptimizer.java | 2 +- .../rdf4j/sail/lmdb/LmdbLftjExecutorTest.java | 69 ++++++ .../lmdb/LmdbLftjFusionCorrectnessTest.java | 16 ++ .../lmdb/LmdbLftjSnapshotIsolationTest.java | 223 ++++++++++++++++++ 5 files changed, 311 insertions(+), 14 deletions(-) create mode 100644 core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSnapshotIsolationTest.java diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java index 7321a7081d..a7af240b85 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutor.java @@ -73,10 +73,7 @@ private LmdbCompiledLftjFactory compiledFactory(LmdbQueryAccess queryAccess, Lmd String cacheKey = compiler.cacheKey(plan, shape, queryAccess.includeInferred()); LmdbLftjCodegenCache.CacheEntry cached = queryAccess.cachedCompiledPlan(cacheKey); if (cached != null) { - if (cached.compiled()) { - return cached.factory(); - } - throw codegenFailure(cacheKey, cached.failureMessage(), null); + return cached.compiled() ? cached.factory() : null; } try { @@ -85,16 +82,8 @@ private LmdbCompiledLftjFactory compiledFactory(LmdbQueryAccess queryAccess, Lmd return factory; } catch (RuntimeException e) { queryAccess.cacheCompiledPlanFailure(cacheKey, e.getMessage()); - throw codegenFailure(cacheKey, e.getMessage(), e); - } - } - - private IllegalStateException codegenFailure(String cacheKey, String message, RuntimeException cause) { - String detail = message == null || message.isBlank() ? 
"" : message; - if (cause == null) { - return new IllegalStateException("LMDB LFTJ codegen failed for " + cacheKey + ": " + detail); + return null; } - return new IllegalStateException("LMDB LFTJ codegen failed for " + cacheKey + ": " + detail, cause); } private final class LmdbLftjIteration extends LookAheadIteration { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java index ce199eb4d5..c3e0a085b8 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java @@ -217,7 +217,7 @@ private PlanningTarget tryExtractPlanningTarget(Join node, List opera return null; } - boolean preserveOuterOperators = filterPartition.filterRewrites() + boolean preserveOuterOperators = !rootReplaceable || filterPartition.filterRewrites() .stream() .anyMatch(filterRewrite -> filterRewrite.residualCondition() != null); List outputBindings = collectOutputBindings(projection, extension, visibleVariables, diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java index 9dff0af689..57d336c404 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjExecutorTest.java @@ -166,4 +166,73 @@ void evaluateShouldAvoidRecordScansForHiddenContextMultiplicity() { assertEquals(0, queryAccess.recordScanCalls, "hidden context multiplicity should come from cached frontier counts, not RecordIterator rescans"); } + + @Test + void evaluateShouldFallbackToInterpretedPathWhenCodegenCompilationFails() { + FailingCompiler compiler = new FailingCompiler(); + FailingCachingQueryAccess queryAccess = new FailingCachingQueryAccess(compiler); + 
QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess); + + long firstPassCount = countRows(evaluationStep); + long secondPassCount = countRows(evaluationStep); + + assertTrue(firstPassCount > 0, "the interpreted executor should still enumerate results after codegen fails"); + assertEquals(firstPassCount, secondPassCount, + "cached codegen failures should keep using the interpreted executor on later evaluations"); + assertEquals(1, compiler.compileCalls, + "codegen should fail once and then stay on the interpreted path for the same execution key"); + } + + private long countRows(QueryEvaluationStep evaluationStep) { + long count = 0; + try (CloseableIteration iteration = evaluationStep.evaluate(EmptyBindingSet.getInstance())) { + while (iteration.hasNext()) { + iteration.next(); + count++; + } + } + return count; + } + + private static final class FailingCachingQueryAccess extends LmdbLftjSyntheticScenario.TestQueryAccess { + + private final FailingCompiler compiler; + private LmdbLftjCodegenCache.CacheEntry cachedEntry; + + private FailingCachingQueryAccess(FailingCompiler compiler) { + this.compiler = compiler; + } + + @Override + public LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { + return cachedEntry; + } + + @Override + public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + cachedEntry = LmdbLftjCodegenCache.CacheEntry.success(factory); + } + + @Override + public void cacheCompiledPlanFailure(String executionKey, String message) { + cachedEntry = LmdbLftjCodegenCache.CacheEntry.failure(message); + } + + @Override + public LmdbLftjCodegenCompiler codegenCompiler() { + return compiler; + } + } + + private static final class FailingCompiler extends LmdbLftjCodegenCompiler { + + private int compileCalls; + + @Override + LmdbCompiledLftjFactory compile(LmdbLftjPlan plan, LmdbLftjExecutionShape shape, boolean includeInferred, + LmdbQueryAccess queryAccess) 
{ + compileCalls++; + throw new IllegalArgumentException("synthetic compile failure"); + } + } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java index e98a14fab0..333e3cd4c0 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjFusionCorrectnessTest.java @@ -68,6 +68,11 @@ void aliasedProjectionWithResidualFilterRowsShouldMatchRegularEvaluation(@TempDi assertRowsMatch(tempDir, aliasedProjectionWithResidualFilterQuery()); } + @Test + void orderedAliasedCycleRowsShouldMatchRegularEvaluation(@TempDir java.nio.file.Path tempDir) { + assertRowsMatch(tempDir, orderedAliasedCycleQuery()); + } + @Test void aliasedCycleRowsShouldMatchRegularEvaluationWithBoundSourceVariables(@TempDir java.nio.file.Path tempDir) { assertEquals(List.of( @@ -272,4 +277,15 @@ private String aliasedProjectionWithResidualFilterQuery() { + " FILTER (?a != ?b && ?a != ?c && ?b != ?c && STRSTARTS(STR(?a), \"urn:person:1\"))\n" + "}\n"; } + + private String orderedAliasedCycleQuery() { + return "PREFIX foaf: \n" + + "SELECT DISTINCT (?a AS ?x) (?b AS ?y) (?c AS ?z) WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n" + + "ORDER BY ?x ?y ?z\n"; + } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSnapshotIsolationTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSnapshotIsolationTest.java new file mode 100644 index 0000000000..a4bfdb6b1e --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjSnapshotIsolationTest.java @@ -0,0 +1,223 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse 
RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Codex +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.FOAF; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.sail.NotifyingSailConnection; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class LmdbLftjSnapshotIsolationTest { + + private static final SimpleValueFactory VF = SimpleValueFactory.getInstance(); + + @Test + void queryShouldKeepDatasetSnapshotWhenLftjStartsReading(@TempDir File dataDir) { + LmdbStoreConfig config = new LmdbStoreConfig("spoc,sopc,psoc,posc,ospc,opsc"); + config.setLftjEnabled(true); + config.setLftjCodegenEnabled(false); + config.setForceSync(false); + + SnapshotHookStore store = new SnapshotHookStore(dataDir, config); + Repository repository = new SailRepository(store); + IRI a = VF.createIRI("urn:person:a"); + IRI b = 
VF.createIRI("urn:person:b"); + IRI c = VF.createIRI("urn:person:c"); + + repository.init(); + try { + try (RepositoryConnection writer = repository.getConnection()) { + writer.add(a, FOAF.KNOWS, b); + writer.add(b, FOAF.KNOWS, c); + } + + AtomicBoolean injectedEdge = new AtomicBoolean(); + store.setBeforeAcquireReadTxn(() -> { + if (injectedEdge.compareAndSet(false, true)) { + try (RepositoryConnection writer = repository.getConnection()) { + writer.add(c, FOAF.KNOWS, a); + } + } + }); + + long rowCount = 0; + try (RepositoryConnection reader = repository.getConnection()) { + reader.begin(IsolationLevels.SNAPSHOT); + try (TupleQueryResult result = reader.prepareTupleQuery(cycleQuery()).evaluate()) { + while (result.hasNext()) { + result.next(); + rowCount++; + } + } + reader.commit(); + } + + assertTrue(injectedEdge.get(), "sanity check: the third cycle edge must be committed during evaluation"); + assertEquals(0, rowCount, + "LFTJ should stay on the pinned SailDataset snapshot instead of reading a newer LMDB transaction"); + } finally { + repository.shutDown(); + } + } + + private String cycleQuery() { + return "PREFIX foaf: \n" + + "SELECT * WHERE {\n" + + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n"; + } + + private static final class SnapshotHookStore extends LmdbStore { + + private volatile Runnable beforeAcquireReadTxn = () -> { + }; + + private SnapshotHookStore(File dataDir, LmdbStoreConfig config) { + super(dataDir, config); + } + + private void setBeforeAcquireReadTxn(Runnable beforeAcquireReadTxn) { + this.beforeAcquireReadTxn = beforeAcquireReadTxn; + } + + @Override + protected NotifyingSailConnection getConnectionInternal() throws SailException { + return new SnapshotHookConnection(this); + } + } + + private static final class SnapshotHookConnection extends LmdbStoreConnection { + + private final SnapshotHookStore store; + + private 
SnapshotHookConnection(SnapshotHookStore store) { + super(store); + this.store = store; + } + + @Override + protected LmdbQueryAccess createQueryAccess(boolean includeInferred) { + LmdbQueryAccess delegate = super.createQueryAccess(includeInferred); + AtomicBoolean fired = new AtomicBoolean(); + return new LmdbQueryAccess() { + @Override + public TripleStore tripleStore() { + return delegate.tripleStore(); + } + + @Override + public TxnManager.Txn acquireReadTxn() { + if (fired.compareAndSet(false, true)) { + store.beforeAcquireReadTxn.run(); + } + return delegate.acquireReadTxn(); + } + + @Override + public void releaseReadTxn(TxnManager.Txn txn) { + delegate.releaseReadTxn(txn); + } + + @Override + public long resolveId(Value value) { + return delegate.resolveId(value); + } + + @Override + public Value resolveValue(long id) { + return delegate.resolveValue(id); + } + + @Override + public Value lazyValue(long id) { + return delegate.lazyValue(id); + } + + @Override + public boolean includeInferred() { + return delegate.includeInferred(); + } + + @Override + public Set configuredIndexes() { + return delegate.configuredIndexes(); + } + + @Override + public RecordIterator openScan(TxnManager.Txn txn, String indexName, long subj, long pred, long obj, + long context, boolean explicit) { + return delegate.openScan(txn, indexName, subj, pred, obj, context, explicit); + } + + @Override + public LmdbTrieKeyCursor openTrieCursor(TxnManager.Txn txn, String indexName, boolean explicit) { + return delegate.openTrieCursor(txn, indexName, explicit); + } + + @Override + public LmdbLftjPlanner.PlanningResult cachedPlanningResult(String cacheKey) { + return delegate.cachedPlanningResult(cacheKey); + } + + @Override + public void cachePlanningResult(String cacheKey, LmdbLftjPlanner.PlanningResult result) { + delegate.cachePlanningResult(cacheKey, result); + } + + @Override + public boolean lftjCodegenEnabled() { + return delegate.lftjCodegenEnabled(); + } + + @Override + public 
LmdbLftjCodegenCache.CacheEntry cachedCompiledPlan(String executionKey) { + return delegate.cachedCompiledPlan(executionKey); + } + + @Override + public void cacheCompiledPlanSuccess(String executionKey, LmdbCompiledLftjFactory factory) { + delegate.cacheCompiledPlanSuccess(executionKey, factory); + } + + @Override + public void cacheCompiledPlanFailure(String executionKey, String message) { + delegate.cacheCompiledPlanFailure(executionKey, message); + } + + @Override + public LmdbLftjCodegenCompiler codegenCompiler() { + return delegate.codegenCompiler(); + } + }; + } + } +} From 79e6bab9e8149cb409179078ee5648ebdeea64ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Tue, 7 Apr 2026 09:35:20 +0200 Subject: [PATCH 31/32] fix bugs --- .../rdf4j/sail/lmdb/LmdbLftjOptimizer.java | 40 ++++++-- .../sail/lmdb/LmdbLftjPreparedPlanCache.java | 13 ++- .../rdf4j/sail/lmdb/LmdbLftjCodegenTest.java | 35 ------- .../sail/lmdb/LmdbLftjOptimizerTest.java | 96 +++++++++++++++++++ 4 files changed, 137 insertions(+), 47 deletions(-) diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java index c3e0a085b8..540c1d5c3a 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizer.java @@ -31,6 +31,7 @@ import org.eclipse.rdf4j.query.algebra.Filter; import org.eclipse.rdf4j.query.algebra.Join; import org.eclipse.rdf4j.query.algebra.Order; +import org.eclipse.rdf4j.query.algebra.OrderElem; import org.eclipse.rdf4j.query.algebra.Projection; import org.eclipse.rdf4j.query.algebra.ProjectionElem; import org.eclipse.rdf4j.query.algebra.QueryModelNode; @@ -104,7 +105,7 @@ private boolean transform(Join node, LmdbQueryAccess queryAccess) { LmdbLftjPlanningHints planningHints = planningTarget != null ? 
planningTarget.planningHints() : collectPlanningHints(operands, List.of()); String cacheKey = LmdbLftjPreparedPlanCache.normalizedKey(patterns, configuredIndexes, outputBindings, - inequalityConstraints); + inequalityConstraints, planningHints); LmdbLftjPlanner.PlanningResult plan = queryAccess.cachedPlanningResult(cacheKey); if (plan == null) { plan = planner.plan(fallbackExpr, patterns, configuredIndexes, outputBindings, inequalityConstraints, @@ -167,6 +168,7 @@ private PlanningTarget tryExtractPlanningTarget(Join node, List opera List filters = new ArrayList<>(); Extension extension = null; Projection projection = null; + LinkedHashSet outerRequiredVariables = new LinkedHashSet<>(); while (traversal.getParentNode() instanceof UnaryTupleOperator && ((UnaryTupleOperator) traversal.getParentNode()).getArg() == traversal) { @@ -195,7 +197,13 @@ private PlanningTarget tryExtractPlanningTarget(Join node, List opera } continue; } - if (parent instanceof Distinct || parent instanceof Order) { + if (parent instanceof Distinct) { + rootReplaceable = false; + traversal = parent; + continue; + } + if (parent instanceof Order) { + collectOrderRequiredVariables((Order) parent, Set.copyOf(visibleVariables), outerRequiredVariables); rootReplaceable = false; traversal = parent; continue; @@ -221,7 +229,7 @@ private PlanningTarget tryExtractPlanningTarget(Join node, List opera .stream() .anyMatch(filterRewrite -> filterRewrite.residualCondition() != null); List outputBindings = collectOutputBindings(projection, extension, visibleVariables, - filterPartition.requiredVariables(), preserveOuterOperators); + filterPartition.requiredVariables(), List.copyOf(outerRequiredVariables), preserveOuterOperators); if (outputBindings == null) { return null; } @@ -317,9 +325,11 @@ private boolean isNamedVariable(Var var) { } private List collectOutputBindings(Projection projection, Extension extension, - List visibleVariables, List requiredVariables, boolean preserveOuterOperators) { + List 
visibleVariables, List requiredVariables, List outerRequiredVariables, + boolean preserveOuterOperators) { if (preserveOuterOperators) { - return collectVisibleInputBindings(projection, extension, visibleVariables, requiredVariables); + return collectVisibleInputBindings(projection, extension, visibleVariables, requiredVariables, + outerRequiredVariables); } if (projection == null) { return extension == null ? List.of() : null; @@ -359,8 +369,8 @@ private List collectOutputBindings(Projection projec } private List collectVisibleInputBindings(Projection projection, Extension extension, - List visibleVariables, List requiredVariables) { - LinkedHashSet neededInputs = new LinkedHashSet<>(requiredVariables); + List visibleVariables, List requiredVariables, List outerRequiredVariables) { + LinkedHashSet neededInputs = new LinkedHashSet<>(); if (projection != null) { for (ProjectionElem projectionElem : projection.getProjectionElemList().getElements()) { neededInputs.add(projectionElem.getName()); @@ -370,18 +380,28 @@ private List collectVisibleInputBindings(Projection } else { neededInputs.addAll(visibleVariables); } + neededInputs.addAll(requiredVariables); + neededInputs.addAll(outerRequiredVariables); if (extension != null) { neededInputs = resolveExtensionInputs(neededInputs, extension, Set.copyOf(visibleVariables)); } List outputBindings = new ArrayList<>(); - for (String visibleVariable : visibleVariables) { - if (neededInputs.contains(visibleVariable)) { - outputBindings.add(new LmdbLftjPlan.OutputBinding(visibleVariable, visibleVariable)); + Set visible = Set.copyOf(visibleVariables); + for (String neededInput : neededInputs) { + if (visible.contains(neededInput)) { + outputBindings.add(new LmdbLftjPlan.OutputBinding(neededInput, neededInput)); } } return outputBindings; } + private void collectOrderRequiredVariables(Order order, Set visibleVariables, + Set requiredVariables) { + for (OrderElem orderElem : order.getElements()) { + 
collectReferencedVariables(orderElem.getExpr(), visibleVariables, requiredVariables); + } + } + private FilterPartition partitionFilters(List filters, Set visibleVariables) { if (filters.isEmpty()) { return FilterPartition.empty(); diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java index a23b23ef3b..692b5088b3 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjPreparedPlanCache.java @@ -44,12 +44,13 @@ synchronized void clear() { } static String normalizedKey(List patterns, Set configuredIndexes) { - return normalizedKey(patterns, configuredIndexes, List.of(), List.of()); + return normalizedKey(patterns, configuredIndexes, List.of(), List.of(), LmdbLftjPlanningHints.empty()); } static String normalizedKey(List patterns, Set configuredIndexes, List outputBindings, - List inequalityConstraints) { + List inequalityConstraints, + LmdbLftjPlanningHints planningHints) { StringBuilder builder = new StringBuilder(configuredIndexes.size() * 6 + patterns.size() * 32); builder.append("indexes="); configuredIndexes.stream().sorted().forEach(indexName -> builder.append(indexName).append(',')); @@ -69,6 +70,14 @@ static String normalizedKey(List patterns, Set configu .append(inequalityConstraint.rightVariable()) .append(';'); } + builder.append(";inputs="); + for (String inputBoundVariable : planningHints.inputBoundVariables()) { + builder.append(inputBoundVariable).append(';'); + } + builder.append(";residuals="); + for (String residualFilterVariable : planningHints.residualFilterVariables()) { + builder.append(residualFilterVariable).append(';'); + } return builder.toString(); } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java index 74a75f7410..4b982679e7 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjCodegenTest.java @@ -12,7 +12,6 @@ package org.eclipse.rdf4j.sail.lmdb; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.File; import java.io.IOException; @@ -620,18 +619,6 @@ void defaultStoreConnectionShouldUseFullCodegenCompiler() throws Exception { } } - @Test - void compileFailureShouldNotSilentlyFallbackToInterpretedIteration() { - LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); - CachingQueryAccess queryAccess = new CachingQueryAccess(new FailingCompiler()); - QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); - - assertThatThrownBy(() -> drain(evaluationStep, EmptyBindingSet.getInstance())) - .isInstanceOf(RuntimeException.class) - .hasMessageContaining("LMDB LFTJ execution failed") - .satisfies(throwable -> assertThat(rootCauseOf(throwable)).hasMessageContaining("forced failure")); - } - private void assertFoafBenchmarkQueryCompilesGeneratedFactory(int cycleSize) throws Exception { FoafCliqueQueryBenchmark benchmark = configuredFoafBenchmark(); benchmark.setup(); @@ -738,28 +725,6 @@ void codegenCacheShouldCompileSeparatelyForDistinctAliasLayouts() { assertThat(queryAccess.cachedEntry(duplicateAliased.executionKey())).isNotNull(); } - @Test - void codegenCacheShouldReuseNegativeResultAfterCompileFailure() { - LmdbLftjPlan plan = LmdbLftjSyntheticScenario.createPlan(); - FailingCompiler compiler = new FailingCompiler(); - CachingQueryAccess queryAccess = new CachingQueryAccess(compiler); - QueryEvaluationStep evaluationStep = LmdbLftjSyntheticScenario.createEvaluationStep(queryAccess, plan); - - assertThatThrownBy(() -> 
drain(evaluationStep, EmptyBindingSet.getInstance())) - .isInstanceOf(RuntimeException.class) - .hasMessageContaining("LMDB LFTJ execution failed") - .satisfies(throwable -> assertThat(rootCauseOf(throwable)).hasMessageContaining("forced failure")); - assertThatThrownBy(() -> drain(evaluationStep, EmptyBindingSet.getInstance())) - .isInstanceOf(RuntimeException.class) - .hasMessageContaining("LMDB LFTJ execution failed") - .satisfies(throwable -> assertThat(rootCauseOf(throwable)).hasMessageContaining("forced failure")); - - assertThat(compiler.compileCalls).isEqualTo(1); - assertThat(queryAccess.cachedEntry(plan.executionKey())).isNotNull(); - assertThat(queryAccess.cachedEntry(plan.executionKey()).compiled()).isFalse(); - assertThat(queryAccess.cachedEntry(plan.executionKey()).failureMessage()).contains("forced failure"); - } - private boolean invokeBooleanGetter(Object target, String getterName) { try { Method getter = target.getClass().getMethod(getterName); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java index 82da3c88ab..1fce0d8739 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbLftjOptimizerTest.java @@ -173,6 +173,25 @@ void optimizeShouldExposeSourceBindingsWhenResidualFilterKeepsProjectionOutside( assertInstanceOf(Projection.class, findNode(tupleExpr, Projection.class)); } + @Test + void optimizeShouldExposeOrderBindingsWhenOrderStaysOutsideFusedPlan() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr tupleExpr = parsedQueryRoot(orderByNonProjectedVarQuery()); + + optimizer.optimize(tupleExpr, (Dataset) null, EmptyBindingSet.getInstance()); + + 
LmdbLftjTupleExpr lftj = findNode(tupleExpr, LmdbLftjTupleExpr.class); + assertInstanceOf(Order.class, findNode(tupleExpr, Order.class)); + assertInstanceOf(Projection.class, findNode(tupleExpr, Projection.class)); + assertEquals(List.of( + new LmdbLftjPlan.OutputBinding("a", "a"), + new LmdbLftjPlan.OutputBinding("age", "age")), lftj.plan().outputBindings()); + assertEquals(completeInequalities(3), lftj.plan().inequalityConstraints()); + } + @Test void optimizerPipelineShouldFuseParsedAliasProjectionQuery() throws Exception { TestQueryAccess queryAccess = new TestQueryAccess(); @@ -379,6 +398,25 @@ void optimizeShouldPreferValuesAndLeafVarsForCycle5ValuesDistinctMailboxOrdered( List.of("b", "c", "d", "e")); } + @Test + void optimizeShouldNotReusePreparedPlanAcrossDistinctPlanningHints() throws Exception { + TestQueryAccess queryAccess = new TestQueryAccess(); + LmdbLftjOptimizer optimizer = new LmdbLftjOptimizer( + new LmdbLftjTripleSource(new EmptyTripleSource(), queryAccess)); + + TupleExpr unbound = parsedQueryRoot(cycleQuery(9)); + TupleExpr valuesBound = parsedQueryRoot(cycleQueryWithValuesBinding(9, "e", "urn:person:5")); + + optimizer.optimize(unbound, (Dataset) null, EmptyBindingSet.getInstance()); + optimizer.optimize(valuesBound, (Dataset) null, EmptyBindingSet.getInstance()); + + assertEquals(2, queryAccess.cachedPlanPuts, + "planning hints must partition prepared-plan cache entries"); + assertEquals(0, queryAccess.cachedPlanHits, + "VALUES-bound planning hints must not reuse the unbound prepared plan"); + assertEquals("e", findNode(valuesBound, LmdbLftjTupleExpr.class).plan().variableOrder().get(0)); + } + private TupleExpr cycle(String a, String b, String c) { StatementPattern pattern1 = statementPattern(a, b); StatementPattern pattern2 = statementPattern(b, c); @@ -442,6 +480,18 @@ private String aliasProjectionWithResidualFilterQuery() { + "}\n"; } + private String orderByNonProjectedVarQuery() { + return "PREFIX foaf: \n" + + "SELECT ?a WHERE {\n" 
+ + " ?a foaf:knows ?b .\n" + + " ?b foaf:knows ?c .\n" + + " ?c foaf:knows ?a .\n" + + " ?a foaf:age ?age .\n" + + " FILTER (?a != ?b && ?a != ?c && ?b != ?c)\n" + + "}\n" + + "ORDER BY ?age ?a\n"; + } + private String reorderedDuplicateAliasProjectionQuery() { return "PREFIX foaf: \n" + "SELECT (?c AS ?z) (?a AS ?x) (?a AS ?x2) WHERE {\n" @@ -503,6 +553,52 @@ private String repeatedVariableQuery() { + "}\n"; } + private String cycleQuery(int size) { + StringBuilder builder = new StringBuilder(); + builder.append("PREFIX foaf: \n"); + builder.append("SELECT * WHERE {\n"); + appendCyclePattern(builder, size); + appendPairwiseInequalities(builder, size); + builder.append("}\n"); + return builder.toString(); + } + + private String cycleQueryWithValuesBinding(int size, String variableName, String iri) { + StringBuilder builder = new StringBuilder(); + builder.append("PREFIX foaf: \n"); + builder.append("SELECT * WHERE {\n"); + builder.append(" VALUES ?").append(variableName).append(" { <").append(iri).append("> }\n"); + appendCyclePattern(builder, size); + appendPairwiseInequalities(builder, size); + builder.append("}\n"); + return builder.toString(); + } + + private void appendCyclePattern(StringBuilder builder, int size) { + for (int i = 0; i < size; i++) { + builder.append(" ?") + .append(variableName(i)) + .append(" foaf:knows ?") + .append(variableName((i + 1) % size)) + .append(" .\n"); + } + } + + private void appendPairwiseInequalities(StringBuilder builder, int size) { + builder.append(" FILTER ("); + boolean first = true; + for (int i = 0; i < size; i++) { + for (int j = i + 1; j < size; j++) { + if (!first) { + builder.append(" && "); + } + builder.append("?").append(variableName(i)).append(" != ?").append(variableName(j)); + first = false; + } + } + builder.append(")\n"); + } + private TupleExpr parsedQueryRoot(String query) throws Exception { ParsedTupleQuery parsed = QueryParserUtil.parseTupleQuery(QueryLanguage.SPARQL, query, null); TupleExpr tupleExpr 
= parsed.getTupleExpr().clone(); From ba6ddb2d318c62934d146a0b6ac5231afe665f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ha=CC=8Avard=20Ottestad?= Date: Tue, 7 Apr 2026 23:55:47 +0200 Subject: [PATCH 32/32] fix bugs --- .../impl/ArrayBindingBasedQueryEvaluationContext.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ArrayBindingBasedQueryEvaluationContext.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ArrayBindingBasedQueryEvaluationContext.java index 8ae18963cd..73241d0f2f 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ArrayBindingBasedQueryEvaluationContext.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ArrayBindingBasedQueryEvaluationContext.java @@ -40,6 +40,7 @@ import org.eclipse.rdf4j.query.algebra.QueryModelNode; import org.eclipse.rdf4j.query.algebra.QueryRoot; import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.UnaryTupleOperator; import org.eclipse.rdf4j.query.algebra.Var; import org.eclipse.rdf4j.query.algebra.ZeroLengthPath; @@ -344,6 +345,11 @@ public static String[] findAllVariablesUsedInQuery(QueryRoot node) { @Override public void meetOther(QueryModelNode node) throws QueryEvaluationException { + if (node instanceof TupleExpr) { + for (String bindingName : ((TupleExpr) node).getBindingNames()) { + varNames.computeIfAbsent(bindingName, k -> k); + } + } super.meetOther(node); }