Skip to content

Commit c17e4c6

Browse files
committed
GH-5672 introduce simple learned join optimization that tracks fanout metrics
1 parent 36c6cbe commit c17e4c6

13 files changed

Lines changed: 782 additions & 4 deletions

File tree

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.query.algebra.evaluation.impl;
13+
14+
import java.util.Objects;
15+
16+
import org.eclipse.rdf4j.query.Dataset;
17+
import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy;
18+
import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
19+
import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolver;
20+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinStatsProvider;
21+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.LearningQueryOptimizerPipeline;
22+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.LearningTripleSource;
23+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.MemoryJoinStats;
24+
25+
/**
26+
* Evaluation strategy factory that injects a learned join optimizer.
27+
*/
28+
public class LearningEvaluationStrategyFactory extends DefaultEvaluationStrategyFactory {
29+
30+
private final JoinStatsProvider statsProvider;
31+
private final EvaluationStatistics optimizerStatisticsOverride;
32+
33+
public LearningEvaluationStrategyFactory() {
34+
this(new MemoryJoinStats(), null);
35+
}
36+
37+
public LearningEvaluationStrategyFactory(FederatedServiceResolver resolver) {
38+
this(new MemoryJoinStats(), null);
39+
setFederatedServiceResolver(resolver);
40+
}
41+
42+
public LearningEvaluationStrategyFactory(EvaluationStatistics optimizerStatisticsOverride) {
43+
this(new MemoryJoinStats(), optimizerStatisticsOverride);
44+
}
45+
46+
public LearningEvaluationStrategyFactory(JoinStatsProvider statsProvider) {
47+
this(statsProvider, null);
48+
}
49+
50+
public LearningEvaluationStrategyFactory(JoinStatsProvider statsProvider,
51+
EvaluationStatistics optimizerStatisticsOverride) {
52+
this.statsProvider = Objects.requireNonNull(statsProvider, "statsProvider");
53+
this.optimizerStatisticsOverride = optimizerStatisticsOverride;
54+
}
55+
56+
public JoinStatsProvider getStatsProvider() {
57+
return statsProvider;
58+
}
59+
60+
@Override
61+
public EvaluationStrategy createEvaluationStrategy(Dataset dataset, TripleSource tripleSource,
62+
EvaluationStatistics evaluationStatistics) {
63+
TripleSource learningTripleSource = new LearningTripleSource(tripleSource, statsProvider);
64+
EvaluationStrategy strategy = super.createEvaluationStrategy(dataset, learningTripleSource,
65+
evaluationStatistics);
66+
EvaluationStatistics optimizerStatistics = optimizerStatisticsOverride != null
67+
? optimizerStatisticsOverride
68+
: evaluationStatistics;
69+
strategy.setOptimizerPipeline(
70+
new LearningQueryOptimizerPipeline(strategy, learningTripleSource, optimizerStatistics,
71+
statsProvider));
72+
return strategy;
73+
}
74+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.query.algebra.evaluation.optimizer;
13+
14+
/**
15+
* Collects and supplies statistics about triple pattern evaluations.
16+
*/
17+
public interface JoinStatsProvider {
18+
19+
void reset();
20+
21+
void recordCall(PatternKey key);
22+
23+
void recordResults(PatternKey key, long resultCount);
24+
25+
void seedIfAbsent(PatternKey key, double defaultCardinality, long priorCalls);
26+
27+
double getAverageResults(PatternKey key);
28+
29+
boolean hasStats(PatternKey key);
30+
31+
long getTotalCalls();
32+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.query.algebra.evaluation.optimizer;
13+
14+
import java.util.List;
15+
import java.util.Map;
16+
import java.util.Objects;
17+
18+
import org.eclipse.rdf4j.model.IRI;
19+
import org.eclipse.rdf4j.query.BindingSet;
20+
import org.eclipse.rdf4j.query.Dataset;
21+
import org.eclipse.rdf4j.query.algebra.StatementPattern;
22+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
23+
import org.eclipse.rdf4j.query.algebra.Var;
24+
import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
25+
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
26+
27+
/**
28+
* Join optimizer that uses learned fanout statistics to estimate costs.
29+
*/
30+
public class LearnedQueryJoinOptimizer extends QueryJoinOptimizer {
31+
32+
private static final long DEFAULT_PRIOR_CALLS = 2;
33+
34+
private final JoinStatsProvider statsProvider;
35+
36+
public LearnedQueryJoinOptimizer(EvaluationStatistics statistics, TripleSource tripleSource,
37+
JoinStatsProvider statsProvider) {
38+
this(statistics, false, tripleSource, statsProvider);
39+
}
40+
41+
public LearnedQueryJoinOptimizer(EvaluationStatistics statistics, boolean trackResultSize,
42+
TripleSource tripleSource, JoinStatsProvider statsProvider) {
43+
super(statistics, trackResultSize, tripleSource);
44+
this.statsProvider = Objects.requireNonNull(statsProvider, "statsProvider");
45+
}
46+
47+
@Override
48+
public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) {
49+
tupleExpr.visit(new LearnedJoinVisitor());
50+
}
51+
52+
protected class LearnedJoinVisitor extends JoinVisitor {
53+
54+
@Override
55+
public void meet(StatementPattern node) {
56+
double estimate = estimateCardinality(node);
57+
node.setResultSizeEstimate(estimate);
58+
}
59+
60+
@Override
61+
protected double getTupleExprCost(TupleExpr tupleExpr, Map<TupleExpr, Double> cardinalityMap,
62+
Map<TupleExpr, List<Var>> varsMap, Map<Var, Integer> varFreqMap) {
63+
if (tupleExpr instanceof StatementPattern) {
64+
StatementPattern statementPattern = (StatementPattern) tupleExpr;
65+
double estimate = estimateCardinality(statementPattern);
66+
statementPattern.setCardinality(estimate);
67+
statementPattern.setResultSizeEstimate(estimate);
68+
}
69+
return super.getTupleExprCost(tupleExpr, cardinalityMap, varsMap, varFreqMap);
70+
}
71+
72+
private double estimateCardinality(StatementPattern node) {
73+
PatternKey key = buildKey(node);
74+
if (!statsProvider.hasStats(key)) {
75+
double defaultEstimate = statistics.getCardinality(node);
76+
statsProvider.seedIfAbsent(key, defaultEstimate, DEFAULT_PRIOR_CALLS);
77+
}
78+
double estimate = statsProvider.getAverageResults(key);
79+
if (estimate <= 0.0d) {
80+
estimate = statistics.getCardinality(node);
81+
}
82+
return estimate;
83+
}
84+
85+
private PatternKey buildKey(StatementPattern node) {
86+
int mask = 0;
87+
if (isBound(node.getSubjectVar())) {
88+
mask |= PatternKey.SUBJECT_BOUND;
89+
}
90+
if (isBound(node.getPredicateVar())) {
91+
mask |= PatternKey.PREDICATE_BOUND;
92+
}
93+
if (isBound(node.getObjectVar())) {
94+
mask |= PatternKey.OBJECT_BOUND;
95+
}
96+
Var predVar = node.getPredicateVar();
97+
IRI predicateKey = null;
98+
if (predVar != null && predVar.hasValue() && predVar.getValue() instanceof IRI) {
99+
predicateKey = (IRI) predVar.getValue();
100+
}
101+
return new PatternKey(predicateKey, mask);
102+
}
103+
104+
private boolean isBound(Var var) {
105+
if (var == null) {
106+
return false;
107+
}
108+
List<Var> unbound = getUnboundVars(List.of(var));
109+
return unbound.isEmpty();
110+
}
111+
}
112+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.query.algebra.evaluation.optimizer;
13+
14+
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizerPipeline;
import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
22+
23+
/**
24+
* Standard optimizer pipeline with a learned join optimizer.
25+
*/
26+
public class LearningQueryOptimizerPipeline implements QueryOptimizerPipeline {
27+
28+
private static boolean assertsEnabled = false;
29+
30+
static {
31+
// noinspection AssertWithSideEffects
32+
assert assertsEnabled = true;
33+
}
34+
35+
private final EvaluationStatistics evaluationStatistics;
36+
private final TripleSource tripleSource;
37+
private final EvaluationStrategy strategy;
38+
private final JoinStatsProvider statsProvider;
39+
40+
public LearningQueryOptimizerPipeline(EvaluationStrategy strategy, TripleSource tripleSource,
41+
EvaluationStatistics evaluationStatistics, JoinStatsProvider statsProvider) {
42+
this.strategy = strategy;
43+
this.tripleSource = tripleSource;
44+
this.evaluationStatistics = evaluationStatistics;
45+
this.statsProvider = statsProvider;
46+
}
47+
48+
@Override
49+
public Iterable<QueryOptimizer> getOptimizers() {
50+
List<QueryOptimizer> optimizers = List.of(
51+
StandardQueryOptimizerPipeline.BINDING_ASSIGNER,
52+
StandardQueryOptimizerPipeline.BINDING_SET_ASSIGNMENT_INLINER,
53+
new ConstantOptimizer(strategy),
54+
new RegexAsStringFunctionOptimizer(tripleSource.getValueFactory()),
55+
StandardQueryOptimizerPipeline.COMPARE_OPTIMIZER,
56+
StandardQueryOptimizerPipeline.CONJUNCTIVE_CONSTRAINT_SPLITTER,
57+
StandardQueryOptimizerPipeline.DISJUNCTIVE_CONSTRAINT_OPTIMIZER,
58+
StandardQueryOptimizerPipeline.SAME_TERM_FILTER_OPTIMIZER,
59+
StandardQueryOptimizerPipeline.UNION_SCOPE_CHANGE_OPTIMIZER,
60+
StandardQueryOptimizerPipeline.QUERY_MODEL_NORMALIZER,
61+
StandardQueryOptimizerPipeline.PROJECTION_REMOVAL_OPTIMIZER,
62+
new LearnedQueryJoinOptimizer(evaluationStatistics, strategy.isTrackResultSize(), tripleSource,
63+
statsProvider),
64+
StandardQueryOptimizerPipeline.ITERATIVE_EVALUATION_OPTIMIZER,
65+
StandardQueryOptimizerPipeline.FILTER_OPTIMIZER,
66+
StandardQueryOptimizerPipeline.ORDER_LIMIT_OPTIMIZER
67+
);
68+
69+
if (assertsEnabled) {
70+
List<QueryOptimizer> optimizersWithReferenceChecker = new ArrayList<>();
71+
optimizersWithReferenceChecker.add(new ParentReferenceChecker(null));
72+
for (QueryOptimizer optimizer : optimizers) {
73+
optimizersWithReferenceChecker.add(optimizer);
74+
optimizersWithReferenceChecker.add(new ParentReferenceChecker(optimizer));
75+
}
76+
optimizers = optimizersWithReferenceChecker;
77+
}
78+
79+
return optimizers;
80+
}
81+
}

0 commit comments

Comments
 (0)