Skip to content

Commit 3d8f152

Browse files
committed
very good results with few regressions
1 parent c430058 commit 3d8f152

12 files changed

Lines changed: 5692 additions & 5129 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/FilterSelectivityTelemetry.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
package org.eclipse.rdf4j.query.algebra.evaluation.optimizer;
1313

1414
import org.eclipse.rdf4j.query.algebra.Filter;
15+
import org.eclipse.rdf4j.query.algebra.StatementPattern;
16+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
1517
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
1618
import org.eclipse.rdf4j.query.explanation.TelemetryMetricNames;
1719

@@ -70,6 +72,9 @@ private static double estimateCardinalityPassRatio(Filter filter, EvaluationStat
7072
if (statistics == null || filter.getArg() == null) {
7173
return -1.0d;
7274
}
75+
if (!isCheapCardinalityFallbackInput(filter.getArg())) {
76+
return -1.0d;
77+
}
7378

7479
double inputRows = statistics.getCardinality(filter.getArg());
7580
double outputRows = statistics.getCardinality(filter);
@@ -82,6 +87,16 @@ private static double estimateCardinalityPassRatio(Filter filter, EvaluationStat
8287
return Math.min(1.0d, outputRows / inputRows);
8388
}
8489

90+
private static boolean isCheapCardinalityFallbackInput(TupleExpr tupleExpr) {
91+
if (tupleExpr instanceof StatementPattern) {
92+
return true;
93+
}
94+
if (tupleExpr instanceof Filter) {
95+
return isCheapCardinalityFallbackInput(((Filter) tupleExpr).getArg());
96+
}
97+
return false;
98+
}
99+
85100
private static boolean isValidPassRatio(double value) {
86101
return Double.isFinite(value) && value >= 0.0d && value <= 1.0d;
87102
}

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/QueryJoinOptimizer.java

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.eclipse.rdf4j.model.Statement;
3535
import org.eclipse.rdf4j.model.Value;
3636
import org.eclipse.rdf4j.model.ValueFactory;
37+
import org.eclipse.rdf4j.model.base.CoreDatatype;
3738
import org.eclipse.rdf4j.model.datatypes.XMLDatatypeUtil;
3839
import org.eclipse.rdf4j.model.vocabulary.FN;
3940
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
@@ -193,6 +194,7 @@ protected class JoinVisitor extends AbstractSimpleQueryModelVisitor<RuntimeExcep
193194
private Set<String> boundVars = new HashSet<>();
194195
private double currentHighestCost = 1;
195196
private final Map<TupleExpr, Double> plannedPrefixRowsByRightArg = new IdentityHashMap<>();
197+
private final Set<TupleExpr> plannerOrderedArgs = Collections.newSetFromMap(new IdentityHashMap<>());
196198
private int suppressedSmallLiteralFilterAnchors;
197199

198200
protected JoinVisitor() {
@@ -723,10 +725,12 @@ private boolean isUnsafeCoreInEqualityValue(Value value) {
723725
if (!(value instanceof Literal)) {
724726
return false;
725727
}
726-
IRI datatype = ((Literal) value).getDatatype();
727-
return XMLDatatypeUtil.isNumericDatatype(datatype)
728-
|| XMLSchema.BOOLEAN.equals(datatype)
729-
|| XMLSchema.DATETIME.equals(datatype);
728+
CoreDatatype coreDatatype = ((Literal) value).getCoreDatatype();
729+
if(coreDatatype.isXSDDatatype()){
730+
CoreDatatype.XSD xsdDatatype = coreDatatype.asXSDDatatypeOrNull();
731+
return xsdDatatype.isNumericDatatype() || xsdDatatype.isCalendarDatatype() || xsdDatatype == CoreDatatype.XSD.BOOLEAN;
732+
}
733+
return false;
730734
}
731735

732736
private RawJoinSegment findAnchorSegment(List<Object> rawPlanItems, DeferredFilter deferredFilter,
@@ -1142,6 +1146,7 @@ private TupleExpr buildJoinRoot(Deque<TupleExpr> orderedJoinArgs) {
11421146

11431147
Deque<TupleExpr> remaining = new ArrayDeque<>(orderedJoinArgs);
11441148
Map<TupleExpr, Double> plannedPrefixRowsForArgs = plannedPrefixRowsForArgs(orderedJoinArgs);
1149+
boolean plannerProvidedSegment = plannerProvidedSegment(orderedJoinArgs);
11451150

11461151
if (remaining.size() > 1) {
11471152
double cardinality = 0;
@@ -1167,7 +1172,7 @@ private TupleExpr buildJoinRoot(Deque<TupleExpr> orderedJoinArgs) {
11671172
cardinality = Math.max(cardinality, left.getResultSizeEstimate());
11681173
cardinality = Math.max(cardinality, right.getResultSizeEstimate());
11691174
Join join = createJoinWithEstimatedResultSize(left, right,
1170-
plannedPrefixRowsForArgs.remove(right));
1175+
plannedPrefixRowsForArgs.remove(right), plannerProvidedSegment);
11711176
join.setOrder((Var) supportedOrders.toArray()[0]);
11721177
join.setMergeJoin(true);
11731178
remaining.addFirst(join);
@@ -1181,7 +1186,7 @@ private TupleExpr buildJoinRoot(Deque<TupleExpr> orderedJoinArgs) {
11811186
supportedOrders.retainAll(next.getSupportedOrders(tripleSource));
11821187

11831188
Join join = createJoinWithEstimatedResultSize(root, next,
1184-
plannedPrefixRowsForArgs.remove(next));
1189+
plannedPrefixRowsForArgs.remove(next), plannerProvidedSegment);
11851190
if (USE_MERGE_JOIN_FOR_LAST_STATEMENT_PATTERNS_WHEN_CROSS_JOIN) {
11861191
mergeJoinForCrossJoin(remaining, supportedOrders, root, next, join);
11871192
}
@@ -1190,12 +1195,25 @@ private TupleExpr buildJoinRoot(Deque<TupleExpr> orderedJoinArgs) {
11901195

11911196
while (!remaining.isEmpty()) {
11921197
TupleExpr next = remaining.removeFirst();
1193-
root = createJoinWithEstimatedResultSize(root, next, plannedPrefixRowsForArgs.remove(next));
1198+
root = createJoinWithEstimatedResultSize(root, next, plannedPrefixRowsForArgs.remove(next),
1199+
plannerProvidedSegment);
11941200
}
11951201

11961202
return root;
11971203
}
11981204

1205+
private boolean plannerProvidedSegment(Deque<TupleExpr> orderedJoinArgs) {
1206+
if (plannerOrderedArgs.isEmpty()) {
1207+
return false;
1208+
}
1209+
for (TupleExpr orderedJoinArg : orderedJoinArgs) {
1210+
if (!plannerOrderedArgs.contains(orderedJoinArg)) {
1211+
return false;
1212+
}
1213+
}
1214+
return !orderedJoinArgs.isEmpty();
1215+
}
1216+
11991217
private Map<TupleExpr, Double> plannedPrefixRowsForArgs(Deque<TupleExpr> orderedJoinArgs) {
12001218
if (plannedPrefixRowsByRightArg.isEmpty() || orderedJoinArgs.size() < 2) {
12011219
return new IdentityHashMap<>();
@@ -1278,6 +1296,7 @@ private Deque<TupleExpr> optimizeJoinGroup(List<TupleExpr> joinGroup, List<Defer
12781296
JoinOrderPlanner.JoinOrderPlan plan = planningAttempt.getPlan().get();
12791297
Deque<TupleExpr> plannedJoinArgs = new ArrayDeque<>(plan.getOrderedArgs());
12801298
Deque<TupleExpr> normalizedJoinArgs = positionBindingSetAssignments(plannedJoinArgs);
1299+
markPlannerOrderedArgs(normalizedJoinArgs);
12811300
if (sameIdentityOrder(plan.getOrderedArgs(), new ArrayList<>(normalizedJoinArgs))) {
12821301
applyPlannerStepEstimates(plan);
12831302
} else {
@@ -1333,6 +1352,12 @@ private JoinOrderPlanner.PlanningAttempt planSegmentWithJoinOrderPlanner(List<Tu
13331352
return attempt;
13341353
}
13351354

1355+
private void markPlannerOrderedArgs(Deque<TupleExpr> orderedArgs) {
1356+
for (TupleExpr orderedArg : orderedArgs) {
1357+
plannerOrderedArgs.add(orderedArg);
1358+
}
1359+
}
1360+
13361361
private JoinOrderPlanner.Algorithm plannerAlgorithm(int segmentSize) {
13371362
if (JOIN_ORDER_STRATEGY == JoinOrderStrategy.DYNAMIC_PROGRAMMING) {
13381363
return JoinOrderPlanner.Algorithm.DYNAMIC_PROGRAMMING;
@@ -1547,9 +1572,19 @@ private Join createJoinWithEstimatedResultSize(TupleExpr left, TupleExpr right)
15471572
}
15481573

15491574
private Join createJoinWithEstimatedResultSize(TupleExpr left, TupleExpr right, Double plannedResultSize) {
1575+
return createJoinWithEstimatedResultSize(left, right, plannedResultSize, false);
1576+
}
1577+
1578+
private Join createJoinWithEstimatedResultSize(TupleExpr left, TupleExpr right, Double plannedResultSize,
1579+
boolean plannerProvidedSegment) {
15501580
Join join = new Join(left, right);
15511581
if (plannedResultSize != null && isFiniteNonNegative(plannedResultSize)) {
15521582
join.setResultSizeEstimate(Math.max(join.getResultSizeEstimate(), plannedResultSize));
1583+
} else if (plannerProvidedSegment) {
1584+
double existingEstimate = maxFiniteResultSizeEstimate(left, right);
1585+
if (isFiniteNonNegative(existingEstimate)) {
1586+
join.setResultSizeEstimate(Math.max(join.getResultSizeEstimate(), existingEstimate));
1587+
}
15531588
} else if (statistics.supportsJoinEstimation()) {
15541589
double estimatedResultSize = statistics.getCardinality(join);
15551590
if (!Double.isNaN(estimatedResultSize) && estimatedResultSize >= 0) {
@@ -1563,6 +1598,21 @@ private Join createJoinWithEstimatedResultSize(TupleExpr left, TupleExpr right,
15631598
return join;
15641599
}
15651600

1601+
private double maxFiniteResultSizeEstimate(TupleExpr left, TupleExpr right) {
1602+
double leftEstimate = left.getResultSizeEstimate();
1603+
double rightEstimate = right.getResultSizeEstimate();
1604+
if (isFiniteNonNegative(leftEstimate) && isFiniteNonNegative(rightEstimate)) {
1605+
return Math.max(leftEstimate, rightEstimate);
1606+
}
1607+
if (isFiniteNonNegative(leftEstimate)) {
1608+
return leftEstimate;
1609+
}
1610+
if (isFiniteNonNegative(rightEstimate)) {
1611+
return rightEstimate;
1612+
}
1613+
return Double.NaN;
1614+
}
1615+
15661616
private void copyOptimizerAnnotations(TupleExpr source, TupleExpr target) {
15671617
for (Map.Entry<String, Long> entry : source.getLongMetricsActual().entrySet()) {
15681618
if (TelemetryMetricNames.isOptimizerMetric(entry.getKey())) {

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/QueryJoinOptimizerTest.java

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,26 @@ public void optimizeInvokesPlannerSpiWhenStatisticsProvidesPlan() {
526526
assertThat(predicates(flattenJoinLeaves(root.getArg()))).containsExactly(ex("pC"), ex("pB"), ex("pA"));
527527
}
528528

529+
@Test
530+
public void optimizeDoesNotRecomputeJoinCardinalityWhenPlannerHasNoPrefixRows() {
531+
StatementPattern a = statementPattern("s", "a", ex("pA"));
532+
StatementPattern b = statementPattern("a", "b", ex("pB"));
533+
StatementPattern c = statementPattern("b", "c", ex("pC"));
534+
PlannerStatistics statistics = new PlannerStatistics(List.of(c, b, a),
535+
List.of(a, b, c), Double.NaN, Double.NaN);
536+
537+
QueryRoot root = new QueryRoot(new Join(new Join(a, b), c));
538+
new QueryJoinOptimizer(statistics, new EmptyTripleSource()).optimize(root, null, null);
539+
540+
assertThat(statistics.planCalls)
541+
.as("QueryJoinOptimizer should delegate segment ordering to JoinOrderPlanner when available")
542+
.isEqualTo(1);
543+
assertThat(statistics.joinCardinalityCalls)
544+
.as("Planner-provided segments should not fall back to recursive full join cardinality")
545+
.isZero();
546+
assertThat(predicates(flattenJoinLeaves(root.getArg()))).containsExactly(ex("pC"), ex("pB"), ex("pA"));
547+
}
548+
529549
@Test
530550
public void optimizeDoesNotUseCloneBasedGreedyWhenPlannerProvidesDenseCyclePlan() {
531551
BindingSetAssignment userPairValues = bindingSetAssignment(Map.of(
@@ -1465,6 +1485,8 @@ private String predicate(TupleExpr expr) {
14651485
private static final class PlannerStatistics extends EvaluationStatistics implements JoinOrderPlanner {
14661486
private final List<TupleExpr> orderedArgs;
14671487
private final List<TupleExpr> expectedArgs;
1488+
private final double estimatedFinalRows;
1489+
private final double statementCardinality;
14681490
private List<JoinOrderPlanner.FilterConstraint> filterConstraints = List.of();
14691491
private int planCalls;
14701492
private int joinCardinalityCalls;
@@ -1474,8 +1496,15 @@ private PlannerStatistics(List<TupleExpr> orderedArgs) {
14741496
}
14751497

14761498
/**
 * Creates a stub with an estimated final row count of {@code 1.0} and a statement cardinality of {@code 10.0}.
 *
 * @param orderedArgs  the argument order stored for the planner stub
 * @param expectedArgs the arguments stored as the expected planner input
 */
private PlannerStatistics(List<TupleExpr> orderedArgs, List<TupleExpr> expectedArgs) {
	this(orderedArgs, expectedArgs, 1.0d, 10.0d);
}

/**
 * Creates a stub with explicit numeric settings.
 *
 * @param orderedArgs          the argument order stored for the planner stub
 * @param expectedArgs         the arguments stored as the expected planner input
 * @param estimatedFinalRows   the value stored in {@code estimatedFinalRows}
 * @param statementCardinality the value stored in {@code statementCardinality}
 */
private PlannerStatistics(List<TupleExpr> orderedArgs, List<TupleExpr> expectedArgs,
		double estimatedFinalRows, double statementCardinality) {
	this.statementCardinality = statementCardinality;
	this.estimatedFinalRows = estimatedFinalRows;
	this.expectedArgs = expectedArgs;
	this.orderedArgs = orderedArgs;
}
14801509

14811510
@Override
@@ -1488,15 +1517,15 @@ public double getCardinality(TupleExpr expr) {
14881517
if (expr instanceof Join) {
14891518
joinCardinalityCalls++;
14901519
}
1491-
return 10.0d;
1520+
return statementCardinality;
14921521
}
14931522

14941523
@Override
14951524
public Optional<JoinOrderPlan> planJoinOrder(List<TupleExpr> args, Set<String> initiallyBoundVars,
14961525
Algorithm algorithm) {
14971526
planCalls++;
14981527
assertThat(args).containsExactlyElementsOf(expectedArgs);
1499-
return Optional.of(new JoinOrderPlan(orderedArgs, 1.0d, 1.0d));
1528+
return Optional.of(new JoinOrderPlan(orderedArgs, estimatedFinalRows, 1.0d));
15001529
}
15011530

15021531
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.query.algebra.evaluation.optimizer;
13+
14+
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
16+
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
17+
import org.eclipse.rdf4j.query.algebra.Compare;
18+
import org.eclipse.rdf4j.query.algebra.Filter;
19+
import org.eclipse.rdf4j.query.algebra.LeftJoin;
20+
import org.eclipse.rdf4j.query.algebra.StatementPattern;
21+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
22+
import org.eclipse.rdf4j.query.algebra.ValueConstant;
23+
import org.eclipse.rdf4j.query.algebra.Var;
24+
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
25+
import org.eclipse.rdf4j.query.explanation.TelemetryMetricNames;
26+
import org.junit.jupiter.api.Test;
27+
28+
class FilterSelectivityTelemetryTest {
29+
30+
private static final SimpleValueFactory VF = SimpleValueFactory.getInstance();
31+
32+
@Test
33+
void skipsCardinalityFallbackForComplexFilterInput() {
34+
StatementPattern left = pattern("s", "p1", "o1");
35+
StatementPattern right = pattern("s", "p2", "o2");
36+
Filter filter = new Filter(new LeftJoin(left, right),
37+
new Compare(Var.of("o2"), new ValueConstant(VF.createIRI("urn:o")), Compare.CompareOp.NE));
38+
CountingStatistics statistics = new CountingStatistics();
39+
40+
FilterSelectivityTelemetry.annotate(filter, statistics);
41+
42+
assertEquals(0, statistics.cardinalityCalls,
43+
"Telemetry should not recursively estimate full cardinality for complex filter inputs");
44+
assertEquals(-1.0d, filter.getDoubleMetricPlanned(TelemetryMetricNames.PLANNED_FILTER_PASS_RATIO));
45+
}
46+
47+
@Test
48+
void keepsCardinalityFallbackForLocalFilterInput() {
49+
Filter filter = new Filter(pattern("s", "p", "o"),
50+
new Compare(Var.of("o"), new ValueConstant(VF.createIRI("urn:o")), Compare.CompareOp.NE));
51+
CountingStatistics statistics = new CountingStatistics();
52+
53+
FilterSelectivityTelemetry.annotate(filter, statistics);
54+
55+
assertEquals(2, statistics.cardinalityCalls);
56+
assertEquals(0.5d, filter.getDoubleMetricPlanned(TelemetryMetricNames.PLANNED_FILTER_PASS_RATIO));
57+
}
58+
59+
private static StatementPattern pattern(String subjectName, String predicateName, String objectName) {
60+
return new StatementPattern(Var.of(subjectName), Var.of("p", VF.createIRI("urn:" + predicateName)),
61+
Var.of(objectName));
62+
}
63+
64+
private static final class CountingStatistics extends EvaluationStatistics {
65+
private int cardinalityCalls;
66+
67+
@Override
68+
public double getCardinality(TupleExpr expr) {
69+
cardinalityCalls++;
70+
return expr instanceof Filter ? 5.0d : 10.0d;
71+
}
72+
}
73+
}

0 commit comments

Comments
 (0)