Skip to content

Commit 6d2f28f

Browse files
committed
maybe better in some cases but also mostly worse
1 parent f2b45fe commit 6d2f28f

3 files changed

Lines changed: 343 additions & 4 deletions

File tree

core/queryalgebra/model/src/main/java/org/eclipse/rdf4j/query/algebra/helpers/CartesianJoinExplainAnalyzer.java

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,8 @@ private void analyzeJoin(Join join, Set<String> guaranteedBindings, boolean guar
280280

281281
if (globallySupported && !guaranteedBindingsTainted && leftInfo.supportedSubtree && rightInfo.supportedSubtree
282282
&& isCorrelationEmpty(leftInfo.bindingNames, leftInfo.use, rightInfo.bindingNames, rightInfo.use)
283-
&& !regionConnects(join, List.of(left), List.of(right))) {
283+
&& !regionConnects(join, List.of(left), List.of(right))
284+
&& !contextConnects(join, List.of(left), List.of(right))) {
284285
joinTypeByNode.put(join, "Cartesian product");
285286
}
286287

@@ -326,7 +327,8 @@ private void analyzeNJoin(QueryModelNode nJoin, Set<String> guaranteedBindings,
326327

327328
if (globallySupported && !guaranteedBindingsTainted && prefixSupported && argInfo.supportedSubtree
328329
&& isCorrelationEmpty(prefixBindingNames, prefixUse, argInfo.bindingNames, argInfo.use)
329-
&& !regionConnects(nJoin, args.subList(0, index), List.of(arg))) {
330+
&& !regionConnects(nJoin, args.subList(0, index), List.of(arg))
331+
&& !contextConnects(nJoin, args.subList(0, index), List.of(arg))) {
330332
boundaries.add(formatBoundary(index + 1));
331333
}
332334

@@ -495,6 +497,56 @@ private boolean regionConnects(QueryModelNode boundaryOwner, Collection<? extend
495497
QueryModelNode regionRoot = findMandatoryRegionRoot(boundaryOwner);
496498
List<QueryModelNode> atoms = new ArrayList<>();
497499
collectMandatoryRegionAtoms(regionRoot, atoms);
500+
return atomsConnect(atoms, leftRoots, rightRoots);
501+
}
502+
503+
private boolean contextConnects(QueryModelNode boundaryOwner, Collection<? extends QueryModelNode> leftRoots,
504+
Collection<? extends QueryModelNode> rightRoots) {
505+
QueryModelNode regionRoot = findMandatoryRegionRoot(boundaryOwner);
506+
List<QueryModelNode> atoms = new ArrayList<>();
507+
collectMandatoryRegionAtoms(regionRoot, atoms);
508+
509+
QueryModelNode child = regionRoot;
510+
QueryModelNode parent = child.getParentNode();
511+
boolean addedContext = false;
512+
while (parent != null) {
513+
if (isExact(parent, LeftJoin.class)) {
514+
LeftJoin leftJoin = (LeftJoin) parent;
515+
if (child == leftJoin.getLeftArg()) {
516+
collectMandatoryRegionAtoms(leftJoin.getRightArg(), atoms);
517+
addedContext = true;
518+
} else if (child == leftJoin.getRightArg()) {
519+
collectMandatoryRegionAtoms(leftJoin.getLeftArg(), atoms);
520+
addedContext = true;
521+
}
522+
if (leftJoin.getCondition() != null) {
523+
atoms.add(leftJoin.getCondition());
524+
addedContext = true;
525+
}
526+
child = parent;
527+
parent = child.getParentNode();
528+
continue;
529+
}
530+
if (isExact(parent, Filter.class)) {
531+
atoms.add(((Filter) parent).getCondition());
532+
addedContext = true;
533+
child = parent;
534+
parent = child.getParentNode();
535+
continue;
536+
}
537+
if (isTransparentRegionWrapper(parent) || isAlternativeRegionBoundary(parent)) {
538+
child = parent;
539+
parent = child.getParentNode();
540+
continue;
541+
}
542+
break;
543+
}
544+
545+
return addedContext && atomsConnect(atoms, leftRoots, rightRoots);
546+
}
547+
548+
private boolean atomsConnect(List<QueryModelNode> atoms, Collection<? extends QueryModelNode> leftRoots,
549+
Collection<? extends QueryModelNode> rightRoots) {
498550

499551
List<QueryModelNode> leftAtoms = new ArrayList<>();
500552
List<QueryModelNode> rightAtoms = new ArrayList<>();
@@ -571,6 +623,15 @@ private void collectMandatoryRegionAtoms(QueryModelNode node, List<QueryModelNod
571623
if (node == null) {
572624
return;
573625
}
626+
if (node instanceof Filter) {
627+
atoms.add(((Filter) node).getCondition());
628+
}
629+
if (node instanceof Extension) {
630+
atoms.addAll(((Extension) node).getElements());
631+
}
632+
if (node instanceof Projection && !((Projection) node).isSubquery()) {
633+
atoms.add(((Projection) node).getProjectionElemList());
634+
}
574635
if (isMandatoryJoinNode(node) || isTransparentRegionWrapper(node)) {
575636
for (TupleExpr child : tupleChildren(node)) {
576637
collectMandatoryRegionAtoms(child, atoms);
@@ -615,6 +676,13 @@ private static boolean isMandatoryRegionBoundary(QueryModelNode node) {
615676
|| (isExact(node, Projection.class) && ((Projection) node).isSubquery());
616677
}
617678

679+
private static boolean isAlternativeRegionBoundary(QueryModelNode node) {
680+
return isExact(node, Union.class)
681+
|| isExact(node, Difference.class)
682+
|| isExact(node, Intersection.class)
683+
|| isClass(node, NUNION_CLASS);
684+
}
685+
618686
private static List<TupleExpr> tupleChildren(QueryModelNode node) {
619687
if (isClass(node, NJOIN_CLASS) || isClass(node, NUNION_CLASS)) {
620688
return invokeList(node, "getArgs", TupleExpr.class);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.query.algebra.helpers;
13+
14+
import static org.assertj.core.api.Assertions.assertThat;
15+
import static org.junit.jupiter.api.DynamicTest.dynamicTest;
16+
17+
import java.util.List;
18+
import java.util.stream.Stream;
19+
20+
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
21+
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
22+
import org.eclipse.rdf4j.query.algebra.Compare;
23+
import org.eclipse.rdf4j.query.algebra.Distinct;
24+
import org.eclipse.rdf4j.query.algebra.Exists;
25+
import org.eclipse.rdf4j.query.algebra.Extension;
26+
import org.eclipse.rdf4j.query.algebra.ExtensionElem;
27+
import org.eclipse.rdf4j.query.algebra.Filter;
28+
import org.eclipse.rdf4j.query.algebra.Join;
29+
import org.eclipse.rdf4j.query.algebra.LeftJoin;
30+
import org.eclipse.rdf4j.query.algebra.Or;
31+
import org.eclipse.rdf4j.query.algebra.Projection;
32+
import org.eclipse.rdf4j.query.algebra.ProjectionElem;
33+
import org.eclipse.rdf4j.query.algebra.ProjectionElemList;
34+
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
35+
import org.eclipse.rdf4j.query.algebra.Slice;
36+
import org.eclipse.rdf4j.query.algebra.StatementPattern;
37+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
38+
import org.eclipse.rdf4j.query.algebra.Union;
39+
import org.eclipse.rdf4j.query.algebra.ValueConstant;
40+
import org.eclipse.rdf4j.query.algebra.Var;
41+
import org.eclipse.rdf4j.query.explanation.GenericPlanNode;
42+
import org.eclipse.rdf4j.query.explanation.TelemetryMetricNames;
43+
import org.eclipse.rdf4j.query.impl.MapBindingSet;
44+
import org.junit.jupiter.api.DynamicTest;
45+
import org.junit.jupiter.api.TestFactory;
46+
47+
class CartesianJoinExplainAnalyzerFalsePositiveTest {
48+
49+
@TestFactory
50+
Stream<DynamicTest> doesNotReportCartesianProductWhenQueryContextReconnectsJoinSides() {
51+
return reconnectingQueries().map(query -> dynamicTest(query.name, () -> {
52+
GenericPlanNode plan = explain(query.expr);
53+
54+
assertThat(plan.toString())
55+
.as(query.sparql)
56+
.doesNotContain("joinType=Cartesian product");
57+
assertThat(findFirstPlan(plan, node -> "Cartesian product"
58+
.equals(node.getStringMetricActual(TelemetryMetricNames.JOIN_TYPE))))
59+
.as(query.sparql)
60+
.isNull();
61+
}));
62+
}
63+
64+
private static Stream<QueryCase> reconnectingQueries() {
65+
return Stream.of(
66+
new QueryCase(
67+
"mandatory FILTER equality",
68+
"""
69+
SELECT * WHERE {
70+
?s :p ?a .
71+
?x :q ?b .
72+
FILTER (?a = ?b)
73+
}
74+
""",
75+
filter(join(pattern("s", "p", "a"), pattern("x", "q", "b")), eq("a", "b"))),
76+
new QueryCase(
77+
"mandatory FILTER under projection",
78+
"""
79+
SELECT ?s WHERE {
80+
?s :p ?a .
81+
?x :q ?b .
82+
FILTER (?a = ?b)
83+
} LIMIT 10
84+
""",
85+
new Projection(new Slice(filter(join(pattern("s", "p", "a"), pattern("x", "q", "b")),
86+
eq("a", "b")), 0, 10), new ProjectionElemList(new ProjectionElem("s")))),
87+
new QueryCase(
88+
"mandatory FILTER EXISTS",
89+
"""
90+
SELECT * WHERE {
91+
?s :p ?a .
92+
?x :q ?b .
93+
FILTER EXISTS { ?a :sameAs ?b }
94+
}
95+
""",
96+
filter(join(pattern("s", "p", "a"), pattern("x", "q", "b")),
97+
new Exists(pattern("a", "sameAs", "b")))),
98+
new QueryCase(
99+
"outer FILTER over UNION branch",
100+
"""
101+
SELECT * WHERE {
102+
{ ?s :p ?a . ?x :q ?b }
103+
UNION
104+
{ ?s :fallback ?a }
105+
FILTER (?a = ?b)
106+
}
107+
""",
108+
filter(new Union(join(pattern("s", "p", "a"), pattern("x", "q", "b")),
109+
pattern("s", "fallback", "a")), eq("a", "b"))),
110+
new QueryCase(
111+
"outer FILTER over two UNION branches",
112+
"""
113+
SELECT * WHERE {
114+
{ ?s :p ?a . ?x :q ?b }
115+
UNION
116+
{ ?u :r ?c . ?v :t ?d }
117+
FILTER ((?a = ?b) || (?c = ?d))
118+
}
119+
""",
120+
filter(new Union(
121+
join(pattern("s", "p", "a"), pattern("x", "q", "b")),
122+
join(pattern("u", "r", "c"), pattern("v", "t", "d"))),
123+
new Or(eq("a", "b"), eq("c", "d")))),
124+
new QueryCase(
125+
"OPTIONAL right-side FILTER",
126+
"""
127+
SELECT * WHERE {
128+
?root :seed ?seed .
129+
OPTIONAL {
130+
?s :p ?a .
131+
?x :q ?b .
132+
FILTER (?a = ?b)
133+
}
134+
}
135+
""",
136+
new LeftJoin(pattern("root", "seedPred", "seed"),
137+
filter(join(pattern("s", "p", "a"), pattern("x", "q", "b")),
138+
eq("a", "b")))),
139+
new QueryCase(
140+
"OPTIONAL right-side condition",
141+
"""
142+
SELECT * WHERE {
143+
?root :seed ?seed .
144+
OPTIONAL {
145+
?s :p ?a .
146+
?x :q ?b .
147+
FILTER (?a = ?b)
148+
}
149+
}
150+
""",
151+
new LeftJoin(pattern("root", "seedPred", "seed"),
152+
join(pattern("s", "p", "a"), pattern("x", "q", "b")), eq("a", "b"))),
153+
new QueryCase(
154+
"outer FILTER over OPTIONAL right side",
155+
"""
156+
SELECT * WHERE {
157+
?root :seed ?seed .
158+
OPTIONAL {
159+
?s :p ?a .
160+
?x :q ?b .
161+
}
162+
FILTER (?a = ?b)
163+
}
164+
""",
165+
filter(new LeftJoin(pattern("root", "seedPred", "seed"),
166+
join(pattern("s", "p", "a"), pattern("x", "q", "b"))), eq("a", "b"))),
167+
new QueryCase(
168+
"BIND alias connected by outer FILTER",
169+
"""
170+
SELECT * WHERE {
171+
?s :p ?a .
172+
?x :q ?b .
173+
BIND(?a AS ?aCopy)
174+
FILTER (?aCopy = ?b)
175+
}
176+
""",
177+
filter(new Extension(join(pattern("s", "p", "a"), pattern("x", "q", "b")),
178+
new ExtensionElem(plainVar("a"), "aCopy")), eq("aCopy", "b"))),
179+
new QueryCase(
180+
"VALUES UNION OPTIONAL outer FILTER",
181+
"""
182+
SELECT (COUNT(DISTINCT ?entity) AS ?count) WHERE {
183+
VALUES ?target { 1 2 }
184+
{ ?entity a :Node . ?entity :connectsTo ?targetNode }
185+
UNION
186+
{ ?entity a :Node }
187+
OPTIONAL { ?entity :weight ?w }
188+
FILTER ((?w = ?target) || (?w = 3))
189+
}
190+
""",
191+
filter(new LeftJoin(
192+
join(singleValueBinding("target", 1),
193+
new Union(join(pattern("entity", "typePred", "nodeType"),
194+
pattern("entity", "connectsToPred", "targetNode")),
195+
pattern("entity", "typePred", "nodeType"))),
196+
pattern("entity", "weightPred", "w")),
197+
new Or(eq("w", "target"),
198+
new Compare(plainVar("w"),
199+
new ValueConstant(SimpleValueFactory.getInstance().createLiteral(3)),
200+
Compare.CompareOp.EQ)))));
201+
}
202+
203+
private static GenericPlanNode explain(TupleExpr expr) {
204+
QueryModelTreeToGenericPlanNode converter = new QueryModelTreeToGenericPlanNode(expr);
205+
expr.visit(converter);
206+
return converter.getGenericPlanNode();
207+
}
208+
209+
private static Filter filter(TupleExpr arg, org.eclipse.rdf4j.query.algebra.ValueExpr condition) {
210+
return new Filter(arg, condition);
211+
}
212+
213+
private static Join join(TupleExpr left, TupleExpr right) {
214+
Join join = new Join(left, right);
215+
join.setAlgorithm("JoinIterator");
216+
return join;
217+
}
218+
219+
private static Compare eq(String left, String right) {
220+
return new Compare(plainVar(left), plainVar(right), Compare.CompareOp.EQ);
221+
}
222+
223+
private static StatementPattern pattern(String subject, String predicate, String object) {
224+
return new StatementPattern(plainVar(subject), plainVar(predicate), plainVar(object));
225+
}
226+
227+
private static BindingSetAssignment singleValueBinding(String bindingName, int value) {
228+
MapBindingSet bindingSet = new MapBindingSet(1);
229+
bindingSet.addBinding(bindingName, SimpleValueFactory.getInstance().createLiteral(value));
230+
BindingSetAssignment assignment = new BindingSetAssignment();
231+
assignment.setBindingSets(List.of(bindingSet));
232+
return assignment;
233+
}
234+
235+
@SuppressWarnings("removal")
236+
private static Var plainVar(String name) {
237+
return new Var(name);
238+
}
239+
240+
private static GenericPlanNode findFirstPlan(GenericPlanNode node,
241+
java.util.function.Predicate<GenericPlanNode> predicate) {
242+
if (predicate.test(node)) {
243+
return node;
244+
}
245+
List<GenericPlanNode> plans = node.getPlans();
246+
if (plans == null) {
247+
return null;
248+
}
249+
for (GenericPlanNode child : plans) {
250+
GenericPlanNode match = findFirstPlan(child, predicate);
251+
if (match != null) {
252+
return match;
253+
}
254+
}
255+
return null;
256+
}
257+
258+
private static final class QueryCase {
259+
260+
private final String name;
261+
private final String sparql;
262+
private final TupleExpr expr;
263+
264+
private QueryCase(String name, String sparql, TupleExpr expr) {
265+
this.name = name;
266+
this.sparql = sparql;
267+
this.expr = expr;
268+
}
269+
}
270+
}

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SketchBasedJoinEstimator.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ enum SketchPlannerPath {
411411
private static final int SKETCH_PAYLOAD_FORMAT_NATIVE = -1;
412412
private static final int SKETCH_PAYLOAD_FRAME_HEADER_BYTES = Integer.BYTES + Integer.BYTES;
413413
private static final int TARGET_SKETCH_PART_FILES = 128;
414-
private static final int DEFAULT_BUCKET_COUNT = 4*1024;
414+
private static final int DEFAULT_BUCKET_COUNT = 4 * 1024;
415415
private static final int DEFAULT_SKETCH_NOMINAL_ENTRIES = 64;
416416
private static final String ESTIMATE_CACHE_SECONDS_PROPERTY = "estimateCacheSeconds";
417417
private static final String ZERO_INTERSECTION_EXACT_DISTINCT_LIMIT_PROPERTY = "zeroIntersectionExactDistinctLimit";
@@ -3158,7 +3158,8 @@ private static final class State {
31583158

31593159
State(int k, int subjectBuckets, int predicateBuckets, int objectBuckets, int contextBuckets,
31603160
boolean contextPairSketchesEnabled) {
3161-
System.out.println("Initializing state: k=" + k + ", subjectBuckets=" + subjectBuckets + ", predicateBuckets="
3161+
System.out.println("Initializing state: k=" + k + ", subjectBuckets=" + subjectBuckets
3162+
+ ", predicateBuckets="
31623163
+ predicateBuckets + ", objectBuckets=" + objectBuckets + ", contextBuckets=" + contextBuckets
31633164
+ ", contextPairSketchesEnabled=" + contextPairSketchesEnabled);
31643165
this.k = k;

0 commit comments

Comments
 (0)