Skip to content

Commit 39b97e0

Browse files
committed
wip
1 parent 8201da8 commit 39b97e0

18 files changed

Lines changed: 7656 additions & 238 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/QueryJoinOptimizer.java

Lines changed: 440 additions & 9 deletions
Large diffs are not rendered by default.

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/FilterOptimizerTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,8 @@ public void standardPipelinePreJoinFilterPassStillPushesFilterBelowJoinWhenOnlyF
270270

271271
@Test
272272
public void standardPipelineFinalFilterPassAnnotatesSelectivityFromCardinalityStatsUsedForPlacement() {
273-
String query = "SELECT * WHERE {?branch <urn:name> ?branchName . ?copy <urn:locatedAt> ?branch . "
274-
+ "FILTER(?branchName = \"Branch 0\" && ?branchName != \"Branch 1\") }";
273+
String query = "SELECT * WHERE {?branch <urn:rank> ?rank . ?copy <urn:locatedAt> ?branch . "
274+
+ "FILTER(?rank = 1 && ?rank != 2) }";
275275

276276
QueryRoot root = new QueryRoot(QueryParserUtil.parseQuery(QueryLanguage.SPARQL, query, null).getTupleExpr());
277277
StandardQueryOptimizerPipeline pipeline = new StandardQueryOptimizerPipeline(
@@ -290,7 +290,7 @@ public void standardPipelineFinalFilterPassAnnotatesSelectivityFromCardinalitySt
290290
.satisfies(filter -> {
291291
assertThat(filter.getArg()).isInstanceOf(StatementPattern.class);
292292
assertThat(filter.getDoubleMetricPlanned(TelemetryMetricNames.PLANNED_FILTER_PASS_RATIO))
293-
.isEqualTo(0.2d);
293+
.isEqualTo(1.0d);
294294
assertThat(filter.getStringMetricPlanned(TelemetryMetricNames.FILTER_SELECTIVITY_SOURCE))
295295
.isEqualTo("cardinality");
296296
});

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/QueryJoinOptimizerTest.java

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,9 +491,40 @@ public void optimizeInvokesPlannerSpiWhenStatisticsProvidesPlan() {
491491
assertThat(statistics.planCalls)
492492
.as("QueryJoinOptimizer should delegate segment ordering to JoinOrderPlanner when available")
493493
.isEqualTo(1);
494+
assertThat(statistics.joinCardinalityCalls)
495+
.as("Planner-provided orders should bypass clone-based greedy join-cardinality scoring")
496+
.isZero();
494497
assertThat(predicates(flattenJoinLeaves(root.getArg()))).containsExactly(ex("pC"), ex("pB"), ex("pA"));
495498
}
496499

500+
/**
 * Checks that a planner-supplied order for a dense, cyclic join segment is used without any Join cardinality
 * lookups.
 *
 * The segment consists of two VALUES assignments (one binding u1 and u2, one binding u3) and six "follows"
 * statement patterns that connect u1, u2 and u3 pairwise in both directions. The test asserts that the
 * statistics object is asked for a plan exactly once and that its Join-cardinality counter stays at zero.
 */
@Test
501+
public void optimizeDoesNotUseCloneBasedGreedyWhenPlannerProvidesDenseCyclePlan() {
502+
BindingSetAssignment userPairValues = bindingSetAssignment(Map.of(
503+
"u1", ex("social/user/0"),
504+
"u2", ex("social/user/1")));
505+
BindingSetAssignment user3Values = bindingSetAssignment(Map.of("u3", ex("social/user/2")));
506+
StatementPattern u1FollowsU2 = statementPattern("u1", "u2", social("follows"));
507+
StatementPattern u1FollowsU3 = statementPattern("u1", "u3", social("follows"));
508+
StatementPattern u2FollowsU1 = statementPattern("u2", "u1", social("follows"));
509+
StatementPattern u3FollowsU1 = statementPattern("u3", "u1", social("follows"));
510+
StatementPattern u2FollowsU3 = statementPattern("u2", "u3", social("follows"));
511+
StatementPattern u3FollowsU2 = statementPattern("u3", "u2", social("follows"));
512+
List<TupleExpr> plannedOrder = List.of(userPairValues, user3Values, u1FollowsU2, u1FollowsU3,
513+
u2FollowsU1, u3FollowsU1, u2FollowsU3, u3FollowsU2);
514+
// The input join tree is left-deep and lists its leaves in the same sequence as plannedOrder.
TupleExpr rootArg = new Join(new Join(new Join(new Join(new Join(new Join(new Join(userPairValues,
515+
user3Values), u1FollowsU2), u1FollowsU3), u2FollowsU1), u3FollowsU1), u2FollowsU3), u3FollowsU2);
516+
// NOTE(review): the second constructor argument appears to be the argument list the stub expects to
// receive from the optimizer - confirm against the PlannerStatistics constructor.
PlannerStatistics statistics = new PlannerStatistics(plannedOrder, plannedOrder);
517+
518+
new QueryJoinOptimizer(statistics, new EmptyTripleSource()).optimize(new QueryRoot(rootArg), null, null);
519+
520+
assertThat(statistics.planCalls)
521+
.as("Dense cyclic planner-supported segments should be delegated once")
522+
.isEqualTo(1);
523+
assertThat(statistics.joinCardinalityCalls)
524+
.as("Dense cyclic planner-supported segments must not enter greedy clone-based scoring")
525+
.isZero();
526+
}
527+
497528
@Test
498529
public void optimizeNormalizesPlannerOutputBindingsBeforeFirstUse() {
499530
BindingSetAssignment uValues = bindingSetAssignment("u", "u1");
@@ -767,6 +798,74 @@ public void optimizeLetsSelectiveLocalFilterOutputBeatBroadWorkRows() {
767798
.isLessThan(leafPredicates.indexOf(medical("hasEncounter")));
768799
}
769800

801+
/**
 * Checks that an IN filter over plain string literals on a variable bound by a statement pattern yields a
 * BindingSetAssignment for that variable in the optimized tree.
 *
 * The filter lists "Alice" twice; the test expects exactly one assignment binding "name", holding two binding
 * sets, i.e. the repeated literal contributes only one row.
 */
@Test
802+
public void optimizeAddsValuesAnchorForMustBoundStringInFilter() {
803+
QueryRoot root = optimizeWithStatistics(String.join("\n",
804+
"PREFIX ex: <http://example.com/>",
805+
"SELECT * WHERE {",
806+
" ?s ex:name ?name .",
807+
" ?s ex:type ?type .",
808+
" FILTER (?name IN (\"Alice\", \"Bob\", \"Alice\"))",
809+
"}"), new EvaluationStatistics());
810+
811+
List<BindingSetAssignment> assignments = bindingSetAssignments(root, "name");
812+
assertThat(assignments)
813+
.as("A must-bound string IN filter can be represented as a same-term VALUES semijoin")
814+
.hasSize(1);
815+
assertThat(assignments.get(0).getBindingSets())
816+
.as("VALUES rows should be deduplicated by RDF-term identity")
817+
.hasSize(2);
818+
}
819+
820+
/**
 * Checks that no BindingSetAssignment binding "name" appears in the optimized tree when the filtered variable
 * is produced by a BIND expression rather than by a statement pattern.
 *
 * The BIND expression here is (1 / 0); per the assertion message, an erroring expression leaves ?name unbound,
 * so BIND does not prove the variable is always bound.
 */
@Test
820+
public void optimizeDoesNotAddValuesAnchorForBindProducedInFilterVariable() {
821+
QueryRoot root = optimizeWithStatistics(String.join("\n",
822+
"PREFIX ex: <http://example.com/>",
823+
"SELECT * WHERE {",
824+
" ?s ex:p ?o .",
825+
" BIND((1 / 0) AS ?name)",
826+
" ?s ex:q ?q .",
827+
" FILTER (?name IN (\"Alice\", \"Bob\"))",
828+
"}"), new EvaluationStatistics());
829+
830+
assertThat(bindingSetAssignments(root, "name"))
831+
.as("BIND is not a must-bind proof: expression errors leave ?name unbound")
832+
.isEmpty();
833+
}
835+
836+
/**
 * Checks that no BindingSetAssignment binding "value" is added for IN filters whose operands are compared by
 * value rather than by RDF-term identity.
 *
 * Three independent queries are optimized, differing only in the IN list: numeric literals (1, 2.0), an
 * xsd:boolean literal, and an xsd:dateTime literal. Each must leave the tree without an assignment for "value".
 */
@Test
836+
public void optimizeDoesNotAddValuesAnchorForUnknownTypeValueEqualityInFilter() {
837+
// Case 1: numeric operands.
assertThat(bindingSetAssignments(optimizeWithStatistics(String.join("\n",
838+
"PREFIX ex: <http://example.com/>",
839+
"SELECT * WHERE {",
840+
" ?s ex:value ?value .",
841+
" ?s ex:type ?type .",
842+
" FILTER (?value IN (1, 2.0))",
843+
"}"), new EvaluationStatistics()), "value"))
844+
.as("Numeric IN equality can match non-identical RDF terms, so same-term VALUES is unsafe")
845+
.isEmpty();
846+
847+
// Case 2: xsd:boolean operand.
assertThat(bindingSetAssignments(optimizeWithStatistics(String.join("\n",
848+
"PREFIX ex: <http://example.com/>",
849+
"SELECT * WHERE {",
850+
" ?s ex:value ?value .",
851+
" ?s ex:type ?type .",
852+
" FILTER (?value IN (\"true\"^^<http://www.w3.org/2001/XMLSchema#boolean>))",
853+
"}"), new EvaluationStatistics()), "value"))
854+
.as("Boolean IN equality is value equality, not RDF-term identity")
855+
.isEmpty();
856+
857+
// Case 3: xsd:dateTime operand.
assertThat(bindingSetAssignments(optimizeWithStatistics(String.join("\n",
858+
"PREFIX ex: <http://example.com/>",
859+
"SELECT * WHERE {",
860+
" ?s ex:value ?value .",
861+
" ?s ex:type ?type .",
862+
" FILTER (?value IN (\"2020-01-01T00:00:00Z\"^^<http://www.w3.org/2001/XMLSchema#dateTime>))",
863+
"}"), new EvaluationStatistics()), "value"))
864+
.as("dateTime IN equality is value equality, not RDF-term identity")
865+
.isEmpty();
866+
}
868+
770869
@Test
771870
public void optimizeGroupsCheapValuesFilterBeforeCycleJoinAndExists() {
772871
BindingSetAssignment userPairValues = bindingSetAssignment(Map.of(
@@ -1002,6 +1101,20 @@ public void meet(Filter filter) throws RuntimeException {
10021101
return filters.get(0);
10031102
}
10041103

1104+
/**
 * Collects the BindingSetAssignment nodes visited from {@code root} whose binding names include
 * {@code bindingName}.
 *
 * @param root        node on which the visitor is started
 * @param bindingName binding name an assignment must contain to be collected
 * @return the matching assignments in visit order; empty when none match
 */
private static List<BindingSetAssignment> bindingSetAssignments(QueryModelNode root, String bindingName) {
1105+
List<BindingSetAssignment> assignments = new ArrayList<>();
1106+
root.visit(new AbstractQueryModelVisitor<RuntimeException>() {
1107+
@Override
1108+
public void meet(BindingSetAssignment assignment) throws RuntimeException {
1109+
if (assignment.getBindingNames().contains(bindingName)) {
1110+
assignments.add(assignment);
1111+
}
1112+
// Delegate to the default handling so the visit continues as it would without this override.
super.meet(assignment);
1113+
}
1114+
});
1115+
return assignments;
1116+
}
1117+
10051118
private Object buildJoinVisitor(QueryJoinOptimizer optimizer) throws Exception {
10061119
Class<?> joinVisitorClass = Class
10071120
.forName("org.eclipse.rdf4j.query.algebra.evaluation.optimizer.QueryJoinOptimizer$JoinVisitor");
@@ -1289,6 +1402,7 @@ private static final class PlannerStatistics extends EvaluationStatistics implem
12891402
private final List<TupleExpr> expectedArgs;
12901403
private List<JoinOrderPlanner.FilterConstraint> filterConstraints = List.of();
12911404
private int planCalls;
1405+
private int joinCardinalityCalls;
12921406

12931407
private PlannerStatistics(List<TupleExpr> orderedArgs) {
12941408
this(orderedArgs, List.of(orderedArgs.get(2), orderedArgs.get(1), orderedArgs.get(0)));
@@ -1306,6 +1420,9 @@ public boolean supportsJoinEstimation() {
13061420

13071421
/**
 * Test stub: returns a constant cardinality of 10.0 for every expression and counts how many times a
 * {@code Join} expression was passed in, so tests can assert on {@code joinCardinalityCalls}.
 *
 * @param expr expression whose cardinality is requested
 * @return always {@code 10.0d}
 */
@Override
13081422
public double getCardinality(TupleExpr expr) {
1423+
if (expr instanceof Join) {
1424+
joinCardinalityCalls++;
1425+
}
13091426
return 10.0d;
13101427
}
13111428

core/queryrender/src/main/java/org/eclipse/rdf4j/queryrender/sparql/ir/util/transform/UnwrapSingleBgpInUnionBranchesTransform.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,36 @@ private static IrUnion unwrapUnionBranches(IrUnion u) {
8181
}
8282
cur = flattened;
8383
}
84+
cur = flattenNonScopedChildBgps(cur);
8485
// Reapply the accumulated scope to the flattened branch BGP
8586
cur.setNewScope(branchScope);
8687
u2.addBranch(cur);
8788
}
8889
return u2;
8990
}
91+
92+
/**
 * Inlines direct child BGPs of {@code bgp} into its own line list.
 *
 * A child line is inlined when it is an {@code IrBGP}, is not flagged as a new scope, and
 * {@code containsScopedContainer} reports no container among the child's own direct lines. An inlined child is
 * first flattened recursively, then its lines replace it at the same position. All other lines are kept as-is.
 *
 * @param bgp the BGP whose direct lines are examined
 * @return {@code bgp} itself when nothing was inlined; otherwise the result of
 *         {@code BaseTransform.bgpWithLines(bgp, flattened)} with the flattened line list
 */
private static IrBGP flattenNonScopedChildBgps(IrBGP bgp) {
93+
List<IrNode> flattened = new ArrayList<>();
94+
boolean changed = false;
95+
for (IrNode line : bgp.getLines()) {
96+
if (line instanceof IrBGP && !((IrBGP) line).isNewScope()
97+
&& !containsScopedContainer((IrBGP) line)) {
98+
flattened.addAll(flattenNonScopedChildBgps((IrBGP) line).getLines());
99+
changed = true;
100+
} else {
101+
flattened.add(line);
102+
}
103+
}
104+
// Return the original instance when no child was inlined.
return changed ? BaseTransform.bgpWithLines(bgp, flattened) : bgp;
105+
}
106+
107+
/**
 * Reports whether any direct line of {@code bgp} is an {@code IrGraph}, {@code IrOptional}, {@code IrMinus},
 * {@code IrService}, {@code IrSubSelect} or {@code IrUnion}.
 *
 * Only the immediate lines are inspected; nested BGPs are not descended into.
 *
 * @param bgp the BGP whose direct lines are checked
 * @return {@code true} if at least one direct line is one of the listed container types
 */
private static boolean containsScopedContainer(IrBGP bgp) {
108+
for (IrNode line : bgp.getLines()) {
109+
if (line instanceof IrGraph || line instanceof IrOptional || line instanceof IrMinus
110+
|| line instanceof IrService || line instanceof IrSubSelect || line instanceof IrUnion) {
111+
return true;
112+
}
113+
}
114+
return false;
115+
}
90116
}

0 commit comments

Comments
 (0)