Skip to content

Commit 8201da8

Browse files
committed
queries are faster, but optimization takes the most time
1 parent 0de0f79 commit 8201da8

13 files changed

Lines changed: 848 additions & 115 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/QueryJoinOptimizer.java

Lines changed: 268 additions & 27 deletions
Large diffs are not rendered by default.

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/QueryJoinOptimizerTest.java

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.eclipse.rdf4j.query.MalformedQueryException;
3535
import org.eclipse.rdf4j.query.QueryLanguage;
3636
import org.eclipse.rdf4j.query.UnsupportedQueryLanguageException;
37+
import org.eclipse.rdf4j.query.algebra.And;
3738
import org.eclipse.rdf4j.query.algebra.BinaryTupleOperator;
3839
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
3940
import org.eclipse.rdf4j.query.algebra.Compare;
@@ -52,6 +53,7 @@
5253
import org.eclipse.rdf4j.query.algebra.Var;
5354
import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet;
5455
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizerTest;
56+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinFactorCostModel;
5557
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinOrderPlanner;
5658
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.QueryJoinOptimizer;
5759
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
@@ -509,6 +511,23 @@ public void optimizeNormalizesPlannerOutputBindingsBeforeFirstUse() {
509511
.containsExactly(uValues, vValues, follows, name);
510512
}
511513

514+
/**
 * Checks that a {@link Filter} wrapping a {@link BindingSetAssignment} ends up ahead of the statement pattern that
 * uses its variable after {@link QueryJoinOptimizer#optimize} has run.
 *
 * The input tree is {@code Join(Join(uRestriction, vValues), follows)}; the test passes when the flattened join
 * leaves are exactly {@code uRestriction, vValues, follows}, in that order.
 */
@Test
515+
public void optimizeNormalizesPlannerOutputFilterWrappedBindingsBeforeFirstUse() {
516+
BindingSetAssignment uValues = bindingSetAssignment("u", "u1");
517+
// The VALUES block for "u" is wrapped in a Filter so it is no longer a bare BindingSetAssignment leaf.
Filter uRestriction = filter(uValues, "u", "u");
518+
BindingSetAssignment vValues = bindingSetAssignment("v", "v1");
519+
StatementPattern follows = statementPattern("u", "v", ex("follows"));
520+
// NOTE(review): the first list places the filter last and the second places it first; presumably these are
// the order the stubbed planner reports and the normalized order - confirm against PlannerStatistics.
PlannerStatistics statistics = new PlannerStatistics(List.of(vValues, follows, uRestriction),
521+
List.of(uRestriction, vValues, follows));
522+
523+
QueryRoot root = new QueryRoot(new Join(new Join(uRestriction, vValues), follows));
524+
new QueryJoinOptimizer(statistics, new EmptyTripleSource()).optimize(root, null, null);
525+
526+
assertThat(flattenJoinLeaves(root.getArg()))
527+
.as("Filter-wrapped BindingSetAssignment factors should be positioned before first use")
528+
.containsExactly(uRestriction, vValues, follows);
529+
}
530+
512531
@Test
513532
public void optimizePassesDeferredFilterConstraintsToPlannerForFilteredPrefixChoice() {
514533
StatementPattern pValue = statementPattern("result", "p", ex("pValue"));
@@ -725,6 +744,60 @@ public void optimizeTreatsKnownZeroPassFilterAsSelectiveForGreedyPrefix() {
725744
.containsExactlyInAnyOrder(ex("pA"), ex("pB"));
726745
}
727746

747+
/**
 * Checks that a filtered statement pattern is placed first in the join order when the statistics report it as
 * selective, even though the same statistics report a higher factor cost for it.
 *
 * The query joins a patient type pattern, a hasEncounter pattern, a hasObservation pattern and a filtered
 * "value" pattern. Statistics come from {@link LocalFilterWorkRowsStatistics}, seeded with two explicit join
 * costs.
 */
@Test
748+
public void optimizeLetsSelectiveLocalFilterOutputBeatBroadWorkRows() {
749+
StatementPattern patientType = statementPattern("patient", "type", rdfType());
750+
StatementPattern hasEncounter = statementPattern("patient", "enc", medical("hasEncounter"));
751+
StatementPattern hasObservation = statementPattern("enc", "obs", medical("hasObservation"));
752+
// The only filtered factor in the query: a Filter over the "value" statement pattern.
Filter valueFilter = filter(statementPattern("obs", "value", medical("value")), "value", "limit");
753+
754+
QueryRoot root = new QueryRoot(new Join(new Join(patientType, hasEncounter),
755+
new Join(hasObservation, valueFilter)));
756+
new QueryJoinOptimizer(
757+
new LocalFilterWorkRowsStatistics(Map.of(
758+
pairKey(rdfType(), medical("hasEncounter")), 100.0d,
759+
pairKey(medical("hasObservation"), medical("value")), 120.0d)),
760+
new EmptyTripleSource()).optimize(root, null, null);
761+
762+
List<String> leafPredicates = flattenedLeafPredicates(root.getArg());
763+
// The "value" predicate must be the very first leaf ...
assertThat(leafPredicates)
764+
.as("A selective local literal filter should seed before a broad type-edge prefix even when its scan work is higher")
765+
.startsWith(medical("value"));
766+
// ... and in particular must precede the hasEncounter leaf.
assertThat(leafPredicates.indexOf(medical("value")))
767+
.isLessThan(leafPredicates.indexOf(medical("hasEncounter")));
768+
}
769+
770+
/**
 * Checks where the optimizer places the two conjuncts of a combined filter ({@code u1 != u3} AND an EXISTS)
 * that sits on top of a join of two VALUES blocks and six "follows" statement patterns.
 *
 * After optimization the test expects to find a separate {@code u1 != u3} compare filter whose argument
 * contains no statement patterns at all.
 */
@Test
771+
public void optimizeGroupsCheapValuesFilterBeforeCycleJoinAndExists() {
772+
BindingSetAssignment userPairValues = bindingSetAssignment(Map.of(
773+
"u1", ex("social/user/0"),
774+
"u2", ex("social/user/1")));
775+
BindingSetAssignment user3Values = bindingSetAssignment(Map.of("u3", ex("social/user/2")));
776+
// Six "follows" patterns covering both directions between each pair of u1, u2 and u3.
StatementPattern u1FollowsU2 = statementPattern("u1", "u2", social("follows"));
777+
StatementPattern u2FollowsU1 = statementPattern("u2", "u1", social("follows"));
778+
StatementPattern u1FollowsU3 = statementPattern("u1", "u3", social("follows"));
779+
StatementPattern u3FollowsU1 = statementPattern("u3", "u1", social("follows"));
780+
StatementPattern u2FollowsU3 = statementPattern("u2", "u3", social("follows"));
781+
StatementPattern u3FollowsU2 = statementPattern("u3", "u2", social("follows"));
782+
TupleExpr cycle = new Join(new Join(new Join(new Join(new Join(new Join(new Join(userPairValues, user3Values),
783+
u1FollowsU2), u1FollowsU3), u2FollowsU1), u3FollowsU1), u2FollowsU3), u3FollowsU2);
784+
// Both conditions start out in a single Filter at the top of the tree.
Filter combinedFilter = new Filter(cycle, new And(
785+
new Compare(Var.of("u1"), Var.of("u3"), CompareOp.NE),
786+
new Exists(statementPattern("u1", "name", social("name")))));
787+
788+
QueryRoot root = new QueryRoot(combinedFilter);
789+
new QueryJoinOptimizer(new EvaluationStatistics(), new EmptyTripleSource()).optimize(root, null, null);
790+
791+
// compareFilter throws an AssertionError if no standalone u1 != u3 filter exists in the optimized tree.
Filter relocatedValuesFilter = compareFilter(root, "u1", "u3");
792+
assertThat(countStatementPatterns(relocatedValuesFilter.getArg()))
793+
.as("A cheap inequality over VALUES-bound variables should be applied before any cycle join")
794+
.isZero();
795+
Filter existsFilter = existsFilter(root);
796+
// NOTE(review): the left-hand count was asserted to be zero just above and countStatementPatterns returns
// a list size, so this comparison cannot fail once the first assertion has passed. Its only added check is
// that existsFilter(root) finds an EXISTS filter at all.
assertThat(countStatementPatterns(relocatedValuesFilter.getArg()))
797+
.as("The cheap VALUES inequality should be scheduled before the expensive EXISTS filter")
798+
.isLessThanOrEqualTo(countStatementPatterns(existsFilter.getArg()));
799+
}
800+
728801
@Test
729802
public void optimizeDiscountsNotExistsUnlockAgainstConnectedInitialPair() {
730803
String query = String.join("\n",
@@ -864,6 +937,26 @@ public void meet(StatementPattern statementPattern) {
864937
return predicates.stream().limit(limit).collect(Collectors.toList());
865938
}
866939

940+
/**
 * Returns one predicate string per join leaf of the given expression, in leaf order.
 *
 * Each leaf contributes the predicate of the first statement pattern found inside it; leaves that contain no
 * statement pattern are dropped from the result.
 *
 * @param tupleExpr the expression to inspect; QueryRoot/Projection wrappers are removed first via
 *                  {@code unwrapQueryRoot}
 * @return the list of predicate strings, possibly empty
 */
private static List<String> flattenedLeafPredicates(TupleExpr tupleExpr) {
941+
return flattenJoinLeavesKeepingScopeBarriers(unwrapQueryRoot(tupleExpr)).stream()
942+
.map(QueryJoinOptimizerTest::firstStatementPatternPredicate)
943+
// firstStatementPatternPredicate returns null for leaves without any statement pattern.
.filter(predicate -> predicate != null)
944+
.collect(Collectors.toList());
945+
}
946+
947+
/**
 * Returns the predicate value of the first {@link StatementPattern} the visitor encounters inside the given
 * expression.
 *
 * @param tupleExpr the expression to search; QueryRoot/Projection wrappers are removed first via
 *                  {@code unwrapQueryRoot}
 * @return the predicate's string value, or {@code null} when no statement pattern is visited
 */
private static String firstStatementPatternPredicate(TupleExpr tupleExpr) {
948+
// A list is used as a mutable holder because the anonymous visitor cannot assign to a local variable.
ArrayList<String> predicates = new ArrayList<>();
949+
unwrapQueryRoot(tupleExpr).visit(new AbstractQueryModelVisitor<RuntimeException>() {
950+
@Override
951+
public void meet(StatementPattern statementPattern) {
952+
// Only the first visited pattern is recorded; later ones are ignored.
if (predicates.isEmpty()) {
953+
// NOTE(review): assumes the predicate var carries a constant value; presumably getValue() is null
// for an unbound predicate variable, which would throw here - confirm.
predicates.add(statementPattern.getPredicateVar().getValue().stringValue());
954+
}
955+
}
956+
});
957+
return predicates.isEmpty() ? null : predicates.get(0);
958+
}
959+
867960
private static TupleExpr unwrapQueryRoot(TupleExpr tupleExpr) {
868961
while (tupleExpr instanceof QueryRoot || tupleExpr instanceof Projection) {
869962
if (tupleExpr instanceof QueryRoot) {
@@ -955,6 +1048,74 @@ private static BindingSetAssignment bindingSetAssignment(String varName, String
9551048
return assignment;
9561049
}
9571050

1051+
/**
 * Builds a {@link BindingSetAssignment} holding exactly one binding set, with one IRI binding per map entry.
 *
 * @param iriBindings variable name to IRI string; each string is turned into an IRI with {@code VF.createIRI}
 * @return a new assignment containing a single binding set with all the given bindings
 */
private static BindingSetAssignment bindingSetAssignment(Map<String, String> iriBindings) {
1052+
BindingSetAssignment assignment = new BindingSetAssignment();
1053+
QueryBindingSet bindingSet = new QueryBindingSet();
1054+
// All entries go into the same binding set, i.e. they form one row rather than one row per entry.
iriBindings.forEach((name, iri) -> bindingSet.addBinding(name, VF.createIRI(iri)));
1055+
assignment.setBindingSets(List.<BindingSet>of(bindingSet));
1056+
return assignment;
1057+
}
1058+
1059+
/**
 * @return the full IRI string of {@code rdf:type}
 */
private static String rdfType() {
1060+
return "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
1061+
}
1062+
1063+
/**
 * Builds an IRI string in the test's "medical" namespace.
 *
 * @param localName the local name appended to the namespace prefix
 * @return the namespace prefix followed by {@code localName}
 */
private static String medical(String localName) {
1064+
return "http://example.com/theme/medical/" + localName;
1065+
}
1066+
1067+
/**
 * Builds an IRI string in the test's "social" namespace.
 *
 * @param localName the local name appended to the namespace prefix
 * @return the namespace prefix followed by {@code localName}
 */
private static String social(String localName) {
1068+
return "http://example.com/theme/social/" + localName;
1069+
}
1070+
1071+
/**
 * Finds the first {@link Filter} under {@code root} whose condition is a {@code !=} comparison between the two
 * named variables, in that left/right order.
 *
 * @param root         the query model node to search
 * @param leftVarName  name of the variable expected as the left argument of the comparison
 * @param rightVarName name of the variable expected as the right argument of the comparison
 * @return the first matching filter, in the order produced by {@code filters(root)}
 * @throws AssertionError if no such filter exists
 */
private static Filter compareFilter(QueryModelNode root, String leftVarName, String rightVarName) {
1072+
return filters(root).stream()
1073+
.filter(filter -> isCompareFilter(filter, leftVarName, rightVarName))
1074+
.findFirst()
1075+
.orElseThrow(() -> new AssertionError("Missing compare filter " + leftVarName + " != "
1076+
+ rightVarName + " in " + root));
1077+
}
1078+
1079+
/**
 * Finds the first {@link Filter} under {@code root} whose condition is directly an {@link Exists} node.
 *
 * A filter whose condition merely contains an EXISTS (for example inside an {@code And}) does not match, since
 * only the top-level condition type is tested.
 *
 * @param root the query model node to search
 * @return the first matching filter, in the order produced by {@code filters(root)}
 * @throws AssertionError if no such filter exists
 */
private static Filter existsFilter(QueryModelNode root) {
1080+
return filters(root).stream()
1081+
.filter(filter -> filter.getCondition() instanceof Exists)
1082+
.findFirst()
1083+
.orElseThrow(() -> new AssertionError("Missing EXISTS filter in " + root));
1084+
}
1085+
1086+
/**
 * Collects every {@link Filter} node the visitor reaches when started at {@code root}.
 *
 * @param root the query model node to traverse
 * @return the filters in visit order; an outer filter is added before the traversal descends into it
 */
private static List<Filter> filters(QueryModelNode root) {
1087+
ArrayList<Filter> filters = new ArrayList<>();
1088+
root.visit(new AbstractQueryModelVisitor<RuntimeException>() {
1089+
@Override
1090+
public void meet(Filter filter) throws RuntimeException {
1091+
filters.add(filter);
1092+
// Delegate to the default handling so that filters nested below this one are collected as well.
super.meet(filter);
1093+
}
1094+
});
1095+
return filters;
1096+
}
1097+
1098+
/**
 * Tests whether a filter's condition is exactly {@code leftVarName != rightVarName}.
 *
 * @param filter       the filter whose condition is inspected
 * @param leftVarName  variable name expected on the left side
 * @param rightVarName variable name expected on the right side
 * @return {@code true} only if the condition is a {@link Compare} with operator {@code NE} whose left and right
 *         arguments equal {@code Var.of(leftVarName)} and {@code Var.of(rightVarName)} respectively
 */
private static boolean isCompareFilter(Filter filter, String leftVarName, String rightVarName) {
1099+
if (!(filter.getCondition() instanceof Compare)) {
1100+
return false;
1101+
}
1102+
Compare compare = (Compare) filter.getCondition();
1103+
// The match is order-sensitive: a comparison with the two variables swapped is not accepted.
return compare.getOperator() == CompareOp.NE
1104+
&& compare.getLeftArg().equals(Var.of(leftVarName))
1105+
&& compare.getRightArg().equals(Var.of(rightVarName));
1106+
}
1107+
1108+
/**
 * Counts the {@link StatementPattern} nodes the visitor reaches when started at the given expression.
 *
 * @param tupleExpr the expression to traverse
 * @return the number of statement patterns visited (zero or more)
 */
private static int countStatementPatterns(TupleExpr tupleExpr) {
1109+
ArrayList<StatementPattern> patterns = new ArrayList<>();
1110+
tupleExpr.visit(new AbstractQueryModelVisitor<RuntimeException>() {
1111+
@Override
1112+
public void meet(StatementPattern statementPattern) throws RuntimeException {
1113+
// The traversal does not descend further into the pattern; it is only recorded.
patterns.add(statementPattern);
1114+
}
1115+
});
1116+
return patterns.size();
1117+
}
1118+
9581119
/**
 * Returns the string value of the predicate of a statement pattern.
 *
 * @param expr the expression to read; it is cast to {@link StatementPattern} unconditionally, so any other
 *             type fails with a {@link ClassCastException}
 * @return the predicate value's string form
 */
private static String getPredicateValue(TupleExpr expr) {
9591120
return ((StatementPattern) expr).getPredicateVar().getValue().stringValue();
9601121
}
@@ -989,6 +1150,10 @@ private static void collectTupleExprPredicates(TupleExpr expr, List<String> pred
9891150
collectTupleExprPredicates(join.getRightArg(), predicates);
9901151
return;
9911152
}
1153+
if (expr instanceof Filter) {
1154+
collectTupleExprPredicates(((Filter) expr).getArg(), predicates);
1155+
return;
1156+
}
9921157
if (expr instanceof StatementPattern) {
9931158
predicates.add(getPredicateValue(expr));
9941159
}
@@ -1200,6 +1365,61 @@ public FilterPassEstimate estimateFilterPass(Filter filter) {
12001365
}
12011366
}
12021367

1368+
/**
 * Test statistics stub that returns fixed numbers: filters get a low cardinality and a very low pass ratio but a
 * high factor cost, while statement patterns get a cardinality of 1000 and a factor cost of (1000, 1000).
 *
 * Join cardinalities are looked up in a caller-supplied map and default to 1000.
 */
private static final class LocalFilterWorkRowsStatistics extends EvaluationStatistics
1369+
implements JoinFactorCostModel {
1370+
// Join cardinality per pair key, as produced by pairKey(left, right).
private final Map<String, Double> joinCosts;
1371+
1372+
private LocalFilterWorkRowsStatistics(Map<String, Double> joinCosts) {
1373+
this.joinCosts = joinCosts;
1374+
}
1375+
1376+
@Override
1377+
public boolean supportsJoinEstimation() {
1378+
return true;
1379+
}
1380+
1381+
@Override
1382+
public boolean supportsFilterSelectivityCosting() {
1383+
return true;
1384+
}
1385+
1386+
// Cardinality by node type: Join -> map lookup (default 1000), Filter -> 10, StatementPattern -> 1000,
// anything else -> the superclass estimate.
@Override
1387+
public double getCardinality(TupleExpr expr) {
1388+
if (expr instanceof Join) {
1389+
Join join = (Join) expr;
1390+
String left = tupleExprKey(join.getLeftArg());
1391+
String right = tupleExprKey(join.getRightArg());
1392+
// A join is only looked up when both sides yield a key; otherwise the default applies.
if (left != null && right != null) {
1393+
return joinCosts.getOrDefault(pairKey(left, right), 1000.0d);
1394+
}
1395+
return 1000.0d;
1396+
}
1397+
if (expr instanceof Filter) {
1398+
return 10.0d;
1399+
}
1400+
if (expr instanceof StatementPattern) {
1401+
return 1000.0d;
1402+
}
1403+
return super.getCardinality(expr);
1404+
}
1405+
1406+
// NOTE(review): the two FactorCostEstimate arguments are presumably (work rows, output rows), which would
// make a filter expensive to scan but small in output - confirm against FactorCostEstimate.
@Override
1407+
public Optional<FactorCostEstimate> estimateFactorCost(TupleExpr factor, Set<String> currentlyBoundVars) {
1408+
if (factor instanceof Filter) {
1409+
return Optional.of(new FactorCostEstimate(10_000.0d, 10.0d));
1410+
}
1411+
if (factor instanceof StatementPattern) {
1412+
return Optional.of(new FactorCostEstimate(1_000.0d, 1_000.0d));
1413+
}
1414+
// No estimate is offered for any other factor type.
return Optional.empty();
1415+
}
1416+
1417+
// Every filter is reported with the same 0.01 pass estimate, tagged as LEARNED_FILTER.
@Override
1418+
public FilterPassEstimate estimateFilterPass(Filter filter) {
1419+
return new FilterPassEstimate(0.01d, FilterPassEstimate.Source.LEARNED_FILTER);
1420+
}
1421+
}
1422+
12031423
private static final class FilterSelectivityStatistics extends EvaluationStatistics {
12041424
@Override
12051425
public double estimateFilterPassRatio(Filter filter) {

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SketchBasedJoinEstimator.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5815,7 +5815,7 @@ private String lookupKey(ValueExpr valueExpr, Set<String> currentlyBoundVars) {
58155815

58165816
private double resolveFilterMultiplier(Filter filter, StatementPattern pattern) {
58175817
double knownMultiplier = estimateKnownFilterMultiplier(filter, pattern);
5818-
return knownMultiplier > 0.0d ? knownMultiplier : 1.0d;
5818+
return knownMultiplier >= 0.0d ? knownMultiplier : 1.0d;
58195819
}
58205820

58215821
private double resolveFilterMultiplier(Filter filter, TuplePlanEstimate estimate) {
@@ -5825,7 +5825,7 @@ private double resolveFilterMultiplier(Filter filter, TuplePlanEstimate estimate
58255825
StatementPattern patternLocalBase = basePatternForFilter(filter);
58265826
if (patternLocalBase != null) {
58275827
double knownMultiplier = estimateKnownFilterMultiplier(filter, patternLocalBase);
5828-
if (knownMultiplier > 0.0d) {
5828+
if (knownMultiplier >= 0.0d) {
58295829
return knownMultiplier;
58305830
}
58315831
}
@@ -7526,7 +7526,7 @@ private void appendSketchPayload(State state, SketchAddress address, UpdateSketc
75267526
private void appendSketchPayload(State state, int entryId, SketchAddress address, UpdateSketch sketch)
75277527
throws IOException {
75287528
TrackedByteArray payload = serializeTrackedSketchPayload(MemoryCategory.SERIALIZATION_BUFFERS,
7529-
transientOwner(MEMORY_OWNER_SERIALIZATION_BUFFER, entryId), sketch);
7529+
transientOwner(MEMORY_OWNER_SERIALIZATION_BUFFER, entryId), sketch, false);
75307530
try {
75317531
appendSketchPayload(state, entryId, address, payload.bytes);
75327532
} finally {
@@ -8046,8 +8046,16 @@ private static int validateManifestHeader(DataInputStream in) throws IOException
80468046

80478047
/**
 * Convenience overload that delegates to the four-argument variant with {@code enforceCapacity} set to
 * {@code true}.
 *
 * @param category memory category passed through to the four-argument overload
 * @param ownerId  owner id passed through to the four-argument overload
 * @param sketch   the sketch to serialize
 * @return whatever the four-argument overload returns
 * @throws IOException propagated from the four-argument overload
 */
private TrackedByteArray serializeTrackedSketchPayload(MemoryCategory category, long ownerId, UpdateSketch sketch)
80488048
throws IOException {
8049+
return serializeTrackedSketchPayload(category, ownerId, sketch, true);
8050+
}
8051+
8052+
private TrackedByteArray serializeTrackedSketchPayload(MemoryCategory category, long ownerId, UpdateSketch sketch,
8053+
boolean enforceCapacity)
8054+
throws IOException {
80498055
long predictedBytes = estimateByteArrayBytes(Math.max(128, sketch.getCurrentBytes()));
8050-
ensureEstimatorCapacity(predictedBytes, true);
8056+
if (enforceCapacity) {
8057+
ensureEstimatorCapacity(predictedBytes, true);
8058+
}
80518059
reserveTrackedMemory(category, ownerId, predictedBytes);
80528060
try (ByteArrayOutputStream bos = new ByteArrayOutputStream(128);
80538061
DataOutputStream out = new DataOutputStream(bos)) {

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SketchJoinOrderPlanner.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ private double newlyUnlockedFilterPassRatio(long previousMask, long nextMask) {
515515
continue;
516516
}
517517
double filterPassRatio = filter.getEstimatedPassRatio();
518-
if (Double.isFinite(filterPassRatio) && filterPassRatio > 0.0d && filterPassRatio <= 1.0d) {
518+
if (Double.isFinite(filterPassRatio) && filterPassRatio >= 0.0d && filterPassRatio <= 1.0d) {
519519
passRatio *= filterPassRatio;
520520
found = true;
521521
}

0 commit comments

Comments
 (0)