|
34 | 34 | import org.eclipse.rdf4j.query.MalformedQueryException; |
35 | 35 | import org.eclipse.rdf4j.query.QueryLanguage; |
36 | 36 | import org.eclipse.rdf4j.query.UnsupportedQueryLanguageException; |
| 37 | +import org.eclipse.rdf4j.query.algebra.And; |
37 | 38 | import org.eclipse.rdf4j.query.algebra.BinaryTupleOperator; |
38 | 39 | import org.eclipse.rdf4j.query.algebra.BindingSetAssignment; |
39 | 40 | import org.eclipse.rdf4j.query.algebra.Compare; |
|
52 | 53 | import org.eclipse.rdf4j.query.algebra.Var; |
53 | 54 | import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; |
54 | 55 | import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizerTest; |
| 56 | +import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinFactorCostModel; |
55 | 57 | import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinOrderPlanner; |
56 | 58 | import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.QueryJoinOptimizer; |
57 | 59 | import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor; |
@@ -509,6 +511,23 @@ public void optimizeNormalizesPlannerOutputBindingsBeforeFirstUse() { |
509 | 511 | .containsExactly(uValues, vValues, follows, name); |
510 | 512 | } |
511 | 513 |
|
| 514 | + @Test |
| 515 | + public void optimizeNormalizesPlannerOutputFilterWrappedBindingsBeforeFirstUse() { |
| 516 | + BindingSetAssignment uValues = bindingSetAssignment("u", "u1"); |
| 517 | + Filter uRestriction = filter(uValues, "u", "u"); |
| 518 | + BindingSetAssignment vValues = bindingSetAssignment("v", "v1"); |
| 519 | + StatementPattern follows = statementPattern("u", "v", ex("follows")); |
| 520 | + PlannerStatistics statistics = new PlannerStatistics(List.of(vValues, follows, uRestriction), |
| 521 | + List.of(uRestriction, vValues, follows)); |
| 522 | + |
| 523 | + QueryRoot root = new QueryRoot(new Join(new Join(uRestriction, vValues), follows)); |
| 524 | + new QueryJoinOptimizer(statistics, new EmptyTripleSource()).optimize(root, null, null); |
| 525 | + |
| 526 | + assertThat(flattenJoinLeaves(root.getArg())) |
| 527 | + .as("Filter-wrapped BindingSetAssignment factors should be positioned before first use") |
| 528 | + .containsExactly(uRestriction, vValues, follows); |
| 529 | + } |
| 530 | + |
512 | 531 | @Test |
513 | 532 | public void optimizePassesDeferredFilterConstraintsToPlannerForFilteredPrefixChoice() { |
514 | 533 | StatementPattern pValue = statementPattern("result", "p", ex("pValue")); |
@@ -725,6 +744,60 @@ public void optimizeTreatsKnownZeroPassFilterAsSelectiveForGreedyPrefix() { |
725 | 744 | .containsExactlyInAnyOrder(ex("pA"), ex("pB")); |
726 | 745 | } |
727 | 746 |
|
| 747 | + @Test |
| 748 | + public void optimizeLetsSelectiveLocalFilterOutputBeatBroadWorkRows() { |
| 749 | + StatementPattern patientType = statementPattern("patient", "type", rdfType()); |
| 750 | + StatementPattern hasEncounter = statementPattern("patient", "enc", medical("hasEncounter")); |
| 751 | + StatementPattern hasObservation = statementPattern("enc", "obs", medical("hasObservation")); |
| 752 | + Filter valueFilter = filter(statementPattern("obs", "value", medical("value")), "value", "limit"); |
| 753 | + |
| 754 | + QueryRoot root = new QueryRoot(new Join(new Join(patientType, hasEncounter), |
| 755 | + new Join(hasObservation, valueFilter))); |
| 756 | + new QueryJoinOptimizer( |
| 757 | + new LocalFilterWorkRowsStatistics(Map.of( |
| 758 | + pairKey(rdfType(), medical("hasEncounter")), 100.0d, |
| 759 | + pairKey(medical("hasObservation"), medical("value")), 120.0d)), |
| 760 | + new EmptyTripleSource()).optimize(root, null, null); |
| 761 | + |
| 762 | + List<String> leafPredicates = flattenedLeafPredicates(root.getArg()); |
| 763 | + assertThat(leafPredicates) |
| 764 | + .as("A selective local literal filter should seed before a broad type-edge prefix even when its scan work is higher") |
| 765 | + .startsWith(medical("value")); |
| 766 | + assertThat(leafPredicates.indexOf(medical("value"))) |
| 767 | + .isLessThan(leafPredicates.indexOf(medical("hasEncounter"))); |
| 768 | + } |
| 769 | + |
| 770 | + @Test |
| 771 | + public void optimizeGroupsCheapValuesFilterBeforeCycleJoinAndExists() { |
| 772 | + BindingSetAssignment userPairValues = bindingSetAssignment(Map.of( |
| 773 | + "u1", ex("social/user/0"), |
| 774 | + "u2", ex("social/user/1"))); |
| 775 | + BindingSetAssignment user3Values = bindingSetAssignment(Map.of("u3", ex("social/user/2"))); |
| 776 | + StatementPattern u1FollowsU2 = statementPattern("u1", "u2", social("follows")); |
| 777 | + StatementPattern u2FollowsU1 = statementPattern("u2", "u1", social("follows")); |
| 778 | + StatementPattern u1FollowsU3 = statementPattern("u1", "u3", social("follows")); |
| 779 | + StatementPattern u3FollowsU1 = statementPattern("u3", "u1", social("follows")); |
| 780 | + StatementPattern u2FollowsU3 = statementPattern("u2", "u3", social("follows")); |
| 781 | + StatementPattern u3FollowsU2 = statementPattern("u3", "u2", social("follows")); |
| 782 | + TupleExpr cycle = new Join(new Join(new Join(new Join(new Join(new Join(new Join(userPairValues, user3Values), |
| 783 | + u1FollowsU2), u1FollowsU3), u2FollowsU1), u3FollowsU1), u2FollowsU3), u3FollowsU2); |
| 784 | + Filter combinedFilter = new Filter(cycle, new And( |
| 785 | + new Compare(Var.of("u1"), Var.of("u3"), CompareOp.NE), |
| 786 | + new Exists(statementPattern("u1", "name", social("name"))))); |
| 787 | + |
| 788 | + QueryRoot root = new QueryRoot(combinedFilter); |
| 789 | + new QueryJoinOptimizer(new EvaluationStatistics(), new EmptyTripleSource()).optimize(root, null, null); |
| 790 | + |
| 791 | + Filter relocatedValuesFilter = compareFilter(root, "u1", "u3"); |
| 792 | + assertThat(countStatementPatterns(relocatedValuesFilter.getArg())) |
| 793 | + .as("A cheap inequality over VALUES-bound variables should be applied before any cycle join") |
| 794 | + .isZero(); |
| 795 | + Filter existsFilter = existsFilter(root); |
| 796 | + assertThat(countStatementPatterns(relocatedValuesFilter.getArg())) |
| 797 | + .as("The cheap VALUES inequality should be scheduled before the expensive EXISTS filter") |
| 798 | + .isLessThanOrEqualTo(countStatementPatterns(existsFilter.getArg())); |
| 799 | + } |
| 800 | + |
728 | 801 | @Test |
729 | 802 | public void optimizeDiscountsNotExistsUnlockAgainstConnectedInitialPair() { |
730 | 803 | String query = String.join("\n", |
@@ -864,6 +937,26 @@ public void meet(StatementPattern statementPattern) { |
864 | 937 | return predicates.stream().limit(limit).collect(Collectors.toList()); |
865 | 938 | } |
866 | 939 |
|
| 940 | + private static List<String> flattenedLeafPredicates(TupleExpr tupleExpr) { |
| 941 | + return flattenJoinLeavesKeepingScopeBarriers(unwrapQueryRoot(tupleExpr)).stream() |
| 942 | + .map(QueryJoinOptimizerTest::firstStatementPatternPredicate) |
| 943 | + .filter(predicate -> predicate != null) |
| 944 | + .collect(Collectors.toList()); |
| 945 | + } |
| 946 | + |
| 947 | + private static String firstStatementPatternPredicate(TupleExpr tupleExpr) { |
| 948 | + ArrayList<String> predicates = new ArrayList<>(); |
| 949 | + unwrapQueryRoot(tupleExpr).visit(new AbstractQueryModelVisitor<RuntimeException>() { |
| 950 | + @Override |
| 951 | + public void meet(StatementPattern statementPattern) { |
| 952 | + if (predicates.isEmpty()) { |
| 953 | + predicates.add(statementPattern.getPredicateVar().getValue().stringValue()); |
| 954 | + } |
| 955 | + } |
| 956 | + }); |
| 957 | + return predicates.isEmpty() ? null : predicates.get(0); |
| 958 | + } |
| 959 | + |
867 | 960 | private static TupleExpr unwrapQueryRoot(TupleExpr tupleExpr) { |
868 | 961 | while (tupleExpr instanceof QueryRoot || tupleExpr instanceof Projection) { |
869 | 962 | if (tupleExpr instanceof QueryRoot) { |
@@ -955,6 +1048,74 @@ private static BindingSetAssignment bindingSetAssignment(String varName, String |
955 | 1048 | return assignment; |
956 | 1049 | } |
957 | 1050 |
|
| 1051 | + private static BindingSetAssignment bindingSetAssignment(Map<String, String> iriBindings) { |
| 1052 | + BindingSetAssignment assignment = new BindingSetAssignment(); |
| 1053 | + QueryBindingSet bindingSet = new QueryBindingSet(); |
| 1054 | + iriBindings.forEach((name, iri) -> bindingSet.addBinding(name, VF.createIRI(iri))); |
| 1055 | + assignment.setBindingSets(List.<BindingSet>of(bindingSet)); |
| 1056 | + return assignment; |
| 1057 | + } |
| 1058 | + |
| 1059 | + private static String rdfType() { |
| 1060 | + return "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; |
| 1061 | + } |
| 1062 | + |
| 1063 | + private static String medical(String localName) { |
| 1064 | + return "http://example.com/theme/medical/" + localName; |
| 1065 | + } |
| 1066 | + |
| 1067 | + private static String social(String localName) { |
| 1068 | + return "http://example.com/theme/social/" + localName; |
| 1069 | + } |
| 1070 | + |
| 1071 | + private static Filter compareFilter(QueryModelNode root, String leftVarName, String rightVarName) { |
| 1072 | + return filters(root).stream() |
| 1073 | + .filter(filter -> isCompareFilter(filter, leftVarName, rightVarName)) |
| 1074 | + .findFirst() |
| 1075 | + .orElseThrow(() -> new AssertionError("Missing compare filter " + leftVarName + " != " |
| 1076 | + + rightVarName + " in " + root)); |
| 1077 | + } |
| 1078 | + |
| 1079 | + private static Filter existsFilter(QueryModelNode root) { |
| 1080 | + return filters(root).stream() |
| 1081 | + .filter(filter -> filter.getCondition() instanceof Exists) |
| 1082 | + .findFirst() |
| 1083 | + .orElseThrow(() -> new AssertionError("Missing EXISTS filter in " + root)); |
| 1084 | + } |
| 1085 | + |
| 1086 | + private static List<Filter> filters(QueryModelNode root) { |
| 1087 | + ArrayList<Filter> filters = new ArrayList<>(); |
| 1088 | + root.visit(new AbstractQueryModelVisitor<RuntimeException>() { |
| 1089 | + @Override |
| 1090 | + public void meet(Filter filter) throws RuntimeException { |
| 1091 | + filters.add(filter); |
| 1092 | + super.meet(filter); |
| 1093 | + } |
| 1094 | + }); |
| 1095 | + return filters; |
| 1096 | + } |
| 1097 | + |
| 1098 | + private static boolean isCompareFilter(Filter filter, String leftVarName, String rightVarName) { |
| 1099 | + if (!(filter.getCondition() instanceof Compare)) { |
| 1100 | + return false; |
| 1101 | + } |
| 1102 | + Compare compare = (Compare) filter.getCondition(); |
| 1103 | + return compare.getOperator() == CompareOp.NE |
| 1104 | + && compare.getLeftArg().equals(Var.of(leftVarName)) |
| 1105 | + && compare.getRightArg().equals(Var.of(rightVarName)); |
| 1106 | + } |
| 1107 | + |
| 1108 | + private static int countStatementPatterns(TupleExpr tupleExpr) { |
| 1109 | + ArrayList<StatementPattern> patterns = new ArrayList<>(); |
| 1110 | + tupleExpr.visit(new AbstractQueryModelVisitor<RuntimeException>() { |
| 1111 | + @Override |
| 1112 | + public void meet(StatementPattern statementPattern) throws RuntimeException { |
| 1113 | + patterns.add(statementPattern); |
| 1114 | + } |
| 1115 | + }); |
| 1116 | + return patterns.size(); |
| 1117 | + } |
| 1118 | + |
958 | 1119 | private static String getPredicateValue(TupleExpr expr) { |
959 | 1120 | return ((StatementPattern) expr).getPredicateVar().getValue().stringValue(); |
960 | 1121 | } |
@@ -989,6 +1150,10 @@ private static void collectTupleExprPredicates(TupleExpr expr, List<String> pred |
989 | 1150 | collectTupleExprPredicates(join.getRightArg(), predicates); |
990 | 1151 | return; |
991 | 1152 | } |
| 1153 | + if (expr instanceof Filter) { |
| 1154 | + collectTupleExprPredicates(((Filter) expr).getArg(), predicates); |
| 1155 | + return; |
| 1156 | + } |
992 | 1157 | if (expr instanceof StatementPattern) { |
993 | 1158 | predicates.add(getPredicateValue(expr)); |
994 | 1159 | } |
@@ -1200,6 +1365,61 @@ public FilterPassEstimate estimateFilterPass(Filter filter) { |
1200 | 1365 | } |
1201 | 1366 | } |
1202 | 1367 |
|
| 1368 | + private static final class LocalFilterWorkRowsStatistics extends EvaluationStatistics |
| 1369 | + implements JoinFactorCostModel { |
| 1370 | + private final Map<String, Double> joinCosts; |
| 1371 | + |
| 1372 | + private LocalFilterWorkRowsStatistics(Map<String, Double> joinCosts) { |
| 1373 | + this.joinCosts = joinCosts; |
| 1374 | + } |
| 1375 | + |
| 1376 | + @Override |
| 1377 | + public boolean supportsJoinEstimation() { |
| 1378 | + return true; |
| 1379 | + } |
| 1380 | + |
| 1381 | + @Override |
| 1382 | + public boolean supportsFilterSelectivityCosting() { |
| 1383 | + return true; |
| 1384 | + } |
| 1385 | + |
| 1386 | + @Override |
| 1387 | + public double getCardinality(TupleExpr expr) { |
| 1388 | + if (expr instanceof Join) { |
| 1389 | + Join join = (Join) expr; |
| 1390 | + String left = tupleExprKey(join.getLeftArg()); |
| 1391 | + String right = tupleExprKey(join.getRightArg()); |
| 1392 | + if (left != null && right != null) { |
| 1393 | + return joinCosts.getOrDefault(pairKey(left, right), 1000.0d); |
| 1394 | + } |
| 1395 | + return 1000.0d; |
| 1396 | + } |
| 1397 | + if (expr instanceof Filter) { |
| 1398 | + return 10.0d; |
| 1399 | + } |
| 1400 | + if (expr instanceof StatementPattern) { |
| 1401 | + return 1000.0d; |
| 1402 | + } |
| 1403 | + return super.getCardinality(expr); |
| 1404 | + } |
| 1405 | + |
| 1406 | + @Override |
| 1407 | + public Optional<FactorCostEstimate> estimateFactorCost(TupleExpr factor, Set<String> currentlyBoundVars) { |
| 1408 | + if (factor instanceof Filter) { |
| 1409 | + return Optional.of(new FactorCostEstimate(10_000.0d, 10.0d)); |
| 1410 | + } |
| 1411 | + if (factor instanceof StatementPattern) { |
| 1412 | + return Optional.of(new FactorCostEstimate(1_000.0d, 1_000.0d)); |
| 1413 | + } |
| 1414 | + return Optional.empty(); |
| 1415 | + } |
| 1416 | + |
| 1417 | + @Override |
| 1418 | + public FilterPassEstimate estimateFilterPass(Filter filter) { |
| 1419 | + return new FilterPassEstimate(0.01d, FilterPassEstimate.Source.LEARNED_FILTER); |
| 1420 | + } |
| 1421 | + } |
| 1422 | + |
1203 | 1423 | private static final class FilterSelectivityStatistics extends EvaluationStatistics { |
1204 | 1424 | @Override |
1205 | 1425 | public double estimateFilterPassRatio(Filter filter) { |
|
0 commit comments