Skip to content

Commit 5f67425

Browse files
authored
GH-4878 optimise sub-select (#4879)
* GH-4878 optimise sub-selects
* JoinVisitor must be extensible for developers who extend RDF4J
* add benchmark
* adjustments based on code review
1 parent 151c313 commit 5f67425

10 files changed

Lines changed: 496 additions & 180 deletions

File tree

core/query/src/main/java/org/eclipse/rdf4j/query/explanation/GenericPlanNode.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,8 @@ static private String toHumanReadableNumber(Double number) {
353353
humanReadbleString = Math.round(number / 100_000) / 10.0 + "M";
354354
} else if (number > 1_000) {
355355
humanReadbleString = Math.round(number / 100) / 10.0 + "K";
356+
} else if (number < 10 && number > 0) {
357+
humanReadbleString = String.format("%.2f", number);
356358
} else if (number >= 0) {
357359
humanReadbleString = Math.round(number) + "";
358360
} else {

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/QueryJoinOptimizer.java

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
import org.eclipse.rdf4j.query.QueryEvaluationException;
3535
import org.eclipse.rdf4j.query.algebra.AbstractQueryModelNode;
3636
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
37-
import org.eclipse.rdf4j.query.algebra.Extension;
3837
import org.eclipse.rdf4j.query.algebra.Join;
3938
import org.eclipse.rdf4j.query.algebra.LeftJoin;
4039
import org.eclipse.rdf4j.query.algebra.StatementPattern;
@@ -100,16 +99,23 @@ public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings)
10099
tupleExpr.visit(new JoinVisitor(statistics, trackResultSize, tripleSource));
101100
}
102101

103-
private static class JoinVisitor extends AbstractSimpleQueryModelVisitor<RuntimeException> {
102+
/**
103+
* This can be extended by subclasses to allow for adjustments to the optimization process.
104+
*/
105+
@SuppressWarnings("InnerClassMayBeStatic")
106+
protected class JoinVisitor extends AbstractSimpleQueryModelVisitor<RuntimeException> {
104107

105108
private final EvaluationStatistics statistics;
106109
private final TripleSource tripleSource;
110+
private final boolean trackResultSize;
107111
Set<String> boundVars = new HashSet<>();
112+
private double currentHighestCost = 1;
108113

109114
private JoinVisitor(EvaluationStatistics statistics, boolean trackResultSize, TripleSource tripleSource) {
110115
super(trackResultSize);
111116
this.statistics = statistics;
112117
this.tripleSource = tripleSource;
118+
this.trackResultSize = trackResultSize;
113119
}
114120

115121
@Override
@@ -145,7 +151,6 @@ private void optimizePriorityJoin(Set<String> origBoundVars, TupleExpr join) {
145151

146152
@Override
147153
public void meet(Join node) {
148-
149154
Set<String> origBoundVars = boundVars;
150155
try {
151156
boundVars = new HashSet<>(boundVars);
@@ -155,10 +160,13 @@ public void meet(Join node) {
155160

156161
// get all extensions (BIND clause)
157162
List<TupleExpr> orderedExtensions = getExtensionTupleExprs(joinArgs);
163+
optimizeInNewScope(orderedExtensions);
158164
joinArgs.removeAll(orderedExtensions);
159165

160166
// get all subselects and order them
161-
List<TupleExpr> orderedSubselects = reorderSubselects(getSubSelects(joinArgs));
167+
List<TupleExpr> subSelects = getSubSelects(joinArgs);
168+
optimizeInNewScope(subSelects);
169+
List<TupleExpr> orderedSubselects = reorderSubselects(subSelects);
162170
joinArgs.removeAll(orderedSubselects);
163171

164172
// Reorder the subselects and extensions to a more optimal sequence
@@ -215,6 +223,7 @@ public void meet(Join node) {
215223
// order all other join arguments based on available statistics
216224
while (!joinArgs.isEmpty()) {
217225
TupleExpr tupleExpr = selectNextTupleExpr(joinArgs, cardinalityMap, varsMap, varFreqMap);
226+
this.currentHighestCost = Math.max(currentHighestCost, tupleExpr.getCostEstimate());
218227

219228
joinArgs.remove(tupleExpr);
220229
orderedJoinArgs.addLast(tupleExpr);
@@ -321,6 +330,12 @@ public void meet(Join node) {
321330
}
322331
}
323332

333+
private void optimizeInNewScope(List<TupleExpr> subSelects) {
334+
for (TupleExpr subSelect : subSelects) {
335+
subSelect.visit(new JoinVisitor(statistics, trackResultSize, tripleSource));
336+
}
337+
}
338+
324339
private boolean joinSizeIsTooDifferent(double cardinality, double second) {
325340
if (cardinality > second && cardinality / MERGE_JOIN_CARDINALITY_SIZE_DIFF_MULTIPLIER > second) {
326341
return true;
@@ -393,16 +408,6 @@ protected <M extends Map<Var, Integer>> void fillVarFreqMap(List<Var> varList, M
393408
}
394409
}
395410

396-
protected List<Extension> getExtensions(List<TupleExpr> expressions) {
397-
List<Extension> extensions = new ArrayList<>();
398-
for (TupleExpr expr : expressions) {
399-
if (expr instanceof Extension) {
400-
extensions.add((Extension) expr);
401-
}
402-
}
403-
return extensions;
404-
}
405-
406411
private List<TupleExpr> getExtensionTupleExprs(List<TupleExpr> expressions) {
407412
if (expressions.isEmpty()) {
408413
return List.of();
@@ -424,6 +429,14 @@ private List<TupleExpr> getExtensionTupleExprs(List<TupleExpr> expressions) {
424429
return extensions;
425430
}
426431

432+
/**
433+
* This method returns all direct sub-selects in the given list of expressions.
434+
* <p>
435+
* Subclasses may override this method.
436+
*
437+
* @param expressions the list of expressions to search for direct sub-selects
438+
* @return the direct sub-selects found in the given list of expressions
439+
*/
427440
protected List<TupleExpr> getSubSelects(List<TupleExpr> expressions) {
428441
if (expressions.isEmpty()) {
429442
return List.of();
@@ -647,6 +660,11 @@ protected double getTupleExprCost(TupleExpr tupleExpr, Map<TupleExpr, Double> ca
647660
cost = cardinalityMap.get(tupleExpr);
648661
}
649662

663+
// Adding 5 to the cost allows us to order tuple expressions based on which variables are already bound even
664+
// if the statistics returns a cardinality of 0. This is useful for cases where the statistics are
665+
// inaccurate, such as when querying the data added in the current transaction.
666+
cost += 5;
667+
650668
List<Var> vars = varsMap.get(tupleExpr);
651669

652670
// Compensate for variables that are bound earlier in the evaluation
@@ -656,8 +674,15 @@ protected double getTupleExprCost(TupleExpr tupleExpr, Map<TupleExpr, Double> ca
656674
int nonConstantVarCount = vars.size() - constantVars;
657675

658676
if (nonConstantVarCount > 0) {
659-
double exp = (double) unboundVars.size() / nonConstantVarCount;
660-
cost = Math.pow(cost, exp);
677+
int boundVarCount = nonConstantVarCount - unboundVars.size();
678+
if (boundVarCount == 0) {
679+
// Cartesian Product!
680+
cost = cost * currentHighestCost;
681+
} else {
682+
double exp = (double) unboundVars.size() / nonConstantVarCount;
683+
cost = Math.pow(cost, exp);
684+
}
685+
661686
}
662687

663688
if (unboundVars.isEmpty()) {
@@ -771,7 +796,7 @@ private void mergeJoinForCrossJoin(Deque<TupleExpr> orderedJoinArgs, Set<Var> su
771796
}
772797
}
773798

774-
private static class StatementPatternVarCollector extends StatementPatternVisitor {
799+
private class StatementPatternVarCollector extends StatementPatternVisitor {
775800

776801
private final TupleExpr tupleExpr;
777802
private List<Var> vars;

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/QueryCostEstimatesTest.java

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.query.algebra.evaluation.impl;
1212

13+
import static org.assertj.core.api.Assertions.assertThat;
1314
import static org.junit.jupiter.api.Assertions.assertEquals;
1415

1516
import org.eclipse.rdf4j.common.exception.RDF4JException;
@@ -23,8 +24,6 @@
2324
*/
2425
public class QueryCostEstimatesTest {
2526

26-
private final String LINE_SEP = System.lineSeparator();
27-
2827
@Test
2928
public void testBindingSetAssignmentOptimization() throws RDF4JException {
3029
String query = "prefix ex: <ex:>" + "select ?s ?p ?o ?x where {" + " ex:s1 ex:pred ?v. "
@@ -36,31 +35,34 @@ public void testBindingSetAssignmentOptimization() throws RDF4JException {
3635
QueryJoinOptimizer opt = new QueryJoinOptimizer(new EvaluationStatistics(), new EmptyTripleSource());
3736
opt.optimize(q.getTupleExpr(), null, null);
3837

39-
assertEquals("QueryRoot" + LINE_SEP +
40-
" Projection" + LINE_SEP +
41-
" ProjectionElemList" + LINE_SEP +
42-
" ProjectionElem \"s\"" + LINE_SEP +
43-
" ProjectionElem \"p\"" + LINE_SEP +
44-
" ProjectionElem \"o\"" + LINE_SEP +
45-
" ProjectionElem \"x\"" + LINE_SEP +
46-
" Join" + LINE_SEP +
47-
" StatementPattern (costEstimate=1, resultSizeEstimate=1)" + LINE_SEP +
48-
" Var (name=_const_5c6ba46_uri, value=ex:s2, anonymous)" + LINE_SEP +
49-
" Var (name=_const_af00e088_uri, value=ex:pred, anonymous)" + LINE_SEP +
50-
" Var (name=_const_17c09_lit_e2eec718, value=\"bah\", anonymous)" + LINE_SEP +
51-
" Join" + LINE_SEP +
52-
" StatementPattern (costEstimate=10, resultSizeEstimate=10)" + LINE_SEP +
53-
" Var (name=_const_5c6ba45_uri, value=ex:s1, anonymous)" + LINE_SEP +
54-
" Var (name=_const_af00e088_uri, value=ex:pred, anonymous)" + LINE_SEP +
55-
" Var (name=v)" + LINE_SEP +
56-
" LeftJoin (new scope) (costEstimate=1000, resultSizeEstimate=1000)" + LINE_SEP +
57-
" StatementPattern (resultSizeEstimate=1000)" + LINE_SEP +
58-
" Var (name=s)" + LINE_SEP +
59-
" Var (name=p)" + LINE_SEP +
60-
" Var (name=o)" + LINE_SEP +
61-
" BindingSetAssignment ([[x=ex:a], [x=ex:b], [x=ex:c], [x=ex:d], [x=ex:e], [x=ex:f], [x=ex:g]])"
62-
+ LINE_SEP,
63-
q.getTupleExpr().toString());
38+
String actual = q.getTupleExpr().toString();
39+
40+
assertThat(actual).contains(System.lineSeparator());
41+
42+
assertThat(actual).isEqualToNormalizingNewlines("QueryRoot\n" +
43+
" Projection\n" +
44+
" ProjectionElemList\n" +
45+
" ProjectionElem \"s\"\n" +
46+
" ProjectionElem \"p\"\n" +
47+
" ProjectionElem \"o\"\n" +
48+
" ProjectionElem \"x\"\n" +
49+
" Join\n" +
50+
" StatementPattern (costEstimate=6.00, resultSizeEstimate=1.00)\n" +
51+
" Var (name=_const_5c6ba46_uri, value=ex:s2, anonymous)\n" +
52+
" Var (name=_const_af00e088_uri, value=ex:pred, anonymous)\n" +
53+
" Var (name=_const_17c09_lit_e2eec718, value=\"bah\", anonymous)\n" +
54+
" Join\n" +
55+
" StatementPattern (costEstimate=90, resultSizeEstimate=10)\n" +
56+
" Var (name=_const_5c6ba45_uri, value=ex:s1, anonymous)\n" +
57+
" Var (name=_const_af00e088_uri, value=ex:pred, anonymous)\n" +
58+
" Var (name=v)\n" +
59+
" LeftJoin (new scope) (costEstimate=90.5K, resultSizeEstimate=1000)\n" +
60+
" StatementPattern (resultSizeEstimate=1000)\n" +
61+
" Var (name=s)\n" +
62+
" Var (name=p)\n" +
63+
" Var (name=o)\n" +
64+
" BindingSetAssignment ([[x=ex:a], [x=ex:b], [x=ex:c], [x=ex:d], [x=ex:e], [x=ex:f], [x=ex:g]])\n"
65+
);
6466

6567
}
6668

core/queryalgebra/model/src/main/java/org/eclipse/rdf4j/query/algebra/AbstractQueryModelNode.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,16 @@ public void setTotalTimeNanosActual(long totalTimeNanosActual) {
170170
/**
171171
* @return Human readable number. E.g. 1.2M for 1212213.4 and UNKNOWN for -1.
172172
*/
173-
static String toHumanReadbleNumber(double number) {
173+
static String toHumanReadableNumber(double number) {
174174
String humanReadbleString;
175175
if (number == Double.POSITIVE_INFINITY) {
176176
humanReadbleString = "∞";
177177
} else if (number > 1_000_000) {
178178
humanReadbleString = Math.round(number / 100_000) / 10.0 + "M";
179179
} else if (number > 1_000) {
180180
humanReadbleString = Math.round(number / 100) / 10.0 + "K";
181-
} else if (number >= 0) {
182-
humanReadbleString = Math.round(number) + "";
181+
} else if (number < 10 && number > 0) {
182+
humanReadbleString = String.format("%.2f", number);
183183
} else {
184184
humanReadbleString = "UNKNOWN";
185185
}

core/queryalgebra/model/src/main/java/org/eclipse/rdf4j/query/algebra/helpers/QueryModelTreePrinter.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ static String toHumanReadableNumber(double number) {
107107
humanReadbleString = Math.round(number / 100_000) / 10.0 + "M";
108108
} else if (number > 1_000) {
109109
humanReadbleString = Math.round(number / 100) / 10.0 + "K";
110+
} else if (number < 10 && number > 0) {
111+
humanReadbleString = String.format("%.2f", number);
110112
} else if (number >= 0) {
111113
humanReadbleString = Math.round(number) + "";
112114
} else {

core/queryalgebra/model/src/test/java/org/eclipse/rdf4j/query/algebra/AbstractQueryModelNodeTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,35 +21,35 @@ public void getCardinalityString() {
2121

2222
{
2323
StatementPattern statementPattern = new StatementPattern(new Var("s"), new Var("p"), new Var("o"));
24-
String cardinalityString = statementPattern.toHumanReadbleNumber(statementPattern.getResultSizeEstimate());
24+
String cardinalityString = statementPattern.toHumanReadableNumber(statementPattern.getResultSizeEstimate());
2525
assertEquals("UNKNOWN", cardinalityString);
2626
}
2727

2828
{
2929
StatementPattern statementPattern = new StatementPattern(new Var("s"), new Var("p"), new Var("o"));
3030
statementPattern.setResultSizeEstimate(1234);
31-
String cardinalityString = statementPattern.toHumanReadbleNumber(statementPattern.getResultSizeEstimate());
31+
String cardinalityString = statementPattern.toHumanReadableNumber(statementPattern.getResultSizeEstimate());
3232
assertEquals("1.2K", cardinalityString);
3333
}
3434

3535
{
3636
StatementPattern statementPattern = new StatementPattern(new Var("s"), new Var("p"), new Var("o"));
3737
statementPattern.setResultSizeEstimate(1910000);
38-
String cardinalityString = statementPattern.toHumanReadbleNumber(statementPattern.getResultSizeEstimate());
38+
String cardinalityString = statementPattern.toHumanReadableNumber(statementPattern.getResultSizeEstimate());
3939
assertEquals("1.9M", cardinalityString);
4040
}
4141

4242
{
4343
StatementPattern statementPattern = new StatementPattern(new Var("s"), new Var("p"), new Var("o"));
4444
statementPattern.setResultSizeEstimate(1990000);
45-
String cardinalityString = statementPattern.toHumanReadbleNumber(statementPattern.getResultSizeEstimate());
45+
String cardinalityString = statementPattern.toHumanReadableNumber(statementPattern.getResultSizeEstimate());
4646
assertEquals("2.0M", cardinalityString);
4747
}
4848

4949
{
5050
StatementPattern statementPattern = new StatementPattern(new Var("s"), new Var("p"), new Var("o"));
5151
statementPattern.setResultSizeEstimate(912000);
52-
String cardinalityString = statementPattern.toHumanReadbleNumber(statementPattern.getResultSizeEstimate());
52+
String cardinalityString = statementPattern.toHumanReadableNumber(statementPattern.getResultSizeEstimate());
5353
assertEquals("912.0K", cardinalityString);
5454
}
5555

0 commit comments

Comments
 (0)