Skip to content

Commit 67b9888

Browse files
committed
very good results with few regressions
1 parent b389762 commit 67b9888

4 files changed

Lines changed: 1360 additions & 1240 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/QueryJoinOptimizer.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ private void optimizeJoinReplacement(TupleExpr replaceTarget, Join join,
387387

388388
if (root != null) {
389389
root = reapplyDeferredFilters(root, orderedJoinPlan.rootDeferredFilters);
390+
normalizeFilterLocalBindingSetAssignmentOrder(root);
390391

391392
if (TupleExprs.isVariableScopeChange(replaceTarget)) {
392393
((AbstractQueryModelNode) root).setVariableScopeChange(true);
@@ -401,6 +402,36 @@ private void optimizeJoinReplacement(TupleExpr replaceTarget, Join join,
401402
}
402403
}
403404

405+
private void normalizeFilterLocalBindingSetAssignmentOrder(TupleExpr root) {
406+
root.visit(new AbstractSimpleQueryModelVisitor<RuntimeException>() {
407+
@Override
408+
public void meet(Filter filter) {
409+
filter.getArg().visit(this);
410+
411+
List<TupleExpr> factors = new ArrayList<>();
412+
collectFilterJoinFactors(filter.getArg(), factors);
413+
if (factors.size() < 2) {
414+
return;
415+
}
416+
417+
List<TupleExpr> positionedFactors = positionBindingSetAssignmentsInSegment(factors);
418+
if (!sameIdentityOrder(factors, positionedFactors)) {
419+
filter.setArg(buildJoinRoot(new ArrayDeque<>(positionedFactors)));
420+
}
421+
}
422+
});
423+
}
424+
425+
private void collectFilterJoinFactors(TupleExpr tupleExpr, List<TupleExpr> factors) {
426+
if (tupleExpr instanceof Join && !isJoinOrderSeparator(tupleExpr)) {
427+
Join join = (Join) tupleExpr;
428+
collectFilterJoinFactors(join.getLeftArg(), factors);
429+
collectFilterJoinFactors(join.getRightArg(), factors);
430+
} else {
431+
factors.add(tupleExpr);
432+
}
433+
}
434+
404435
private void visitDeferredFilterConditions(List<Filter> filters, Set<String> outerBoundVars) {
405436
for (Filter filter : filters) {
406437
Set<String> origBoundVars = boundVars;

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/LmdbThemeQueryRegressionTest.java

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,41 @@ class LmdbThemeQueryRegressionTest {
9090
"<http://example.com/theme/train/partOfLine> ?line"),
9191
anchor(Theme.TRAIN, 7, "<http://example.com/theme/train/name> ?name",
9292
"<http://example.com/theme/train/passesThrough> ?op"));
93+
/**
 * Rendered query text that {@code medicalConditionsOrMedicationsByCodeUsesBranchLocalValuesAndFilter} expects
 * from the optimizer snapshot for {@code Theme.MEDICAL_RECORDS}, query index 1. The fragments are joined with
 * {@code "\n"} and compared by exact string equality against the trimmed rendered query, so whitespace inside
 * the literals is significant.
 */
private static final String MEDICAL_Q1_FASTEST_RENDERED_QUERY = String.join("\n",
94+
"SELECT (COUNT(DISTINCT ?entity) AS ?count) WHERE {",
95+
" {",
96+
" ?entity a <http://example.com/theme/medical/Condition> .",
97+
" ?entity <http://example.com/theme/medical/code> ?code .",
98+
" VALUES ?target { \"DX-200\" \"DX-201\" }",
99+
" FILTER ((?code = ?target) || (?code = \"DX-202\"))",
100+
" }",
101+
" UNION",
102+
" {",
103+
" ?entity a <http://example.com/theme/medical/Medication> .",
104+
" ?entity <http://example.com/theme/medical/code> ?code .",
105+
" VALUES ?target { \"DX-200\" \"DX-201\" }",
106+
" FILTER ((?code = ?target) || (?code = \"DX-202\"))",
107+
" }",
108+
" OPTIONAL {",
109+
" ?entity <http://example.com/theme/medical/code> ?alt .",
110+
" }",
111+
"}");
112+
/**
 * Rendered query text that {@code libraryMembersBorrowingBooksByAuthorsUsesFastestKnownShape} expects from the
 * optimizer snapshot for {@code Theme.LIBRARY}, query index 9. The fragments are joined with {@code "\n"} and
 * compared by exact string equality against the trimmed rendered query, so whitespace inside the literals is
 * significant.
 */
private static final String LIBRARY_Q9_FASTEST_RENDERED_QUERY = String.join("\n",
113+
"SELECT (COUNT(DISTINCT ?member) AS ?count) WHERE {",
114+
" ?author <http://example.com/theme/library/name> ?authorName .",
115+
" FILTER ((?authorName = ?target) || (?authorName = \"Author 3\"))",
116+
" ?book <http://example.com/theme/library/writtenBy> ?author .",
117+
" ?book <http://example.com/theme/library/hasCopy> ?copy .",
118+
" ?loan <http://example.com/theme/library/loanedCopy> ?copy .",
119+
" ?loan <http://example.com/theme/library/borrowedBy> ?member .",
120+
" ?member a <http://example.com/theme/library/Member> .",
121+
" ?loan a <http://example.com/theme/library/Loan> .",
122+
" VALUES ?target { \"Author 1\" \"Author 2\" }",
123+
" OPTIONAL {",
124+
" ?book <http://example.com/theme/library/title> ?optTitle .",
125+
" }",
126+
" FILTER ((?optTitle != \"\") && NOT EXISTS { ?loan <http://example.com/theme/library/dueDate> ?due . FILTER (?due < \"2024-01-10\"^^<http://www.w3.org/2001/XMLSchema#date>) })",
127+
"}");
93128

94129
@ParameterizedTest(name = "{0}")
95130
@MethodSource("highValueThemes")
@@ -243,6 +278,68 @@ void electricalGridGeneratorCapacityThresholdUsesFastestKnownShape(@TempDir Path
243278
}
244279
}
245280

281+
@Test
282+
void medicalConditionsOrMedicationsByCodeUsesBranchLocalValuesAndFilter(@TempDir Path dataDir) throws Exception {
283+
Theme theme = Theme.MEDICAL_RECORDS;
284+
Path themeDir = dataDir.resolve(theme.name());
285+
LmdbStore store = new LmdbStore(themeDir.toFile(), ConfigUtil.createConfig());
286+
SailRepository repository = new SailRepository(store);
287+
try {
288+
BenchmarkJoinEstimatorSupport.prepareEstimatorForBulkLoad(repository, store);
289+
loadData(repository, theme);
290+
persistEstimatorAfterBulkLoad(repository, store);
291+
primeLearnedFilterStats(repository, theme, 1);
292+
BenchmarkJoinEstimatorSupport.persistStoreStatistics(store);
293+
} finally {
294+
shutdownAndRelease(repository, store);
295+
}
296+
297+
store = new LmdbStore(themeDir.toFile(), ConfigUtil.createConfig());
298+
repository = new SailRepository(store);
299+
try {
300+
OptimizerSnapshot snapshot = explainOptimized(repository, theme, 1);
301+
assertPlannerDiagnosticsPresent(theme, 1, snapshot.plan());
302+
if (!MEDICAL_Q1_FASTEST_RENDERED_QUERY.equals(snapshot.renderedQuery().trim())) {
303+
throw new AssertionError("Medical q1 should match the fastest branch-local VALUES/filter shape\n"
304+
+ "Expected:\n" + MEDICAL_Q1_FASTEST_RENDERED_QUERY + "\nActual:\n"
305+
+ snapshot.renderedQuery() + "\nPlan:\n" + snapshot.plan());
306+
}
307+
} finally {
308+
shutdownAndRelease(repository, store);
309+
}
310+
}
311+
312+
@Test
313+
void libraryMembersBorrowingBooksByAuthorsUsesFastestKnownShape(@TempDir Path dataDir) throws Exception {
314+
Theme theme = Theme.LIBRARY;
315+
Path themeDir = dataDir.resolve(theme.name());
316+
LmdbStore store = new LmdbStore(themeDir.toFile(), ConfigUtil.createConfig());
317+
SailRepository repository = new SailRepository(store);
318+
try {
319+
BenchmarkJoinEstimatorSupport.prepareEstimatorForBulkLoad(repository, store);
320+
loadData(repository, theme);
321+
persistEstimatorAfterBulkLoad(repository, store);
322+
primeLearnedFilterStats(repository, theme, 9);
323+
BenchmarkJoinEstimatorSupport.persistStoreStatistics(store);
324+
} finally {
325+
shutdownAndRelease(repository, store);
326+
}
327+
328+
store = new LmdbStore(themeDir.toFile(), ConfigUtil.createConfig());
329+
repository = new SailRepository(store);
330+
try {
331+
OptimizerSnapshot snapshot = explainOptimized(repository, theme, 9);
332+
assertPlannerDiagnosticsPresent(theme, 9, snapshot.plan());
333+
if (!LIBRARY_Q9_FASTEST_RENDERED_QUERY.equals(snapshot.renderedQuery().trim())) {
334+
throw new AssertionError("Library q9 should match the fastest author/book/loan shape\n"
335+
+ "Expected:\n" + LIBRARY_Q9_FASTEST_RENDERED_QUERY + "\nActual:\n"
336+
+ snapshot.renderedQuery() + "\nPlan:\n" + snapshot.plan());
337+
}
338+
} finally {
339+
shutdownAndRelease(repository, store);
340+
}
341+
}
342+
246343
private static Stream<Theme> highValueThemes() {
247344
return Stream.of(Theme.PHARMA, Theme.LIBRARY, Theme.MEDICAL_RECORDS, Theme.ENGINEERING,
248345
Theme.ELECTRICAL_GRID, Theme.TRAIN, Theme.SOCIAL_MEDIA);

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/ThemeQueryBenchmark.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@
6969
import org.openjdk.jmh.runner.options.TimeValue;
7070

7171
@State(Scope.Benchmark)
72-
@Warmup(iterations = 1, batchSize = 1, timeUnit = TimeUnit.SECONDS, time = 5)
72+
@Warmup(iterations = 1, batchSize = 1, timeUnit = TimeUnit.SECONDS, time = 10)
7373
@BenchmarkMode({ Mode.AverageTime })
7474
@Fork(value = 1, jvmArgs = { "-Xms1G", "-Xmx32G" })
75-
@Measurement(iterations = 1, batchSize = 1, timeUnit = TimeUnit.SECONDS, time = 2)
75+
@Measurement(iterations = 1, batchSize = 1, timeUnit = TimeUnit.SECONDS, time = 5)
7676
@OutputTimeUnit(TimeUnit.MILLISECONDS)
7777
public class ThemeQueryBenchmark {
7878

@@ -100,16 +100,16 @@ public class ThemeQueryBenchmark {
100100
private static final long EXPECTED_VALUES_DATA_SIZE_BYTES = 713687040L;
101101

102102
@Param({
103-
// "0",
104-
// "1",
105-
// "2",
106-
// "3",
107-
// "4",
108-
// "5",
109-
// "6",
110-
// "7",
111-
// "8",
112-
// "9",
103+
"0",
104+
"1",
105+
"2",
106+
"3",
107+
"4",
108+
"5",
109+
"6",
110+
"7",
111+
"8",
112+
"9",
113113
"10",
114114
// "11",
115115
// "12"

0 commit comments

Comments
 (0)