Skip to content

Commit 774b116

Browse files
committed
lmdb store gets its own optimizer pipeline and custom optimizers
1 parent 2acb7ca commit 774b116

16 files changed

Lines changed: 13417 additions & 1339 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/FilterOptimizer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ public FilterOptimizer(EvaluationStatistics statistics) {
102102
this(statistics, mergeAdjacentFilters, true);
103103
}
104104

105-
FilterOptimizer(EvaluationStatistics statistics, boolean mergeAdjacentFilters,
106-
boolean considerJoinPlacementCost) {
105+
public FilterOptimizer(EvaluationStatistics statistics, boolean mergeAdjacentFilters,
106+
boolean considerJoinPlacementCost) {
107107
this.statistics = statistics;
108108
this.mergeAdjacentFilters = mergeAdjacentFilters;
109109
this.considerJoinPlacementCost = considerJoinPlacementCost;
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.sail.lmdb;
13+
14+
import java.util.ArrayDeque;
15+
import java.util.ArrayList;
16+
import java.util.Collections;
17+
import java.util.Deque;
18+
import java.util.HashSet;
19+
import java.util.List;
20+
import java.util.Optional;
21+
import java.util.Set;
22+
23+
import org.eclipse.rdf4j.query.algebra.StatementPattern;
24+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
25+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinOrderPlanner;
26+
27+
final class LmdbDeferredFilterPlacer {
28+
29+
private final LmdbUnionFilterDistributor.BranchOptimizer factorOptimizer;
30+
private final LmdbUnionFilterDistributor.JoinFactory joinFactory;
31+
private final LmdbUnionFilterDistributor.FilterWrapper filterWrapper;
32+
33+
LmdbDeferredFilterPlacer(LmdbUnionFilterDistributor.BranchOptimizer factorOptimizer,
34+
LmdbUnionFilterDistributor.JoinFactory joinFactory,
35+
LmdbUnionFilterDistributor.FilterWrapper filterWrapper) {
36+
this.factorOptimizer = factorOptimizer;
37+
this.joinFactory = joinFactory;
38+
this.filterWrapper = filterWrapper;
39+
}
40+
41+
TupleExpr buildSegmentRoot(Deque<TupleExpr> orderedArgs, List<DeferredFilter> filters,
42+
Set<String> boundBeforeSegment) {
43+
if (orderedArgs.isEmpty()) {
44+
return null;
45+
}
46+
List<TupleExpr> orderedJoinArgs = new ArrayList<>(orderedArgs);
47+
List<DeferredFilter> pendingFilters = new ArrayList<>(filters);
48+
List<Set<StatementPattern>> remainingPatternsAfterFactor = LmdbJoinPlanSupport
49+
.remainingPatternsAfterFactors(orderedJoinArgs);
50+
List<SegmentFactor> factors = new ArrayList<>(orderedJoinArgs.size());
51+
Set<String> prefixBindingNames = new HashSet<>(boundBeforeSegment);
52+
for (int i = 0; i < orderedJoinArgs.size(); i++) {
53+
TupleExpr optimized = factorOptimizer.optimize(orderedJoinArgs.get(i), prefixBindingNames);
54+
Set<StatementPattern> currentPatterns = LmdbJoinPlanSupport.collectPatternIdentities(optimized);
55+
optimized = applyPrefixBindingDeferredFilters(optimized, pendingFilters, prefixBindingNames,
56+
currentPatterns, remainingPatternsAfterFactor.get(i));
57+
factors.add(new SegmentFactor(optimized, currentPatterns));
58+
prefixBindingNames.addAll(optimized.getBindingNames());
59+
}
60+
List<DeferredFilter> unresolvedFilters = new ArrayList<>();
61+
for (DeferredFilter filter : LmdbJoinPlanSupport.sortDeferredFilters(pendingFilters)) {
62+
if (!groupDeferredFilterOnSmallestWindow(factors, filter, boundBeforeSegment)) {
63+
unresolvedFilters.add(filter);
64+
}
65+
}
66+
67+
Deque<TupleExpr> roots = new ArrayDeque<>(factors.size());
68+
for (SegmentFactor factor : factors) {
69+
roots.addLast(factor.tupleExpr);
70+
}
71+
TupleExpr root = buildJoinRoot(roots);
72+
for (DeferredFilter filter : unresolvedFilters) {
73+
root = filterWrapper.wrap(root, List.of(filter), "root");
74+
}
75+
return root;
76+
}
77+
78+
private TupleExpr applyPrefixBindingDeferredFilters(TupleExpr tupleExpr,
79+
List<DeferredFilter> deferredFilters, Set<String> prefixBindingNames,
80+
Set<StatementPattern> currentPatterns, Set<StatementPattern> remainingPatterns) {
81+
if (deferredFilters.isEmpty()) {
82+
return tupleExpr;
83+
}
84+
Optional<Set<String>> assignmentNames = LmdbJoinPlanSupport.positionableBindingSetAssignmentNames(tupleExpr);
85+
if (assignmentNames.isEmpty()) {
86+
return tupleExpr;
87+
}
88+
Set<String> availableNames = new HashSet<>(prefixBindingNames);
89+
Set<String> assignmentBindingNames = assignmentNames.get();
90+
availableNames.addAll(assignmentBindingNames);
91+
List<DeferredFilter> prefixFilters = new ArrayList<>();
92+
for (int i = 0; i < deferredFilters.size();) {
93+
DeferredFilter deferredFilter = deferredFilters.get(i);
94+
if (prefixBindingNames.containsAll(deferredFilter.requiredVars)
95+
|| !availableNames.containsAll(deferredFilter.requiredVars)
96+
|| deferredFilter.patternLocalBase != null
97+
|| deferredFilter.originPatterns.isEmpty()
98+
|| !currentPatterns.containsAll(deferredFilter.originPatterns)
99+
|| !Collections.disjoint(remainingPatterns, deferredFilter.originPatterns)
100+
|| hasPendingSplitExistsFilter(deferredFilters, deferredFilter, availableNames,
101+
assignmentBindingNames)
102+
|| (!assignmentBindingNames.containsAll(deferredFilter.requiredVars)
103+
&& LmdbJoinPlanSupport.containsExists(deferredFilter.condition))) {
104+
i++;
105+
continue;
106+
}
107+
prefixFilters.add(deferredFilter);
108+
deferredFilters.remove(i);
109+
}
110+
return prefixFilters.isEmpty() ? tupleExpr
111+
: filterWrapper.wrap(tupleExpr, prefixFilters, "bindingPrefix");
112+
}
113+
114+
private boolean hasPendingSplitExistsFilter(List<DeferredFilter> deferredFilters, DeferredFilter candidate,
115+
Set<String> availableNames, Set<String> assignmentBindingNames) {
116+
if (LmdbJoinPlanSupport.containsExists(candidate.condition)) {
117+
return false;
118+
}
119+
for (DeferredFilter deferredFilter : deferredFilters) {
120+
if (deferredFilter == candidate || !LmdbJoinPlanSupport.containsExists(deferredFilter.condition)
121+
|| !availableNames.containsAll(deferredFilter.requiredVars)
122+
|| assignmentBindingNames.containsAll(deferredFilter.requiredVars)
123+
|| assignmentBindingNames.containsAll(candidate.requiredVars)) {
124+
continue;
125+
}
126+
return true;
127+
}
128+
return false;
129+
}
130+
131+
private boolean groupDeferredFilterOnSmallestWindow(List<SegmentFactor> factors, DeferredFilter filter,
132+
Set<String> boundBeforeSegment) {
133+
int[] window = null;
134+
if (filter.conditionCost == JoinOrderPlanner.FILTER_COST_CHEAP) {
135+
window = smallestSinglePatternBindingCoveringWindow(factors, filter.requiredVars, boundBeforeSegment);
136+
if (window != null) {
137+
return groupDeferredFilterOnWindow(factors, filter, window);
138+
}
139+
}
140+
if (!LmdbJoinPlanSupport.containsExists(filter.condition)
141+
&& groupDeferredFilterOnBindingAssignments(factors, filter, boundBeforeSegment)) {
142+
return true;
143+
}
144+
if (filter.conditionCost == JoinOrderPlanner.FILTER_COST_CHEAP) {
145+
window = smallestBindingCoveringWindow(factors, filter.requiredVars, boundBeforeSegment);
146+
}
147+
if (window == null) {
148+
if (filter.originPatterns.isEmpty()) {
149+
return false;
150+
}
151+
window = smallestPatternCoveringWindow(factors, filter.originPatterns);
152+
if (window == null) {
153+
return false;
154+
}
155+
}
156+
return groupDeferredFilterOnWindow(factors, filter, window);
157+
}
158+
159+
private int[] smallestSinglePatternBindingCoveringWindow(List<SegmentFactor> factors, Set<String> requiredVars,
160+
Set<String> boundBeforeSegment) {
161+
if (requiredVars.isEmpty()) {
162+
return null;
163+
}
164+
for (int i = 0; i < factors.size(); i++) {
165+
SegmentFactor factor = factors.get(i);
166+
if (factor.containedPatterns.size() != 1) {
167+
continue;
168+
}
169+
Set<String> availableVars = new HashSet<>(boundBeforeSegment);
170+
availableVars.addAll(factor.bindingNames);
171+
if (availableVars.containsAll(requiredVars)) {
172+
return new int[] { i, i };
173+
}
174+
}
175+
return null;
176+
}
177+
178+
private boolean groupDeferredFilterOnBindingAssignments(List<SegmentFactor> factors,
179+
DeferredFilter deferredFilter, Set<String> boundBeforeSegment) {
180+
Set<String> missingVars = new HashSet<>(deferredFilter.requiredVars);
181+
missingVars.removeAll(boundBeforeSegment);
182+
if (missingVars.isEmpty()) {
183+
return false;
184+
}
185+
186+
List<Integer> selectedIndexes = new ArrayList<>();
187+
for (int i = 0; i < factors.size(); i++) {
188+
SegmentFactor factor = factors.get(i);
189+
if (!LmdbJoinPlanSupport.isBindingOnlyFactor(factor)
190+
|| Collections.disjoint(factor.bindingNames, missingVars)) {
191+
continue;
192+
}
193+
selectedIndexes.add(i);
194+
missingVars.removeAll(factor.bindingNames);
195+
if (missingVars.isEmpty()) {
196+
break;
197+
}
198+
}
199+
if (!missingVars.isEmpty() || selectedIndexes.isEmpty()) {
200+
return false;
201+
}
202+
203+
Deque<TupleExpr> selectedRoots = new ArrayDeque<>(selectedIndexes.size());
204+
Set<StatementPattern> containedPatterns = LmdbJoinPlanSupport.identityPatternSet();
205+
for (Integer selectedIndex : selectedIndexes) {
206+
SegmentFactor factor = factors.get(selectedIndex);
207+
selectedRoots.addLast(factor.tupleExpr);
208+
containedPatterns.addAll(factor.containedPatterns);
209+
}
210+
TupleExpr filteredRoot = filterWrapper.wrap(buildJoinRoot(selectedRoots), List.of(deferredFilter),
211+
"bindingAssignments");
212+
SegmentFactor groupedFactor = new SegmentFactor(filteredRoot, containedPatterns);
213+
int insertionIndex = selectedIndexes.get(0);
214+
for (int i = selectedIndexes.size() - 1; i >= 0; i--) {
215+
factors.remove((int) selectedIndexes.get(i));
216+
}
217+
factors.add(insertionIndex, groupedFactor);
218+
return true;
219+
}
220+
221+
private boolean groupDeferredFilterOnWindow(List<SegmentFactor> factors, DeferredFilter filter, int[] window) {
222+
Deque<TupleExpr> windowRoots = new ArrayDeque<>(window[1] - window[0] + 1);
223+
Set<StatementPattern> containedPatterns = LmdbJoinPlanSupport.identityPatternSet();
224+
for (int i = window[0]; i <= window[1]; i++) {
225+
SegmentFactor factor = factors.get(i);
226+
windowRoots.addLast(factor.tupleExpr);
227+
containedPatterns.addAll(factor.containedPatterns);
228+
}
229+
TupleExpr filteredRoot = filterWrapper.wrap(buildJoinRoot(windowRoots), List.of(filter), "smallestWindow");
230+
for (int i = window[1]; i >= window[0]; i--) {
231+
factors.remove(i);
232+
}
233+
factors.add(window[0], new SegmentFactor(filteredRoot, containedPatterns));
234+
return true;
235+
}
236+
237+
private int[] smallestBindingCoveringWindow(List<SegmentFactor> factors, Set<String> requiredVars,
238+
Set<String> boundBeforeSegment) {
239+
if (factors.isEmpty()) {
240+
return null;
241+
}
242+
int bestStart = -1;
243+
int bestEnd = -1;
244+
int bestCost = Integer.MAX_VALUE;
245+
int bestSize = Integer.MAX_VALUE;
246+
for (int start = 0; start < factors.size(); start++) {
247+
Set<String> availableVars = new HashSet<>(boundBeforeSegment);
248+
int cost = 0;
249+
for (int end = start; end < factors.size(); end++) {
250+
SegmentFactor factor = factors.get(end);
251+
availableVars.addAll(factor.bindingNames);
252+
cost += LmdbJoinPlanSupport.isBindingOnlyFactor(factor) ? 0 : 1;
253+
if (availableVars.containsAll(requiredVars)) {
254+
int size = end - start + 1;
255+
if (cost < bestCost || (cost == bestCost && size < bestSize)) {
256+
bestStart = start;
257+
bestEnd = end;
258+
bestCost = cost;
259+
bestSize = size;
260+
}
261+
break;
262+
}
263+
}
264+
}
265+
return bestStart < 0 ? null : new int[] { bestStart, bestEnd };
266+
}
267+
268+
private int[] smallestPatternCoveringWindow(List<SegmentFactor> factors,
269+
Set<StatementPattern> originPatterns) {
270+
int bestStart = -1;
271+
int bestEnd = -1;
272+
int bestSize = Integer.MAX_VALUE;
273+
for (int start = 0; start < factors.size(); start++) {
274+
Set<StatementPattern> covered = LmdbJoinPlanSupport.identityPatternSet();
275+
for (int end = start; end < factors.size(); end++) {
276+
covered.addAll(factors.get(end).containedPatterns);
277+
if (covered.containsAll(originPatterns)) {
278+
int size = end - start + 1;
279+
if (size < bestSize) {
280+
bestStart = start;
281+
bestEnd = end;
282+
bestSize = size;
283+
}
284+
break;
285+
}
286+
}
287+
}
288+
return bestStart < 0 ? null : new int[] { bestStart, bestEnd };
289+
}
290+
291+
private TupleExpr buildJoinRoot(Deque<TupleExpr> orderedArgs) {
292+
if (orderedArgs.isEmpty()) {
293+
return null;
294+
}
295+
TupleExpr root = orderedArgs.removeLast();
296+
while (!orderedArgs.isEmpty()) {
297+
root = joinFactory.create(orderedArgs.removeLast(), root);
298+
}
299+
return root;
300+
}
301+
}

0 commit comments

Comments
 (0)