Skip to content

Commit 0675c7c

Browse files
committed
good query plans, but large overhead for optimizing the query
1 parent ada255c commit 0675c7c

3 files changed

Lines changed: 504 additions & 24 deletions

File tree

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SketchJoinOrderPlanner.java

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,16 @@ private static void addProvidedFilterConditionVars(TupleExpr tupleExpr, Set<Stri
202202
}
203203

204204
private SketchBasedJoinEstimator.SketchPlannerPath classifyGraph() {
205-
int physicalComponents = physicalComponentCount();
206-
if (physicalComponents > 1 && !isConnectedWithDeferredFilters()) {
205+
List<Set<Integer>> physicalComponents = physicalComponents();
206+
int requiredPhysicalComponents = 0;
207+
Set<Integer> requiredFactors = new LinkedHashSet<>();
208+
for (Set<Integer> component : physicalComponents) {
209+
if (!isIgnorableSmallBindingSetAssignmentComponent(component)) {
210+
requiredPhysicalComponents++;
211+
requiredFactors.addAll(component);
212+
}
213+
}
214+
if (requiredPhysicalComponents > 1 && !isConnectedWithDeferredFilters(requiredFactors)) {
207215
return SketchBasedJoinEstimator.SketchPlannerPath.UNSUPPORTED_SHAPE;
208216
}
209217
int edgeCount = 0;
@@ -212,30 +220,32 @@ private SketchBasedJoinEstimator.SketchPlannerPath classifyGraph() {
212220
edgeCount += factor.arity();
213221
variables.addAll(factor.connectivityVars());
214222
}
215-
boolean cyclicOrDense = edgeCount != factors.size() + variables.size() - physicalComponents;
216-
recordDebug("graph classification: physicalComponents=" + physicalComponents + " variables="
223+
boolean cyclicOrDense = edgeCount != factors.size() + variables.size() - physicalComponents.size();
224+
recordDebug("graph classification: physicalComponents=" + physicalComponents.size()
225+
+ " requiredPhysicalComponents=" + requiredPhysicalComponents + " variables="
217226
+ new TreeSet<>(variables) + " edgeCount=" + edgeCount + " cyclicOrDense=" + cyclicOrDense);
218227
return null;
219228
}
220229

221-
private int physicalComponentCount() {
230+
private List<Set<Integer>> physicalComponents() {
222231
Set<Integer> seenFactors = new LinkedHashSet<>();
223-
int components = 0;
232+
List<Set<Integer>> components = new ArrayList<>();
224233
for (int i = 0; i < factors.size(); i++) {
225234
if (seenFactors.contains(i)) {
226235
continue;
227236
}
228-
components++;
229-
visitPhysicalComponent(i, seenFactors);
237+
components.add(physicalComponent(i, seenFactors));
230238
}
231-
return components;
239+
return List.copyOf(components);
232240
}
233241

234-
private void visitPhysicalComponent(int startFactor, Set<Integer> seenFactors) {
242+
private Set<Integer> physicalComponent(int startFactor, Set<Integer> seenFactors) {
243+
Set<Integer> component = new LinkedHashSet<>();
235244
Set<String> seenVariables = new LinkedHashSet<>();
236245
ArrayDeque<Object> queue = new ArrayDeque<>();
237246
queue.add(Integer.valueOf(startFactor));
238247
seenFactors.add(startFactor);
248+
component.add(startFactor);
239249
while (!queue.isEmpty()) {
240250
Object next = queue.removeFirst();
241251
if (next instanceof Integer factorIndex) {
@@ -250,18 +260,36 @@ private void visitPhysicalComponent(int startFactor, Set<Integer> seenFactors) {
250260
for (int i = 0; i < factors.size(); i++) {
251261
if (!seenFactors.contains(i) && factors.get(i).connectivityVars().contains(variable)) {
252262
seenFactors.add(i);
263+
component.add(i);
253264
queue.addLast(Integer.valueOf(i));
254265
}
255266
}
256267
}
268+
return Set.copyOf(component);
257269
}
258270

259-
private boolean isConnectedWithDeferredFilters() {
271+
private boolean isIgnorableSmallBindingSetAssignmentComponent(Set<Integer> component) {
272+
if (component.isEmpty()) {
273+
return false;
274+
}
275+
for (Integer factorIndex : component) {
276+
if (!isSmallBindingSetAssignment(factorIndex.intValue())) {
277+
return false;
278+
}
279+
}
280+
return true;
281+
}
282+
283+
private boolean isConnectedWithDeferredFilters(Set<Integer> requiredFactors) {
284+
if (requiredFactors.isEmpty()) {
285+
return true;
286+
}
260287
Set<Integer> seenFactors = new LinkedHashSet<>();
261288
Set<String> seenVariables = new LinkedHashSet<>();
262289
ArrayDeque<Object> queue = new ArrayDeque<>();
263-
queue.add(Integer.valueOf(0));
264-
seenFactors.add(0);
290+
Integer startFactor = requiredFactors.iterator().next();
291+
queue.add(startFactor);
292+
seenFactors.add(startFactor);
265293
while (!queue.isEmpty()) {
266294
Object next = queue.removeFirst();
267295
if (next instanceof Integer factorIndex) {
@@ -285,7 +313,7 @@ private boolean isConnectedWithDeferredFilters() {
285313
}
286314
}
287315
}
288-
return seenFactors.size() == factors.size();
316+
return seenFactors.containsAll(requiredFactors);
289317
}
290318

291319
private List<String> deferredFilterVariablesConnectedTo(String variable) {
@@ -642,7 +670,8 @@ private String connectionReason(long mask, int candidateIndex) {
642670
if (!sharedVariables.isEmpty()) {
643671
return sharedVariables.size() == 1 ? sharedVariables.iterator().next() : "shared:" + sharedVariables;
644672
}
645-
if (mask != 0L && isSmallBindingSetAssignment(candidateIndex)) {
673+
if (mask != 0L && isSmallBindingSetAssignment(candidateIndex)
674+
&& !hasReachableNonSmallCandidate(mask, candidateIndex)) {
646675
return "small-values-anchor";
647676
}
648677
if (isSmallBindingSetOnlyMask(mask)) {
@@ -655,6 +684,19 @@ private String connectionReason(long mask, int candidateIndex) {
655684
return null;
656685
}
657686

687+
private boolean hasReachableNonSmallCandidate(long mask, int ignoredCandidateIndex) {
688+
for (int i = 0; i < factors.size(); i++) {
689+
if (i == ignoredCandidateIndex || contains(mask, i) || isSmallBindingSetAssignment(i)) {
690+
continue;
691+
}
692+
if (!sharedVariables(mask, i).isEmpty() || deferredFilterConnection(mask, i) != null
693+
|| isSmallBindingSetOnlyMask(mask)) {
694+
return true;
695+
}
696+
}
697+
return false;
698+
}
699+
658700
private boolean isSmallBindingSetOnlyMask(long mask) {
659701
if (mask == 0L) {
660702
return false;

core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SketchBasedJoinEstimatorJoinOrderPlannerTest.java

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ void planJoinOrderUsesBindingSetAssignmentSketchIntersectionForFinalRows() {
366366
}
367367

368368
@Test
369-
void planJoinOrderHandlesDisconnectedBindingAssignmentAlongsideConnectedChain() {
369+
void planJoinOrderSupportsDisconnectedSmallBindingAssignmentAlongsideConnectedChain() {
370370
StubSailStore store = new StubSailStore();
371371
IRI rdfType = VF.createIRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
372372
IRI patientType = VF.createIRI("urn:Patient");
@@ -408,17 +408,25 @@ void planJoinOrderHandlesDisconnectedBindingAssignmentAlongsideConnectedChain()
408408

409409
Optional<JoinOrderPlanner.JoinOrderPlan> greedyPlan = estimator.planJoinOrder(args, Set.of(),
410410
JoinOrderPlanner.Algorithm.GREEDY);
411-
assertTrue(greedyPlan.isEmpty(),
412-
"Disconnected binding assignments should now fall back through Optional.empty()");
413-
assertEquals(SketchBasedJoinEstimator.SketchPlannerPath.UNSUPPORTED_SHAPE, estimator.lastJoinOrderPlannerPath(),
414-
"Disconnected binding assignments should report an unsupported planner shape");
411+
assertTrue(greedyPlan.isPresent(),
412+
"Small disconnected binding assignments should not make the connected graph unsupported");
413+
assertEquals(SketchBasedJoinEstimator.SketchPlannerPath.ROBUST_USED, estimator.lastJoinOrderPlannerPath(),
414+
"Small disconnected binding assignments should stay on the robust planner path");
415+
assertEquals(bindings, greedyPlan.get().getOrderedArgs().get(greedyPlan.get().getOrderedArgs().size() - 1),
416+
"Small disconnected binding assignments should be appended after the connected graph");
415417

416418
Optional<JoinOrderPlanner.JoinOrderPlan> dynamicProgrammingPlan = estimator.planJoinOrder(args, Set.of(),
417419
JoinOrderPlanner.Algorithm.DYNAMIC_PROGRAMMING);
418-
assertTrue(dynamicProgrammingPlan.isEmpty(),
419-
"Disconnected binding assignments should now fall back through Optional.empty()");
420-
assertEquals(SketchBasedJoinEstimator.SketchPlannerPath.UNSUPPORTED_SHAPE, estimator.lastJoinOrderPlannerPath(),
421-
"Disconnected binding assignments should report an unsupported planner shape");
420+
assertTrue(dynamicProgrammingPlan.isPresent(),
421+
"Small disconnected binding assignments should not make the connected graph unsupported");
422+
assertEquals(SketchBasedJoinEstimator.SketchPlannerPath.ROBUST_USED, estimator.lastJoinOrderPlannerPath(),
423+
"Small disconnected binding assignments should stay on the robust planner path");
424+
assertEquals(bindings,
425+
dynamicProgrammingPlan.get()
426+
.getOrderedArgs()
427+
.get(dynamicProgrammingPlan.get().getOrderedArgs().size()
428+
- 1),
429+
"Small disconnected binding assignments should be appended after the connected graph");
422430
}
423431

424432
@Test

0 commit comments

Comments
 (0)