Skip to content

Commit e7fe55c

Browse files
committed
best overall
1 parent c8f67e3 commit e7fe55c

22 files changed

Lines changed: 26105 additions & 7556 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/FilterIterator.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*
99
* SPDX-License-Identifier: BSD-3-Clause
1010
*******************************************************************************/
11+
// Some portions generated by Codex
1112
package org.eclipse.rdf4j.query.algebra.evaluation.iterator;
1213

1314
import java.util.Iterator;
@@ -96,7 +97,7 @@ public FilterIterator(Filter filter, CloseableIteration<BindingSet> iter, QueryV
9697
this.filterNode = filter;
9798
this.evaluationStatistics = evaluationStatistics;
9899
this.runtimeTelemetryEnabled = filter != null && filter.isRuntimeTelemetryEnabled();
99-
this.recordFilterOutcomes = filter != null && evaluationStatistics != null;
100+
this.recordFilterOutcomes = shouldRecordFilterOutcomes(filter, evaluationStatistics);
100101
this.condition = condition;
101102
this.strategy = strategy;
102103
if (!isPartOfSubQuery(filter)) {
@@ -118,7 +119,7 @@ private FilterIterator(Filter filterNode, CloseableIteration<BindingSet> iter,
118119
this.filterNode = filterNode;
119120
this.evaluationStatistics = evaluationStatistics;
120121
this.runtimeTelemetryEnabled = filterNode != null && filterNode.isRuntimeTelemetryEnabled();
121-
this.recordFilterOutcomes = filterNode != null && evaluationStatistics != null;
122+
this.recordFilterOutcomes = shouldRecordFilterOutcomes(filterNode, evaluationStatistics);
122123
this.condition = condition;
123124
this.strategy = strategy;
124125
// FIXME Jeen Boekstra scopeBindingNames should include bindings from superquery
@@ -155,6 +156,10 @@ private static Function<BindingSet, BindingSet> buildRetainFunction(Filter filte
155156
};
156157
}
157158

159+
private static boolean shouldRecordFilterOutcomes(Filter filter, EvaluationStatistics evaluationStatistics) {
160+
return filter != null && evaluationStatistics != null && !isPartOfSubQuery(filter);
161+
}
162+
158163
@Override
159164
protected boolean accept(BindingSet bindings) throws QueryEvaluationException {
160165
if (runtimeTelemetryEnabled) {

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/DefaultEvaluationStrategyTelemetryRegressionTest.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,23 @@
1818
import java.util.Map;
1919

2020
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
21+
import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration;
22+
import org.eclipse.rdf4j.model.impl.BooleanLiteral;
2123
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
2224
import org.eclipse.rdf4j.model.vocabulary.RDF;
2325
import org.eclipse.rdf4j.query.BindingSet;
2426
import org.eclipse.rdf4j.query.Dataset;
2527
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
2628
import org.eclipse.rdf4j.query.algebra.Compare;
29+
import org.eclipse.rdf4j.query.algebra.Exists;
2730
import org.eclipse.rdf4j.query.algebra.Filter;
2831
import org.eclipse.rdf4j.query.algebra.MathExpr;
2932
import org.eclipse.rdf4j.query.algebra.StatementPattern;
3033
import org.eclipse.rdf4j.query.algebra.ValueConstant;
3134
import org.eclipse.rdf4j.query.algebra.Var;
3235
import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet;
3336
import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep;
37+
import org.eclipse.rdf4j.query.algebra.evaluation.iterator.FilterIterator;
3438
import org.eclipse.rdf4j.query.impl.EmptyBindingSet;
3539
import org.junit.jupiter.api.AfterEach;
3640
import org.junit.jupiter.api.Test;
@@ -136,6 +140,63 @@ void filterEvaluationRecordsOutcomesWhenRuntimeTelemetryDisabled() {
136140
assertThat(statistics.filteredCount).isEqualTo(1L);
137141
}
138142

143+
@Test
144+
void subqueryFilterDoesNotRecordCorrelatedOutcomeFeedback() {
145+
BindingSetAssignment assignments = new BindingSetAssignment();
146+
QueryBindingSet keep = new QueryBindingSet();
147+
keep.addBinding("name", SimpleValueFactory.getInstance().createLiteral("keep"));
148+
assignments.setBindingSets(List.of(keep));
149+
150+
Filter filter = new Filter(assignments,
151+
new Compare(Var.of("name"),
152+
new ValueConstant(SimpleValueFactory.getInstance().createLiteral("keep")),
153+
Compare.CompareOp.EQ));
154+
Exists exists = new Exists(filter);
155+
156+
RecordingEvaluationStatistics statistics = new RecordingEvaluationStatistics();
157+
DefaultEvaluationStrategy strategy = new DefaultEvaluationStrategy(new EmptyTripleSource(), null, null, 0,
158+
statistics);
159+
160+
assertThat(strategy.precompile(exists, new QueryEvaluationContext.Minimal((Dataset) null))
161+
.evaluate(EmptyBindingSet.getInstance())).isEqualTo(BooleanLiteral.TRUE);
162+
163+
assertThat(statistics.recordCalls).isZero();
164+
assertThat(statistics.passedCount).isZero();
165+
assertThat(statistics.filteredCount).isZero();
166+
}
167+
168+
@Test
169+
void repeatedLocalPatternFilterRecordsTopLevelOutcomeFeedback() {
170+
StatementPattern pattern = new StatementPattern(Var.of("s"),
171+
Var.of("p", SimpleValueFactory.getInstance().createIRI("urn:p")), Var.of("value"));
172+
pattern.setDoubleMetricPlanned("plannedRepeatedInvocations", 1000.0d);
173+
Filter filter = new Filter(pattern,
174+
new Compare(Var.of("value"),
175+
new ValueConstant(SimpleValueFactory.getInstance().createLiteral("keep")),
176+
Compare.CompareOp.EQ));
177+
178+
QueryBindingSet keep = new QueryBindingSet();
179+
keep.addBinding("s", SimpleValueFactory.getInstance().createIRI("urn:s"));
180+
keep.addBinding("value", SimpleValueFactory.getInstance().createLiteral("keep"));
181+
182+
RecordingEvaluationStatistics statistics = new RecordingEvaluationStatistics();
183+
DefaultEvaluationStrategy strategy = new DefaultEvaluationStrategy(new EmptyTripleSource(), null, null, 0,
184+
statistics);
185+
186+
try (FilterIterator iterator = new FilterIterator(filter,
187+
new CloseableIteratorIteration<>(List.of(keep).iterator()),
188+
new QueryValueEvaluationStep.ConstantQueryValueEvaluationStep(BooleanLiteral.TRUE), strategy,
189+
statistics)) {
190+
assertThat(iterator.hasNext()).isTrue();
191+
assertThat(iterator.next().getValue("value").stringValue()).isEqualTo("keep");
192+
assertThat(iterator.hasNext()).isFalse();
193+
}
194+
195+
assertThat(statistics.recordCalls).isEqualTo(1);
196+
assertThat(statistics.passedCount).isEqualTo(1L);
197+
assertThat(statistics.filteredCount).isZero();
198+
}
199+
139200
private static StatementPattern statementPatternWithMetrics(int index) {
140201
StatementPattern statementPattern = new StatementPattern(
141202
Var.of("s", SimpleValueFactory.getInstance().createIRI("urn:test:s" + index)),
@@ -177,11 +238,13 @@ private static int evictionCheckInterval() {
177238
}
178239

179240
private static final class RecordingEvaluationStatistics extends EvaluationStatistics {
241+
private long recordCalls;
180242
private long passedCount;
181243
private long filteredCount;
182244

183245
@Override
184246
public void recordFilterOutcome(Filter filter, long passedCount, long filteredCount) {
247+
recordCalls++;
185248
this.passedCount += passedCount;
186249
this.filteredCount += filteredCount;
187250
}

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SketchBasedJoinEstimator.java

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,13 @@
6868
import org.eclipse.rdf4j.query.algebra.LeftJoin;
6969
import org.eclipse.rdf4j.query.algebra.ListMemberOperator;
7070
import org.eclipse.rdf4j.query.algebra.Or;
71+
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
7172
import org.eclipse.rdf4j.query.algebra.Reduced;
7273
import org.eclipse.rdf4j.query.algebra.SameTerm;
7374
import org.eclipse.rdf4j.query.algebra.SingletonSet;
7475
import org.eclipse.rdf4j.query.algebra.Slice;
7576
import org.eclipse.rdf4j.query.algebra.StatementPattern;
77+
import org.eclipse.rdf4j.query.algebra.SubQueryValueOperator;
7678
import org.eclipse.rdf4j.query.algebra.TupleExpr;
7779
import org.eclipse.rdf4j.query.algebra.UnaryTupleOperator;
7880
import org.eclipse.rdf4j.query.algebra.ValueConstant;
@@ -3501,20 +3503,18 @@ public double estimateFilterPassRatio(Filter filter) {
35013503
}
35023504

35033505
public EvaluationStatistics.FilterPassEstimate estimateFilterPass(Filter filter) {
3504-
StatementPattern pattern = basePatternForFilter(filter);
3505-
if (filter == null || filter.getCondition() == null) {
3506-
return new EvaluationStatistics.FilterPassEstimate(-1.0d,
3507-
EvaluationStatistics.FilterPassEstimate.Source.UNKNOWN);
3506+
if (filter == null || filter.getCondition() == null || isPartOfSubQuery(filter)) {
3507+
return unknownFilterPassEstimate();
35083508
}
3509+
StatementPattern pattern = basePatternForFilter(filter);
35093510
if (pattern == null) {
35103511
TuplePlanEstimate argEstimate = estimateTupleExprPlan(filter.getArg());
35113512
double heuristicMultiplier = estimateHeuristicFilterMultiplier(argEstimate, filter.getCondition());
35123513
if (heuristicMultiplier > 0.0d) {
35133514
return new EvaluationStatistics.FilterPassEstimate(heuristicMultiplier,
35143515
EvaluationStatistics.FilterPassEstimate.Source.HEURISTIC);
35153516
}
3516-
return new EvaluationStatistics.FilterPassEstimate(-1.0d,
3517-
EvaluationStatistics.FilterPassEstimate.Source.UNKNOWN);
3517+
return unknownFilterPassEstimate();
35183518
}
35193519
JoinStatsProvider statsProvider = learnedStatsProvider;
35203520
PatternKey patternKey = FilterSelectivityKeys.patternKeyFor(pattern);
@@ -3553,10 +3553,22 @@ public EvaluationStatistics.FilterPassEstimate estimateFilterPass(Filter filter)
35533553
return new EvaluationStatistics.FilterPassEstimate(heuristicMultiplier,
35543554
EvaluationStatistics.FilterPassEstimate.Source.HEURISTIC);
35553555
}
3556+
return unknownFilterPassEstimate();
3557+
}
3558+
3559+
private static EvaluationStatistics.FilterPassEstimate unknownFilterPassEstimate() {
35563560
return new EvaluationStatistics.FilterPassEstimate(-1.0d,
35573561
EvaluationStatistics.FilterPassEstimate.Source.UNKNOWN);
35583562
}
35593563

3564+
private static boolean isPartOfSubQuery(QueryModelNode node) {
3565+
if (node instanceof SubQueryValueOperator) {
3566+
return true;
3567+
}
3568+
QueryModelNode parent = node.getParentNode();
3569+
return parent != null && isPartOfSubQuery(parent);
3570+
}
3571+
35603572
private TuplePlanEstimate toPlannerTupleEstimate(TupleExpr tupleExpr, Set<String> initiallyBoundVars) {
35613573
TuplePlanEstimate estimate;
35623574
if (tupleExpr instanceof BindingSetAssignment) {
@@ -4188,16 +4200,23 @@ private SharedVarEstimate estimateSharedVarJoin(double leftRows, double rightRow
41884200
if (distinct == 0.0d) {
41894201
return new SharedVarEstimate(0.0d, 0.0d, intersection);
41904202
}
4191-
double rows = Math.min(disconnectedRows, intersectionResult.rows);
4192-
return new SharedVarEstimate(normalizeRows(rows), distinct, intersection);
4203+
double rows = normalizeRows(Math.min(disconnectedRows, intersectionResult.rows));
4204+
return new SharedVarEstimate(rows, distinct, intersection);
41934205
}
4206+
return new SharedVarEstimate(
4207+
estimateStatsSharedVarJoinRows(leftRows, rightRows, leftDistinct, rightDistinct, disconnectedRows),
4208+
Math.min(leftDistinct, rightDistinct), null);
4209+
}
4210+
4211+
private double estimateStatsSharedVarJoinRows(double leftRows, double rightRows, double leftDistinct,
4212+
double rightDistinct, double disconnectedRows) {
41944213
double rows = leftRows * rightRows;
41954214
if (!Double.isFinite(rows)) {
41964215
rows = Double.MAX_VALUE;
41974216
}
41984217
rows = rows / Math.max(leftDistinct, rightDistinct);
41994218
rows = Math.min(rows, disconnectedRows);
4200-
return new SharedVarEstimate(normalizeRows(rows), Math.min(leftDistinct, rightDistinct), null);
4219+
return normalizeRows(rows);
42014220
}
42024221

42034222
private static SketchIntersectionResult intersectJoinOrderingSketches(ArrayOfDoublesSketch left,

0 commit comments

Comments
 (0)