Skip to content

Commit 34c47f6

Browse files
committed
GH-5686 perf: add lmdb estimator-driven join optimization
1 parent 0e0f111 commit 34c47f6

10 files changed

Lines changed: 2472 additions & 134 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/QueryJoinOptimizer.java

Lines changed: 610 additions & 11 deletions
Large diffs are not rendered by default.

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/QueryJoinOptimizerTest.java

Lines changed: 1464 additions & 0 deletions
Large diffs are not rendered by default.

core/sail/lmdb/pom.xml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,24 @@
222222
<plugin>
223223
<artifactId>maven-assembly-plugin</artifactId>
224224
</plugin>
225+
<plugin>
226+
<groupId>org.apache.maven.plugins</groupId>
227+
<artifactId>maven-compiler-plugin</artifactId>
228+
<executions>
229+
<execution>
230+
<id>default-testCompile</id>
231+
<configuration>
232+
<annotationProcessorPaths>
233+
<path>
234+
<groupId>org.openjdk.jmh</groupId>
235+
<artifactId>jmh-generator-annprocess</artifactId>
236+
<version>${jmhVersion}</version>
237+
</path>
238+
</annotationProcessorPaths>
239+
</configuration>
240+
</execution>
241+
</executions>
242+
</plugin>
225243
</plugins>
226244
</build>
227245
</project>

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbEvaluationStatistics.java

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@
88
*
99
* SPDX-License-Identifier: BSD-3-Clause
1010
*******************************************************************************/
11+
// Some portions generated by Codex
1112
package org.eclipse.rdf4j.sail.lmdb;
1213

1314
import java.io.IOException;
15+
import java.util.Map;
16+
import java.util.concurrent.ConcurrentHashMap;
1417

1518
import org.eclipse.rdf4j.model.IRI;
1619
import org.eclipse.rdf4j.model.Resource;
@@ -28,14 +31,19 @@
2831
class LmdbEvaluationStatistics extends EvaluationStatistics {
2932

3033
private static final Logger log = LoggerFactory.getLogger(LmdbEvaluationStatistics.class);
34+
private static final int SHARED_CACHE_MAX_ENTRIES = 262_144;
35+
private static final Map<SharedCardinalityKey, Double> sharedCardinalityCache = new ConcurrentHashMap<>();
3136

3237
private final ValueStore valueStore;
3338

3439
private final TripleStore tripleStore;
40+
private final int tripleStoreIdentity;
41+
private final Map<CardinalityKey, Double> cardinalityCache = new ConcurrentHashMap<>();
3542

3643
public LmdbEvaluationStatistics(ValueStore valueStore, TripleStore tripleStore) {
3744
this.valueStore = valueStore;
3845
this.tripleStore = tripleStore;
46+
this.tripleStoreIdentity = System.identityHashCode(tripleStore);
3947
}
4048

4149
@Override
@@ -112,6 +120,116 @@ private double cardinality(Resource subj, IRI pred, Value obj, Resource context)
112120
}
113121
}
114122

115-
return tripleStore.cardinality(subjID, predID, objID, contextID);
123+
CardinalityKey key = new CardinalityKey(subjID, predID, objID, contextID);
124+
Double cached = cardinalityCache.get(key);
125+
if (cached != null) {
126+
return cached;
127+
}
128+
129+
long revisionId = valueStore.getRevision().getRevisionId();
130+
SharedCardinalityKey sharedKey = new SharedCardinalityKey(tripleStoreIdentity, revisionId, key);
131+
Double sharedCached = sharedCardinalityCache.get(sharedKey);
132+
if (sharedCached != null) {
133+
cardinalityCache.put(key, sharedCached);
134+
return sharedCached;
135+
}
136+
137+
double cardinality = tripleStore.cardinality(subjID, predID, objID, contextID);
138+
cardinalityCache.put(key, cardinality);
139+
cacheSharedCardinality(sharedKey, cardinality);
140+
return cardinality;
141+
}
142+
143+
private static void cacheSharedCardinality(SharedCardinalityKey key, double cardinality) {
144+
if (sharedCardinalityCache.size() >= SHARED_CACHE_MAX_ENTRIES) {
145+
sharedCardinalityCache.clear();
146+
}
147+
sharedCardinalityCache.put(key, cardinality);
148+
}
149+
150+
private static final class CardinalityKey {
151+
private final long subjID;
152+
private final long predID;
153+
private final long objID;
154+
private final long contextID;
155+
156+
private CardinalityKey(long subjID, long predID, long objID, long contextID) {
157+
this.subjID = subjID;
158+
this.predID = predID;
159+
this.objID = objID;
160+
this.contextID = contextID;
161+
}
162+
163+
@Override
164+
public boolean equals(Object obj) {
165+
if (this == obj) {
166+
return true;
167+
}
168+
if (!(obj instanceof CardinalityKey)) {
169+
return false;
170+
}
171+
CardinalityKey other = (CardinalityKey) obj;
172+
return subjID == other.subjID
173+
&& predID == other.predID
174+
&& objID == other.objID
175+
&& contextID == other.contextID;
176+
}
177+
178+
@Override
179+
public int hashCode() {
180+
int result = Long.hashCode(subjID);
181+
result = 31 * result + Long.hashCode(predID);
182+
result = 31 * result + Long.hashCode(objID);
183+
result = 31 * result + Long.hashCode(contextID);
184+
return result;
185+
}
186+
}
187+
188+
private static final class SharedCardinalityKey {
189+
private final int tripleStoreIdentity;
190+
private final long revisionId;
191+
private final long subjID;
192+
private final long predID;
193+
private final long objID;
194+
private final long contextID;
195+
private final int hashCode;
196+
197+
private SharedCardinalityKey(int tripleStoreIdentity, long revisionId, CardinalityKey cardinalityKey) {
198+
this.tripleStoreIdentity = tripleStoreIdentity;
199+
this.revisionId = revisionId;
200+
this.subjID = cardinalityKey.subjID;
201+
this.predID = cardinalityKey.predID;
202+
this.objID = cardinalityKey.objID;
203+
this.contextID = cardinalityKey.contextID;
204+
int hash = Integer.hashCode(tripleStoreIdentity);
205+
hash = 31 * hash + Long.hashCode(revisionId);
206+
hash = 31 * hash + Long.hashCode(subjID);
207+
hash = 31 * hash + Long.hashCode(predID);
208+
hash = 31 * hash + Long.hashCode(objID);
209+
hash = 31 * hash + Long.hashCode(contextID);
210+
this.hashCode = hash;
211+
}
212+
213+
@Override
214+
public boolean equals(Object obj) {
215+
if (this == obj) {
216+
return true;
217+
}
218+
if (!(obj instanceof SharedCardinalityKey)) {
219+
return false;
220+
}
221+
SharedCardinalityKey other = (SharedCardinalityKey) obj;
222+
return tripleStoreIdentity == other.tripleStoreIdentity
223+
&& revisionId == other.revisionId
224+
&& subjID == other.subjID
225+
&& predID == other.predID
226+
&& objID == other.objID
227+
&& contextID == other.contextID;
228+
}
229+
230+
@Override
231+
public int hashCode() {
232+
return hashCode;
233+
}
116234
}
117235
}

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*
99
* SPDX-License-Identifier: BSD-3-Clause
1010
*******************************************************************************/
11+
// Some portions generated by Codex
1112
package org.eclipse.rdf4j.sail.lmdb;
1213

1314
import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.E;
@@ -52,6 +53,7 @@
5253
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_abort;
5354
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_begin;
5455
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_commit;
56+
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_id;
5557

5658
import java.io.Closeable;
5759
import java.io.File;
@@ -85,6 +87,7 @@
8587
import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.Record;
8688
import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator;
8789
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
90+
import org.eclipse.rdf4j.sail.lmdb.estimate.LmdbPageCardinalityEstimator;
8891
import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher;
8992
import org.eclipse.rdf4j.sail.lmdb.util.IndexKeyWriters;
9093
import org.lwjgl.PointerBuffer;
@@ -152,6 +155,7 @@ class TripleStore implements Closeable {
152155
* The directory that is used to store the index files.
153156
*/
154157
private final File dir;
158+
private final File dataMdbFile;
155159
/**
156160
* Object containing meta-data for the triple store.
157161
*/
@@ -166,9 +170,11 @@ class TripleStore implements Closeable {
166170
private final int contextsDbi;
167171
private int pageSize;
168172
private final boolean autoGrow;
173+
private final boolean usePageEstimator;
169174
private long mapSize;
170175
private long writeTxn;
171176
private final TxnManager txnManager;
177+
private final LmdbPageCardinalityEstimator pageEstimator;
172178

173179
private TxnRecordCache recordCache = null;
174180

@@ -195,8 +201,10 @@ public int compareRegion(ByteBuffer array1, int startIdx1, ByteBuffer array2, in
195201

196202
TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore) throws IOException, SailException {
197203
this.dir = dir;
204+
this.dataMdbFile = new File(dir, "data.mdb");
198205
boolean forceSync = config.getForceSync();
199206
this.autoGrow = config.getAutoGrow();
207+
this.usePageEstimator = config.getPageCardinalityEstimator();
200208
this.valueStore = valueStore;
201209

202210
// create directory if it not exists
@@ -229,6 +237,7 @@ public int compareRegion(ByteBuffer array1, int startIdx1, ByteBuffer array2, in
229237
});
230238

231239
txnManager = new TxnManager(env, Mode.RESET);
240+
pageEstimator = usePageEstimator ? new LmdbPageCardinalityEstimator(dataMdbFile) : null;
232241

233242
File propFile = new File(this.dir, PROPERTIES_FILE);
234243
String indexSpecStr = config.getTripleIndexes();
@@ -462,6 +471,14 @@ public void close() throws IOException {
462471
endTransaction(false);
463472

464473
List<Throwable> caughtExceptions = new ArrayList<>();
474+
if (pageEstimator != null) {
475+
try {
476+
pageEstimator.close();
477+
} catch (Throwable e) {
478+
logger.warn("Failed to close page estimator", e);
479+
caughtExceptions.add(e);
480+
}
481+
}
465482
for (TripleIndex index : indexes) {
466483
try {
467484
index.close();
@@ -674,6 +691,64 @@ protected void filterUsedIds(Collection<Long> ids) throws IOException {
674691

675692
protected double cardinality(long subj, long pred, long obj, long context) throws IOException {
676693
TripleIndex index = getBestIndex(subj, pred, obj, context);
694+
if (!usePageEstimator) {
695+
return cardinalityUsingSamplingEstimator(index, subj, pred, obj, context);
696+
}
697+
698+
try {
699+
return cardinalityUsingPageEstimator(index, subj, pred, obj, context);
700+
} catch (IOException | RuntimeException e) {
701+
logger.warn("Page-walk cardinality estimator failed for index {}, falling back to sampling",
702+
new String(index.getFieldSeq()), e);
703+
return cardinalityUsingSamplingEstimator(index, subj, pred, obj, context);
704+
}
705+
}
706+
707+
private double cardinalityUsingPageEstimator(TripleIndex index, long subj, long pred, long obj, long context)
708+
throws IOException {
709+
LmdbPageCardinalityEstimator estimator = pageEstimator;
710+
if (estimator == null) {
711+
return cardinalityUsingSamplingEstimator(index, subj, pred, obj, context);
712+
}
713+
int relevantParts = index.getPatternScore(subj, pred, obj, context);
714+
final String explicitDbName = new String(index.getFieldSeq());
715+
final String inferredDbName = explicitDbName + "-inf";
716+
717+
return txnManager.doWith((stack, txn) -> {
718+
long txnId = mdb_txn_id(txn);
719+
if (relevantParts == 0) {
720+
long explicitEntries = estimator.totalEntries(txnId, explicitDbName);
721+
long inferredEntries = estimator.totalEntries(txnId, inferredDbName);
722+
return (double) (explicitEntries + inferredEntries);
723+
}
724+
725+
ByteBuffer minKeyBuffer = ByteBuffer.allocate(MAX_KEY_LENGTH);
726+
index.getMinKey(minKeyBuffer, subj, pred, obj, context);
727+
minKeyBuffer.flip();
728+
byte[] minKey = toArray(minKeyBuffer);
729+
730+
ByteBuffer maxKeyBuffer = ByteBuffer.allocate(MAX_KEY_LENGTH);
731+
index.getMaxKey(maxKeyBuffer, subj, pred, obj, context);
732+
maxKeyBuffer.flip();
733+
byte[] maxKey = toArray(maxKeyBuffer);
734+
735+
GroupMatcher matcher = index.createMatcher(subj, pred, obj, context);
736+
long explicitCount = estimator.estimateEntries(txnId, explicitDbName, minKey, minKey.length, maxKey,
737+
maxKey.length, matcher);
738+
long inferredCount = estimator.estimateEntries(txnId, inferredDbName, minKey, minKey.length, maxKey,
739+
maxKey.length, matcher);
740+
return (double) (explicitCount + inferredCount);
741+
});
742+
}
743+
744+
private static byte[] toArray(ByteBuffer buffer) {
745+
byte[] data = new byte[buffer.remaining()];
746+
buffer.get(data);
747+
return data;
748+
}
749+
750+
private double cardinalityUsingSamplingEstimator(TripleIndex index, long subj, long pred, long obj, long context)
751+
throws IOException {
677752

678753
int relevantParts = index.getPatternScore(subj, pred, obj, context);
679754
if (relevantParts == 0) {

0 commit comments

Comments
 (0)