Skip to content

Commit 7e53d02

Browse files
committed
GH-5685 initial implementation
1 parent 7896551 commit 7e53d02

12 files changed

Lines changed: 1116 additions & 24 deletions

File tree

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*
99
* SPDX-License-Identifier: BSD-3-Clause
1010
*******************************************************************************/
11+
// Some portions generated by Codex
1112
package org.eclipse.rdf4j.sail.lmdb;
1213

1314
import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.E;
@@ -52,6 +53,7 @@
5253
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_abort;
5354
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_begin;
5455
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_commit;
56+
import static org.lwjgl.util.lmdb.LMDB.mdb_txn_id;
5557

5658
import java.io.Closeable;
5759
import java.io.File;
@@ -85,6 +87,7 @@
8587
import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.Record;
8688
import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator;
8789
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
90+
import org.eclipse.rdf4j.sail.lmdb.estimate.LmdbPageCardinalityEstimator;
8891
import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher;
8992
import org.eclipse.rdf4j.sail.lmdb.util.IndexKeyWriters;
9093
import org.lwjgl.PointerBuffer;
@@ -152,6 +155,7 @@ class TripleStore implements Closeable {
152155
* The directory that is used to store the index files.
153156
*/
154157
private final File dir;
158+
private final File dataMdbFile;
155159
/**
156160
* Object containing meta-data for the triple store.
157161
*/
@@ -166,9 +170,11 @@ class TripleStore implements Closeable {
166170
private final int contextsDbi;
167171
private int pageSize;
168172
private final boolean autoGrow;
173+
private final boolean usePageEstimator;
169174
private long mapSize;
170175
private long writeTxn;
171176
private final TxnManager txnManager;
177+
private final LmdbPageCardinalityEstimator pageEstimator;
172178

173179
private TxnRecordCache recordCache = null;
174180

@@ -195,8 +201,10 @@ public int compareRegion(ByteBuffer array1, int startIdx1, ByteBuffer array2, in
195201

196202
TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore) throws IOException, SailException {
197203
this.dir = dir;
204+
this.dataMdbFile = new File(dir, "data.mdb");
198205
boolean forceSync = config.getForceSync();
199206
this.autoGrow = config.getAutoGrow();
207+
this.usePageEstimator = config.getPageCardinalityEstimator();
200208
this.valueStore = valueStore;
201209

202210
// create directory if it not exists
@@ -229,6 +237,7 @@ public int compareRegion(ByteBuffer array1, int startIdx1, ByteBuffer array2, in
229237
});
230238

231239
txnManager = new TxnManager(env, Mode.RESET);
240+
pageEstimator = usePageEstimator ? new LmdbPageCardinalityEstimator(dataMdbFile) : null;
232241

233242
File propFile = new File(this.dir, PROPERTIES_FILE);
234243
String indexSpecStr = config.getTripleIndexes();
@@ -462,6 +471,14 @@ public void close() throws IOException {
462471
endTransaction(false);
463472

464473
List<Throwable> caughtExceptions = new ArrayList<>();
474+
if (pageEstimator != null) {
475+
try {
476+
pageEstimator.close();
477+
} catch (Throwable e) {
478+
logger.warn("Failed to close page estimator", e);
479+
caughtExceptions.add(e);
480+
}
481+
}
465482
for (TripleIndex index : indexes) {
466483
try {
467484
index.close();
@@ -674,6 +691,64 @@ protected void filterUsedIds(Collection<Long> ids) throws IOException {
674691

675692
protected double cardinality(long subj, long pred, long obj, long context) throws IOException {
676693
TripleIndex index = getBestIndex(subj, pred, obj, context);
694+
if (!usePageEstimator) {
695+
return cardinalityUsingSamplingEstimator(index, subj, pred, obj, context);
696+
}
697+
698+
try {
699+
return cardinalityUsingPageEstimator(index, subj, pred, obj, context);
700+
} catch (IOException | RuntimeException e) {
701+
logger.warn("Page-walk cardinality estimator failed for index {}, falling back to sampling",
702+
new String(index.getFieldSeq()), e);
703+
return cardinalityUsingSamplingEstimator(index, subj, pred, obj, context);
704+
}
705+
}
706+
707+
private double cardinalityUsingPageEstimator(TripleIndex index, long subj, long pred, long obj, long context)
708+
throws IOException {
709+
LmdbPageCardinalityEstimator estimator = pageEstimator;
710+
if (estimator == null) {
711+
return cardinalityUsingSamplingEstimator(index, subj, pred, obj, context);
712+
}
713+
int relevantParts = index.getPatternScore(subj, pred, obj, context);
714+
final String explicitDbName = new String(index.getFieldSeq());
715+
final String inferredDbName = explicitDbName + "-inf";
716+
717+
return txnManager.doWith((stack, txn) -> {
718+
long txnId = mdb_txn_id(txn);
719+
if (relevantParts == 0) {
720+
long explicitEntries = estimator.totalEntries(txnId, explicitDbName);
721+
long inferredEntries = estimator.totalEntries(txnId, inferredDbName);
722+
return (double) (explicitEntries + inferredEntries);
723+
}
724+
725+
ByteBuffer minKeyBuffer = ByteBuffer.allocate(MAX_KEY_LENGTH);
726+
index.getMinKey(minKeyBuffer, subj, pred, obj, context);
727+
minKeyBuffer.flip();
728+
byte[] minKey = toArray(minKeyBuffer);
729+
730+
ByteBuffer maxKeyBuffer = ByteBuffer.allocate(MAX_KEY_LENGTH);
731+
index.getMaxKey(maxKeyBuffer, subj, pred, obj, context);
732+
maxKeyBuffer.flip();
733+
byte[] maxKey = toArray(maxKeyBuffer);
734+
735+
GroupMatcher matcher = index.createMatcher(subj, pred, obj, context);
736+
long explicitCount = estimator.estimateEntries(txnId, explicitDbName, minKey, minKey.length, maxKey,
737+
maxKey.length, matcher);
738+
long inferredCount = estimator.estimateEntries(txnId, inferredDbName, minKey, minKey.length, maxKey,
739+
maxKey.length, matcher);
740+
return (double) (explicitCount + inferredCount);
741+
});
742+
}
743+
744+
private static byte[] toArray(ByteBuffer buffer) {
745+
byte[] data = new byte[buffer.remaining()];
746+
buffer.get(data);
747+
return data;
748+
}
749+
750+
private double cardinalityUsingSamplingEstimator(TripleIndex index, long subj, long pred, long obj, long context)
751+
throws IOException {
677752

678753
int relevantParts = index.getPatternScore(subj, pred, obj, context);
679754
if (relevantParts == 0) {

0 commit comments

Comments
 (0)