Skip to content

Commit 35ae593

Browse files
committed
GH-5686 feat: add lmdb page-cardinality estimator package and regressions
1 parent 34c47f6 commit 35ae593

16 files changed

Lines changed: 2595 additions & 0 deletions
Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.sail.lmdb.estimate;
13+
14+
import java.io.IOException;
15+
import java.nio.ByteBuffer;
16+
import java.util.ArrayList;
17+
import java.util.List;
18+
19+
import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher;
20+
21+
final class LmdbBtreeRangeCounter {
22+
23+
/** Source of pages; every page in this class is fetched through {@code dataFile.readPage(pgno, meta)}. */
private final LmdbDataFile dataFile;

/** Meta object handed to every {@code readPage} call alongside the page number. */
private final LmdbMeta meta;

/**
 * Creates a counter that reads pages from the given data file.
 *
 * @param dataFile the file that pages are read from
 * @param meta     the meta object passed along with every page read
 */
LmdbBtreeRangeCounter(LmdbDataFile dataFile, LmdbMeta meta) {
    this.meta = meta;
    this.dataFile = dataFile;
}
30+
31+
RangeCountResult countRange(LmdbDb db, byte[] minKey, int minKeyLength, byte[] maxKey, int maxKeyLength,
32+
GroupMatcher matcher) throws IOException {
33+
RangeCountResult result = new RangeCountResult();
34+
if (db.isEmpty()) {
35+
return result;
36+
}
37+
38+
SeekCursor cursor = seek(db, minKey, minKeyLength, result);
39+
if (cursor == null) {
40+
return result;
41+
}
42+
43+
while (true) {
44+
while (cursor.leafIndex < cursor.leafPage.numKeys) {
45+
LmdbNode node = cursor.leafPage.node(cursor.leafIndex);
46+
int cmpMax = LmdbKeyComparator.compare(cursor.leafPage.buffer, node.keyOffset, node.keySize, maxKey,
47+
maxKeyLength);
48+
if (cmpMax > 0) {
49+
return result;
50+
}
51+
if (matcher == null || matches(node, cursor.leafPage.buffer, matcher)) {
52+
result.entries += countNodeEntries(node, cursor.leafPage, result);
53+
}
54+
cursor.leafIndex++;
55+
}
56+
57+
if (!advanceToNextLeaf(cursor, result)) {
58+
return result;
59+
}
60+
}
61+
}
62+
63+
byte[] findValueByExactKey(LmdbDb db, byte[] key, int keyLength, RangeCountResult ioStats) throws IOException {
64+
if (db.isEmpty()) {
65+
return null;
66+
}
67+
SeekCursor cursor = seek(db, key, keyLength, ioStats);
68+
if (cursor == null || cursor.leafIndex >= cursor.leafPage.numKeys) {
69+
return null;
70+
}
71+
72+
LmdbNode node = cursor.leafPage.node(cursor.leafIndex);
73+
int cmp = LmdbKeyComparator.compare(cursor.leafPage.buffer, node.keyOffset, node.keySize, key, keyLength);
74+
if (cmp != 0) {
75+
return null;
76+
}
77+
78+
byte[] value = new byte[node.valueSize];
79+
ByteBuffer duplicate = cursor.leafPage.buffer.duplicate();
80+
duplicate.position(node.valueOffset);
81+
duplicate.get(value, 0, node.valueSize);
82+
return value;
83+
}
84+
85+
private SeekCursor seek(LmdbDb db, byte[] searchKey, int searchKeyLength, RangeCountResult stats)
86+
throws IOException {
87+
List<BranchFrame> branchPath = new ArrayList<>();
88+
LmdbPage page = dataFile.readPage(db.rootPgno, meta);
89+
if (page.isBranch()) {
90+
stats.branchPagesRead++;
91+
}
92+
if (page.isLeaf() || page.isLeaf2()) {
93+
stats.leafPagesRead++;
94+
}
95+
96+
while (page.isBranch()) {
97+
if (page.numKeys == 0) {
98+
throw new IOException("Corrupt branch page with zero keys: " + page.expectedPgno);
99+
}
100+
SearchResult search = findFirstGreaterOrEqual(page, searchKey, searchKeyLength, false);
101+
int childIndex;
102+
if (search.index >= page.numKeys) {
103+
childIndex = page.numKeys - 1;
104+
} else {
105+
childIndex = search.index;
106+
if (!search.exact) {
107+
childIndex--;
108+
}
109+
}
110+
if (childIndex < 0 || childIndex >= page.numKeys) {
111+
throw new IOException("Corrupt branch descent index " + childIndex + " for page " + page.expectedPgno);
112+
}
113+
LmdbNode branchNode = page.node(childIndex);
114+
branchPath.add(new BranchFrame(page, childIndex));
115+
page = dataFile.readPage(branchNode.branchPgno, meta);
116+
if (page.isBranch()) {
117+
stats.branchPagesRead++;
118+
}
119+
if (page.isLeaf() || page.isLeaf2()) {
120+
stats.leafPagesRead++;
121+
}
122+
}
123+
124+
if (!page.isLeaf() && !page.isLeaf2()) {
125+
throw new IOException("Expected leaf page, found flags=" + page.flags + " on page " + page.expectedPgno);
126+
}
127+
128+
SearchResult leafSearch = findFirstGreaterOrEqual(page, searchKey, searchKeyLength, true);
129+
SeekCursor cursor = new SeekCursor(page, leafSearch.index, branchPath);
130+
if (cursor.leafIndex >= cursor.leafPage.numKeys && !advanceToNextLeaf(cursor, stats)) {
131+
return null;
132+
}
133+
return cursor;
134+
}
135+
136+
private boolean advanceToNextLeaf(SeekCursor cursor, RangeCountResult stats) throws IOException {
137+
while (!cursor.branchPath.isEmpty()) {
138+
BranchFrame last = cursor.branchPath.get(cursor.branchPath.size() - 1);
139+
int nextChild = last.childIndex + 1;
140+
if (nextChild < last.page.numKeys) {
141+
last.childIndex = nextChild;
142+
LmdbNode nextNode = last.page.node(nextChild);
143+
LmdbPage page = dataFile.readPage(nextNode.branchPgno, meta);
144+
if (page.isBranch()) {
145+
stats.branchPagesRead++;
146+
}
147+
if (page.isLeaf() || page.isLeaf2()) {
148+
stats.leafPagesRead++;
149+
}
150+
151+
while (page.isBranch()) {
152+
if (page.numKeys == 0) {
153+
throw new IOException("Corrupt branch page with zero keys: " + page.expectedPgno);
154+
}
155+
cursor.branchPath.add(new BranchFrame(page, 0));
156+
LmdbNode firstNode = page.node(0);
157+
page = dataFile.readPage(firstNode.branchPgno, meta);
158+
if (page.isBranch()) {
159+
stats.branchPagesRead++;
160+
}
161+
if (page.isLeaf() || page.isLeaf2()) {
162+
stats.leafPagesRead++;
163+
}
164+
}
165+
166+
if (!page.isLeaf() && !page.isLeaf2()) {
167+
throw new IOException("Expected leaf page while advancing, found flags=" + page.flags);
168+
}
169+
cursor.leafPage = page;
170+
cursor.leafIndex = 0;
171+
return true;
172+
}
173+
cursor.branchPath.remove(cursor.branchPath.size() - 1);
174+
}
175+
return false;
176+
}
177+
178+
private SearchResult findFirstGreaterOrEqual(LmdbPage page, byte[] key, int keyLength, boolean leafSearch)
179+
throws IOException {
180+
if (page.numKeys == 0) {
181+
return new SearchResult(0, false);
182+
}
183+
184+
int low;
185+
int high = page.numKeys - 1;
186+
if (leafSearch || page.isLeaf()) {
187+
low = 0;
188+
} else {
189+
low = 1;
190+
}
191+
int index = 0;
192+
int rc = -1;
193+
194+
while (low <= high) {
195+
index = (low + high) >>> 1;
196+
LmdbNode node = page.node(index);
197+
rc = LmdbKeyComparator.compare(key, keyLength, page.buffer, node.keyOffset, node.keySize);
198+
if (rc == 0) {
199+
return new SearchResult(index, true);
200+
}
201+
if (rc > 0) {
202+
low = index + 1;
203+
} else {
204+
high = index - 1;
205+
}
206+
}
207+
208+
if (rc > 0) {
209+
index++;
210+
}
211+
if (low > high) {
212+
index = low;
213+
}
214+
return new SearchResult(index, false);
215+
}
216+
217+
private boolean matches(LmdbNode node, ByteBuffer pageBuffer, GroupMatcher matcher) {
218+
ByteBuffer keySlice = pageBuffer.duplicate();
219+
keySlice.order(pageBuffer.order());
220+
keySlice.position(node.keyOffset);
221+
keySlice.limit(node.keyOffset + node.keySize);
222+
ByteBuffer keyView = keySlice.slice();
223+
keyView.order(pageBuffer.order());
224+
return matcher.matches(keyView);
225+
}
226+
227+
private long countNodeEntries(LmdbNode node, LmdbPage page, RangeCountResult stats) throws IOException {
228+
if ((node.nodeFlags & LmdbFormat.F_SUBDATA) != 0 && node.valueSize >= LmdbFormat.META_DB_SIZE) {
229+
ByteBuffer dup = page.buffer.duplicate();
230+
dup.order(page.buffer.order());
231+
dup.position(node.valueOffset);
232+
LmdbDb subDb = LmdbDb.parse(dup, node.valueOffset);
233+
return subDb.entries;
234+
}
235+
if ((node.nodeFlags & LmdbFormat.F_DUPDATA) != 0 && node.valueSize >= LmdbFormat.PAGE_HEADER_SIZE) {
236+
return countSubPageEntries(page.buffer, node.valueOffset, node.valueSize);
237+
}
238+
if ((node.nodeFlags & LmdbFormat.F_BIGDATA) != 0 && node.valueSize >= Long.BYTES) {
239+
long overflowPgno = page.buffer.getLong(node.valueOffset);
240+
LmdbPage overflowPage = dataFile.readPage(overflowPgno, meta);
241+
stats.overflowPagesRead += Math.max(overflowPage.overflowPages, 1);
242+
}
243+
return 1;
244+
}
245+
246+
private long countSubPageEntries(ByteBuffer buffer, int offset, int length) {
247+
if (offset + length > buffer.limit()) {
248+
return 0;
249+
}
250+
int flags = LmdbFormat.unsignedShort(buffer, offset + 10);
251+
int lower = LmdbFormat.unsignedShort(buffer, offset + 12);
252+
if ((flags & LmdbFormat.P_LEAF2) != 0) {
253+
int keySize = LmdbFormat.unsignedShort(buffer, offset + 8);
254+
if (keySize <= 0) {
255+
return 0;
256+
}
257+
int bytes = Math.max(lower - LmdbFormat.PAGE_HEADER_SIZE, 0);
258+
return bytes / keySize;
259+
}
260+
return Math.max(LmdbFormat.numKeys(lower), 0);
261+
}
262+
263+
private static final class SearchResult {
264+
final int index;
265+
final boolean exact;
266+
267+
SearchResult(int index, boolean exact) {
268+
this.index = index;
269+
this.exact = exact;
270+
}
271+
}
272+
273+
private static final class BranchFrame {
274+
final LmdbPage page;
275+
int childIndex;
276+
277+
BranchFrame(LmdbPage page, int childIndex) {
278+
this.page = page;
279+
this.childIndex = childIndex;
280+
}
281+
}
282+
283+
private static final class SeekCursor {
284+
LmdbPage leafPage;
285+
int leafIndex;
286+
final List<BranchFrame> branchPath;
287+
288+
SeekCursor(LmdbPage leafPage, int leafIndex, List<BranchFrame> branchPath) {
289+
this.leafPage = leafPage;
290+
this.leafIndex = leafIndex;
291+
this.branchPath = branchPath;
292+
}
293+
}
294+
295+
}

0 commit comments

Comments
 (0)