|
| 1 | +/******************************************************************************* |
| 2 | + * Copyright (c) 2026 Eclipse RDF4J contributors. |
| 3 | + * |
| 4 | + * All rights reserved. This program and the accompanying materials |
| 5 | + * are made available under the terms of the Eclipse Distribution License v1.0 |
| 6 | + * which accompanies this distribution, and is available at |
| 7 | + * http://www.eclipse.org/org/documents/edl-v10.php. |
| 8 | + * |
| 9 | + * SPDX-License-Identifier: BSD-3-Clause |
| 10 | + *******************************************************************************/ |
| 11 | +// Some portions generated by Codex |
| 12 | +package org.eclipse.rdf4j.sail.lmdb.estimate; |
| 13 | + |
| 14 | +import java.io.IOException; |
| 15 | +import java.nio.ByteBuffer; |
| 16 | +import java.util.ArrayList; |
| 17 | +import java.util.List; |
| 18 | + |
| 19 | +import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher; |
| 20 | + |
| 21 | +final class LmdbBtreeRangeCounter { |
| 22 | + |
| 23 | + private final LmdbDataFile dataFile; |
| 24 | + private final LmdbMeta meta; |
| 25 | + |
| 26 | + LmdbBtreeRangeCounter(LmdbDataFile dataFile, LmdbMeta meta) { |
| 27 | + this.dataFile = dataFile; |
| 28 | + this.meta = meta; |
| 29 | + } |
| 30 | + |
| 31 | + RangeCountResult countRange(LmdbDb db, byte[] minKey, int minKeyLength, byte[] maxKey, int maxKeyLength, |
| 32 | + GroupMatcher matcher) throws IOException { |
| 33 | + RangeCountResult result = new RangeCountResult(); |
| 34 | + if (db.isEmpty()) { |
| 35 | + return result; |
| 36 | + } |
| 37 | + |
| 38 | + SeekCursor cursor = seek(db, minKey, minKeyLength, result); |
| 39 | + if (cursor == null) { |
| 40 | + return result; |
| 41 | + } |
| 42 | + |
| 43 | + while (true) { |
| 44 | + while (cursor.leafIndex < cursor.leafPage.numKeys) { |
| 45 | + LmdbNode node = cursor.leafPage.node(cursor.leafIndex); |
| 46 | + int cmpMax = LmdbKeyComparator.compare(cursor.leafPage.buffer, node.keyOffset, node.keySize, maxKey, |
| 47 | + maxKeyLength); |
| 48 | + if (cmpMax > 0) { |
| 49 | + return result; |
| 50 | + } |
| 51 | + if (matcher == null || matches(node, cursor.leafPage.buffer, matcher)) { |
| 52 | + result.entries += countNodeEntries(node, cursor.leafPage, result); |
| 53 | + } |
| 54 | + cursor.leafIndex++; |
| 55 | + } |
| 56 | + |
| 57 | + if (!advanceToNextLeaf(cursor, result)) { |
| 58 | + return result; |
| 59 | + } |
| 60 | + } |
| 61 | + } |
| 62 | + |
| 63 | + byte[] findValueByExactKey(LmdbDb db, byte[] key, int keyLength, RangeCountResult ioStats) throws IOException { |
| 64 | + if (db.isEmpty()) { |
| 65 | + return null; |
| 66 | + } |
| 67 | + SeekCursor cursor = seek(db, key, keyLength, ioStats); |
| 68 | + if (cursor == null || cursor.leafIndex >= cursor.leafPage.numKeys) { |
| 69 | + return null; |
| 70 | + } |
| 71 | + |
| 72 | + LmdbNode node = cursor.leafPage.node(cursor.leafIndex); |
| 73 | + int cmp = LmdbKeyComparator.compare(cursor.leafPage.buffer, node.keyOffset, node.keySize, key, keyLength); |
| 74 | + if (cmp != 0) { |
| 75 | + return null; |
| 76 | + } |
| 77 | + |
| 78 | + byte[] value = new byte[node.valueSize]; |
| 79 | + ByteBuffer duplicate = cursor.leafPage.buffer.duplicate(); |
| 80 | + duplicate.position(node.valueOffset); |
| 81 | + duplicate.get(value, 0, node.valueSize); |
| 82 | + return value; |
| 83 | + } |
| 84 | + |
| 85 | + private SeekCursor seek(LmdbDb db, byte[] searchKey, int searchKeyLength, RangeCountResult stats) |
| 86 | + throws IOException { |
| 87 | + List<BranchFrame> branchPath = new ArrayList<>(); |
| 88 | + LmdbPage page = dataFile.readPage(db.rootPgno, meta); |
| 89 | + if (page.isBranch()) { |
| 90 | + stats.branchPagesRead++; |
| 91 | + } |
| 92 | + if (page.isLeaf() || page.isLeaf2()) { |
| 93 | + stats.leafPagesRead++; |
| 94 | + } |
| 95 | + |
| 96 | + while (page.isBranch()) { |
| 97 | + if (page.numKeys == 0) { |
| 98 | + throw new IOException("Corrupt branch page with zero keys: " + page.expectedPgno); |
| 99 | + } |
| 100 | + SearchResult search = findFirstGreaterOrEqual(page, searchKey, searchKeyLength, false); |
| 101 | + int childIndex; |
| 102 | + if (search.index >= page.numKeys) { |
| 103 | + childIndex = page.numKeys - 1; |
| 104 | + } else { |
| 105 | + childIndex = search.index; |
| 106 | + if (!search.exact) { |
| 107 | + childIndex--; |
| 108 | + } |
| 109 | + } |
| 110 | + if (childIndex < 0 || childIndex >= page.numKeys) { |
| 111 | + throw new IOException("Corrupt branch descent index " + childIndex + " for page " + page.expectedPgno); |
| 112 | + } |
| 113 | + LmdbNode branchNode = page.node(childIndex); |
| 114 | + branchPath.add(new BranchFrame(page, childIndex)); |
| 115 | + page = dataFile.readPage(branchNode.branchPgno, meta); |
| 116 | + if (page.isBranch()) { |
| 117 | + stats.branchPagesRead++; |
| 118 | + } |
| 119 | + if (page.isLeaf() || page.isLeaf2()) { |
| 120 | + stats.leafPagesRead++; |
| 121 | + } |
| 122 | + } |
| 123 | + |
| 124 | + if (!page.isLeaf() && !page.isLeaf2()) { |
| 125 | + throw new IOException("Expected leaf page, found flags=" + page.flags + " on page " + page.expectedPgno); |
| 126 | + } |
| 127 | + |
| 128 | + SearchResult leafSearch = findFirstGreaterOrEqual(page, searchKey, searchKeyLength, true); |
| 129 | + SeekCursor cursor = new SeekCursor(page, leafSearch.index, branchPath); |
| 130 | + if (cursor.leafIndex >= cursor.leafPage.numKeys && !advanceToNextLeaf(cursor, stats)) { |
| 131 | + return null; |
| 132 | + } |
| 133 | + return cursor; |
| 134 | + } |
| 135 | + |
| 136 | + private boolean advanceToNextLeaf(SeekCursor cursor, RangeCountResult stats) throws IOException { |
| 137 | + while (!cursor.branchPath.isEmpty()) { |
| 138 | + BranchFrame last = cursor.branchPath.get(cursor.branchPath.size() - 1); |
| 139 | + int nextChild = last.childIndex + 1; |
| 140 | + if (nextChild < last.page.numKeys) { |
| 141 | + last.childIndex = nextChild; |
| 142 | + LmdbNode nextNode = last.page.node(nextChild); |
| 143 | + LmdbPage page = dataFile.readPage(nextNode.branchPgno, meta); |
| 144 | + if (page.isBranch()) { |
| 145 | + stats.branchPagesRead++; |
| 146 | + } |
| 147 | + if (page.isLeaf() || page.isLeaf2()) { |
| 148 | + stats.leafPagesRead++; |
| 149 | + } |
| 150 | + |
| 151 | + while (page.isBranch()) { |
| 152 | + if (page.numKeys == 0) { |
| 153 | + throw new IOException("Corrupt branch page with zero keys: " + page.expectedPgno); |
| 154 | + } |
| 155 | + cursor.branchPath.add(new BranchFrame(page, 0)); |
| 156 | + LmdbNode firstNode = page.node(0); |
| 157 | + page = dataFile.readPage(firstNode.branchPgno, meta); |
| 158 | + if (page.isBranch()) { |
| 159 | + stats.branchPagesRead++; |
| 160 | + } |
| 161 | + if (page.isLeaf() || page.isLeaf2()) { |
| 162 | + stats.leafPagesRead++; |
| 163 | + } |
| 164 | + } |
| 165 | + |
| 166 | + if (!page.isLeaf() && !page.isLeaf2()) { |
| 167 | + throw new IOException("Expected leaf page while advancing, found flags=" + page.flags); |
| 168 | + } |
| 169 | + cursor.leafPage = page; |
| 170 | + cursor.leafIndex = 0; |
| 171 | + return true; |
| 172 | + } |
| 173 | + cursor.branchPath.remove(cursor.branchPath.size() - 1); |
| 174 | + } |
| 175 | + return false; |
| 176 | + } |
| 177 | + |
| 178 | + private SearchResult findFirstGreaterOrEqual(LmdbPage page, byte[] key, int keyLength, boolean leafSearch) |
| 179 | + throws IOException { |
| 180 | + if (page.numKeys == 0) { |
| 181 | + return new SearchResult(0, false); |
| 182 | + } |
| 183 | + |
| 184 | + int low; |
| 185 | + int high = page.numKeys - 1; |
| 186 | + if (leafSearch || page.isLeaf()) { |
| 187 | + low = 0; |
| 188 | + } else { |
| 189 | + low = 1; |
| 190 | + } |
| 191 | + int index = 0; |
| 192 | + int rc = -1; |
| 193 | + |
| 194 | + while (low <= high) { |
| 195 | + index = (low + high) >>> 1; |
| 196 | + LmdbNode node = page.node(index); |
| 197 | + rc = LmdbKeyComparator.compare(key, keyLength, page.buffer, node.keyOffset, node.keySize); |
| 198 | + if (rc == 0) { |
| 199 | + return new SearchResult(index, true); |
| 200 | + } |
| 201 | + if (rc > 0) { |
| 202 | + low = index + 1; |
| 203 | + } else { |
| 204 | + high = index - 1; |
| 205 | + } |
| 206 | + } |
| 207 | + |
| 208 | + if (rc > 0) { |
| 209 | + index++; |
| 210 | + } |
| 211 | + if (low > high) { |
| 212 | + index = low; |
| 213 | + } |
| 214 | + return new SearchResult(index, false); |
| 215 | + } |
| 216 | + |
| 217 | + private boolean matches(LmdbNode node, ByteBuffer pageBuffer, GroupMatcher matcher) { |
| 218 | + ByteBuffer keySlice = pageBuffer.duplicate(); |
| 219 | + keySlice.order(pageBuffer.order()); |
| 220 | + keySlice.position(node.keyOffset); |
| 221 | + keySlice.limit(node.keyOffset + node.keySize); |
| 222 | + ByteBuffer keyView = keySlice.slice(); |
| 223 | + keyView.order(pageBuffer.order()); |
| 224 | + return matcher.matches(keyView); |
| 225 | + } |
| 226 | + |
| 227 | + private long countNodeEntries(LmdbNode node, LmdbPage page, RangeCountResult stats) throws IOException { |
| 228 | + if ((node.nodeFlags & LmdbFormat.F_SUBDATA) != 0 && node.valueSize >= LmdbFormat.META_DB_SIZE) { |
| 229 | + ByteBuffer dup = page.buffer.duplicate(); |
| 230 | + dup.order(page.buffer.order()); |
| 231 | + dup.position(node.valueOffset); |
| 232 | + LmdbDb subDb = LmdbDb.parse(dup, node.valueOffset); |
| 233 | + return subDb.entries; |
| 234 | + } |
| 235 | + if ((node.nodeFlags & LmdbFormat.F_DUPDATA) != 0 && node.valueSize >= LmdbFormat.PAGE_HEADER_SIZE) { |
| 236 | + return countSubPageEntries(page.buffer, node.valueOffset, node.valueSize); |
| 237 | + } |
| 238 | + if ((node.nodeFlags & LmdbFormat.F_BIGDATA) != 0 && node.valueSize >= Long.BYTES) { |
| 239 | + long overflowPgno = page.buffer.getLong(node.valueOffset); |
| 240 | + LmdbPage overflowPage = dataFile.readPage(overflowPgno, meta); |
| 241 | + stats.overflowPagesRead += Math.max(overflowPage.overflowPages, 1); |
| 242 | + } |
| 243 | + return 1; |
| 244 | + } |
| 245 | + |
| 246 | + private long countSubPageEntries(ByteBuffer buffer, int offset, int length) { |
| 247 | + if (offset + length > buffer.limit()) { |
| 248 | + return 0; |
| 249 | + } |
| 250 | + int flags = LmdbFormat.unsignedShort(buffer, offset + 10); |
| 251 | + int lower = LmdbFormat.unsignedShort(buffer, offset + 12); |
| 252 | + if ((flags & LmdbFormat.P_LEAF2) != 0) { |
| 253 | + int keySize = LmdbFormat.unsignedShort(buffer, offset + 8); |
| 254 | + if (keySize <= 0) { |
| 255 | + return 0; |
| 256 | + } |
| 257 | + int bytes = Math.max(lower - LmdbFormat.PAGE_HEADER_SIZE, 0); |
| 258 | + return bytes / keySize; |
| 259 | + } |
| 260 | + return Math.max(LmdbFormat.numKeys(lower), 0); |
| 261 | + } |
| 262 | + |
| 263 | + private static final class SearchResult { |
| 264 | + final int index; |
| 265 | + final boolean exact; |
| 266 | + |
| 267 | + SearchResult(int index, boolean exact) { |
| 268 | + this.index = index; |
| 269 | + this.exact = exact; |
| 270 | + } |
| 271 | + } |
| 272 | + |
| 273 | + private static final class BranchFrame { |
| 274 | + final LmdbPage page; |
| 275 | + int childIndex; |
| 276 | + |
| 277 | + BranchFrame(LmdbPage page, int childIndex) { |
| 278 | + this.page = page; |
| 279 | + this.childIndex = childIndex; |
| 280 | + } |
| 281 | + } |
| 282 | + |
| 283 | + private static final class SeekCursor { |
| 284 | + LmdbPage leafPage; |
| 285 | + int leafIndex; |
| 286 | + final List<BranchFrame> branchPath; |
| 287 | + |
| 288 | + SeekCursor(LmdbPage leafPage, int leafIndex, List<BranchFrame> branchPath) { |
| 289 | + this.leafPage = leafPage; |
| 290 | + this.leafIndex = leafIndex; |
| 291 | + this.branchPath = branchPath; |
| 292 | + } |
| 293 | + } |
| 294 | + |
| 295 | +} |
0 commit comments