diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index 4727312854d..5387bfdce79 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -240,7 +240,8 @@ abstract static class StatefulOperation implements Operation { /** * Creates a new {@link LmdbSailStore}. */ - public LmdbSailStore(File dataDir, LmdbStoreConfig config) throws IOException, SailException { + public LmdbSailStore(File dataDir, StoreProperties properties, LmdbStoreConfig config) + throws IOException, SailException { this.setFactory = new PersistentSetFactory<>(dataDir); this.bulkOperationSize = config.getBulkOperationSize(); Function encode = element -> { @@ -254,9 +255,9 @@ public LmdbSailStore(File dataDir, LmdbStoreConfig config) throws IOException, S boolean initialized = false; try { namespaceStore = new NamespaceStore(dataDir); - var valueStore = new ValueStore(new File(dataDir, "values"), config); + var valueStore = new ValueStore(new File(dataDir, "values"), properties, config); this.valueStore = valueStore; - tripleStore = new TripleStore(new File(dataDir, "triples"), config, valueStore); + tripleStore = new TripleStore(new File(dataDir, "triples"), properties, config, valueStore); mayHaveInferred = tripleStore.hasTriples(false); initialized = true; } finally { @@ -977,7 +978,8 @@ private long removeStatements(long subj, long pred, long obj, boolean explicit, tripleStore.removeTriplesByContext(subj, pred, obj, contextId, explicit, quad -> { removeCount[0]++; for (long id : quad) { - if (id != 0L) { + if (id != 0L && !ValueIds.isInlined(id)) { + // only add references, exclude inlined values unusedIds.add(id); } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java 
b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java index 694c7741124..1cd5ceaa45c 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStore.java @@ -12,7 +12,6 @@ import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.Comparator; @@ -20,13 +19,11 @@ import java.util.function.Supplier; import java.util.stream.Stream; -import org.apache.commons.io.FileUtils; import org.eclipse.rdf4j.collection.factory.api.CollectionFactory; import org.eclipse.rdf4j.collection.factory.mapdb.MapDb3CollectionFactory; import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.concurrent.locks.Lock; import org.eclipse.rdf4j.common.concurrent.locks.LockManager; -import org.eclipse.rdf4j.common.io.MavenUtil; import org.eclipse.rdf4j.common.transaction.IsolationLevel; import org.eclipse.rdf4j.common.transaction.IsolationLevels; import org.eclipse.rdf4j.model.ValueFactory; @@ -34,7 +31,7 @@ import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategyFactory; import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolver; import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolverClient; -import org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategyFactory; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.DefaultEvaluationStrategyFactory; import org.eclipse.rdf4j.repository.sparql.federation.SPARQLServiceResolver; import org.eclipse.rdf4j.sail.InterruptedSailException; import org.eclipse.rdf4j.sail.NotifyingSailConnection; @@ -62,8 +59,10 @@ public class LmdbStore extends AbstractNotifyingSail implements FederatedService /*-----------* * Variables * *-----------*/ - - private static final String VERSION = MavenUtil.loadVersion("org.eclipse.rdf4j", 
"rdf4j-sail-lmdb", "devel"); + /** + * The current version of the LMDB store. + */ + static final int VERSION = 2; /** * Specifies which triple indexes this lmdb store must use. @@ -169,7 +168,7 @@ public void setDataDir(File dataDir) { */ public synchronized EvaluationStrategyFactory getEvaluationStrategyFactory() { if (evalStratFactory == null) { - evalStratFactory = new StrictEvaluationStrategyFactory(getFederatedServiceResolver()); + evalStratFactory = new DefaultEvaluationStrategyFactory(getFederatedServiceResolver()); } evalStratFactory.setQuerySolutionCacheThreshold(getIterationCacheSyncThreshold()); evalStratFactory.setTrackResultSize(isTrackResultSize()); @@ -252,18 +251,33 @@ protected void initializeInternal() throws SailException { logger.debug("Data dir is " + dataDir); try { - File versionFile = new File(dataDir, "lmdbrdf.ver"); - String version = versionFile.exists() ? FileUtils.readFileToString(versionFile, StandardCharsets.UTF_8) - : null; - if (!VERSION.equals(version) && upgradeStore(dataDir, version)) { - FileUtils.writeStringToFile(versionFile, VERSION, StandardCharsets.UTF_8); + StoreProperties properties = new StoreProperties(dataDir); + // ensure that it is an error if an unsupported version of LmdbStore already exists + if (new File(dataDir, "lmdbrdf.ver").exists()) { + throw new SailException("Directory contains data from an older unsupported version of LmdbStore"); + } + boolean updateVersion = false; + if (properties.load()) { + if (!String.valueOf(VERSION).equals(properties.getVersion())) { + updateVersion = upgradeStore(dataDir, properties.getVersion()); + } + } else { + properties.setVersion(String.valueOf(VERSION)); } - backingStore = new LmdbSailStore(dataDir, config); + + backingStore = new LmdbSailStore(dataDir, properties, config); + + // update version after loading and potential internal migration within value and triple store + if (updateVersion) { + properties.setVersion(String.valueOf(VERSION)); + } + properties.save(); + 
this.store = new SnapshotSailStore(backingStore, () -> new MemoryOverflowModel() { @Override protected LmdbSailStore createSailStore(File dataDir) throws IOException, SailException { // Model can't fit into memory, use another LmdbSailStore to store delta - LmdbSailStore lmdbSailStore = new LmdbSailStore(dataDir, config); + LmdbSailStore lmdbSailStore = new LmdbSailStore(dataDir, new StoreProperties(), config); lmdbSailStore.enableMultiThreading = false; return lmdbSailStore; } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java index 25459309f3c..c754419d788 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbUtil.java @@ -21,23 +21,18 @@ import static org.lwjgl.util.lmdb.LMDB.MDB_RDONLY; import static org.lwjgl.util.lmdb.LMDB.MDB_SUCCESS; import static org.lwjgl.util.lmdb.LMDB.mdb_dbi_open; -import static org.lwjgl.util.lmdb.LMDB.mdb_set_compare; import static org.lwjgl.util.lmdb.LMDB.mdb_strerror; import static org.lwjgl.util.lmdb.LMDB.mdb_txn_abort; import static org.lwjgl.util.lmdb.LMDB.mdb_txn_begin; import static org.lwjgl.util.lmdb.LMDB.mdb_txn_commit; import java.io.IOException; -import java.nio.ByteBuffer; import java.nio.IntBuffer; -import java.util.Comparator; import org.lwjgl.PointerBuffer; import org.lwjgl.system.MemoryStack; import org.lwjgl.system.MemoryUtil; import org.lwjgl.system.Pointer; -import org.lwjgl.util.lmdb.MDBCmpFuncI; -import org.lwjgl.util.lmdb.MDBVal; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,22 +116,16 @@ static T transaction(long env, Transaction transaction) throws IOExceptio return ret; } - static int openDatabase(long env, String name, int flags, Comparator comparator) throws IOException { - return transaction(env, (stack, txn) -> { - IntBuffer ip = stack.mallocInt(1); + static int openDatabase(long env, 
String name, int flags) throws IOException { + return transaction(env, (stack, txn) -> openDatabaseWithTxn(txn, name, flags)); + } + static int openDatabaseWithTxn(long txn, String name, int flags) throws IOException { + try (MemoryStack stack = stackPush()) { + IntBuffer ip = stack.mallocInt(1); E(mdb_dbi_open(txn, name, flags, ip)); - int dbi = ip.get(0); - if (comparator != null) { - MDBCmpFuncI cmp = (a, b) -> { - MDBVal aVal = MDBVal.create(a); - MDBVal bVal = MDBVal.create(b); - return comparator.compare(aVal.mv_data(), bVal.mv_data()); - }; - mdb_set_compare(txn, dbi, cmp); - } - return dbi; - }); + return ip.get(0); + } } /** @@ -173,7 +162,6 @@ static boolean requiresResize(long mapSize, long pageSize, long txn, long requir if (percentageUsed > PERCENTAGE_FULL_TRIGGERS_RESIZE) { return true; } - return mapSize - nextPageNo * pageSize < Math.max(requiredSize, MIN_FREE_SPACE); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/PersistentSetFactory.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/PersistentSetFactory.java index 06eea13ffed..e2d68c7f355 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/PersistentSetFactory.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/PersistentSetFactory.java @@ -74,7 +74,7 @@ class PersistentSetFactory { dbDir = Files.createTempDirectory(cacheDir.toPath(), "set"); E(mdb_env_open(env, dbDir.toAbsolutePath().toString(), flags, 0664)); - this.defaultDbi = openDatabase(env, null, MDB_CREATE, null); + this.defaultDbi = openDatabase(env, null, MDB_CREATE); MDBStat stat = MDBStat.malloc(stack); readTransaction(env, (stack2, txn) -> { @@ -132,7 +132,7 @@ void ensureResize() throws IOException, InterruptedException { PersistentSet createSet(String name, Function writeFunc, Function readFunc) throws IOException { - int dbi = openDatabase(env, name, MDB_CREATE, null); + int dbi = openDatabase(env, name, MDB_CREATE); return new PersistentSet<>(this, dbi) { 
@Override protected byte[] write(T element) throws IOException { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/StoreProperties.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/StoreProperties.java new file mode 100644 index 00000000000..18b0cfefcb6 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/StoreProperties.java @@ -0,0 +1,126 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Optional; +import java.util.Properties; + +class StoreProperties { + /** + * The file name for the properties file. + */ + static final String FILE_NAME = "store.properties"; + + /** + * The key used to store the triple store version in the properties file. + */ + static final String VERSION_KEY = "version"; + /** + * The key used to store the triple indexes specification that specifies which triple indexes exist. + */ + static final String INDEXES_KEY = "triple-indexes"; + + protected final File propertiesFile; + + protected String version; + + protected String tripleIndexes; + + protected boolean loaded; + + protected boolean dirty; + + StoreProperties() { + this.propertiesFile = null; + } + + StoreProperties(File dir) { + this.propertiesFile = new File(dir, FILE_NAME); + } + + /** + * Load from properties file. 
+ * + * @return true if loaded from file, else false + */ + boolean load() { + Optional.ofNullable(propertiesFile).filter(File::isFile).ifPresent(file -> { + Properties properties = new Properties(); + try (InputStream in = new FileInputStream(file)) { + properties.load(in); + } catch (IOException e) { + throw new IllegalStateException("Unable to load store properties from " + file, e); + } + version = properties.getProperty(VERSION_KEY); + tripleIndexes = properties.getProperty(INDEXES_KEY); + loaded = true; + }); + return loaded; + } + + /** + * Save to properties file. + */ + void save() { + if (!dirty) { + return; + } + Optional.ofNullable(propertiesFile).ifPresent(file -> { + Properties properties = new Properties(); + if (version != null) { + properties.setProperty(VERSION_KEY, version); + } + if (tripleIndexes != null) { + properties.setProperty(INDEXES_KEY, tripleIndexes); + } + File parent = file.getParentFile(); + if (parent != null) { + parent.mkdirs(); + } + try (OutputStream out = new FileOutputStream(file)) { + properties.store(out, "LmdbStore meta-data"); + dirty = false; + } catch (IOException e) { + throw new IllegalStateException("Unable to store properties to " + file, e); + } + }); + } + + boolean isLoaded() { + return loaded; + } + + String getVersion() { + return version; + } + + StoreProperties setVersion(String version) { + this.version = version; + this.dirty = true; + return this; + } + + String getTripleIndexes() { + return tripleIndexes; + } + + StoreProperties setTripleIndexes(String tripleIndexes) { + this.tripleIndexes = tripleIndexes; + this.dirty = true; + return this; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index 03850b01ec0..f6b2f5cbb4d 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ 
-12,7 +12,7 @@ package org.eclipse.rdf4j.sail.lmdb; import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.E; -import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.openDatabase; +import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.openDatabaseWithTxn; import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.readTransaction; import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.transaction; import static org.eclipse.rdf4j.sail.lmdb.Varint.readQuadUnsigned; @@ -59,11 +59,7 @@ import java.io.Closeable; import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.util.ArrayList; @@ -77,7 +73,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Properties; import java.util.Set; import java.util.StringTokenizer; import java.util.concurrent.atomic.LongAdder; @@ -128,26 +123,7 @@ class TripleStore implements Closeable { * The default triple indexes. */ private static final String DEFAULT_INDEXES = "spoc,posc"; - /** - * The file name for the properties file. - */ - private static final String PROPERTIES_FILE = "triples.prop"; - /** - * The key used to store the triple store version in the properties file. - */ - private static final String VERSION_KEY = "version"; - /** - * The key used to store the triple indexes specification that specifies which triple indexes exist. - */ - private static final String INDEXES_KEY = "triple-indexes"; private static final boolean REUSE_SECONDARY_WRITE_CURSOR = true; - /** - * The version number for the current triple store. - *
    - *
  • version 1: The first version with configurable triple indexes, a context field and a properties file. - *
- */ - private static final int SCHEME_VERSION = 1; /*-----------* * Variables * *-----------*/ @@ -157,9 +133,10 @@ class TripleStore implements Closeable { */ private final File dir; /** - * Object containing meta-data for the triple store. + * Properties of the store, such as version and triple indexes specification. These properties are stored in a file + * in the store directory and are loaded when the store is initialized. */ - private final Properties properties; + private final StoreProperties properties; /** * The list of triple indexes that are used to store and retrieve triples. */ @@ -186,7 +163,13 @@ class TripleStore implements Closeable { private TxnRecordCache recordCache = null; TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore) throws IOException, SailException { + this(dir, new StoreProperties(dir), config, valueStore); + } + + TripleStore(File dir, StoreProperties properties, LmdbStoreConfig config, ValueStore valueStore) + throws IOException, SailException { this.dir = dir; + this.properties = properties; boolean forceSync = config.getForceSync(); boolean noReadahead = config.getNoReadahead(); this.autoGrow = config.getAutoGrow(); @@ -226,81 +209,67 @@ class TripleStore implements Closeable { txnManager = new TxnManager(env, Mode.RESET); - File propFile = new File(this.dir, PROPERTIES_FILE); - String indexSpecStr = config.getTripleIndexes(); - if (!propFile.exists()) { - // newly created lmdb store - properties = new Properties(); - - Set indexSpecs = parseIndexSpecList(indexSpecStr); + try { + String indexSpecStr = config.getTripleIndexes(); + if (!properties.isLoaded()) { + // newly created lmdb store + Set indexSpecs = parseIndexSpecList(indexSpecStr); + + if (indexSpecs.isEmpty()) { + logger.debug("No indexes specified, using default indexes: {}", DEFAULT_INDEXES); + indexSpecStr = DEFAULT_INDEXES; + indexSpecs = parseIndexSpecList(indexSpecStr); + } - if (indexSpecs.isEmpty()) { - logger.debug("No indexes specified, using 
default indexes: {}", DEFAULT_INDEXES); - indexSpecStr = DEFAULT_INDEXES; - indexSpecs = parseIndexSpecList(indexSpecStr); + startTransaction(); + initIndexes(indexSpecs); + endTransaction(true); + initializePageAndMapSize(config.getTripleDBSize()); + } else { + // Initialize existing indexes + Set indexSpecs = getIndexSpecs(); + startTransaction(); + initIndexes(indexSpecs); + endTransaction(true); + initializePageAndMapSize(config.getTripleDBSize()); + + // Compare the existing indexes with the requested indexes + Set reqIndexSpecs = parseIndexSpecList(indexSpecStr); + if (reqIndexSpecs.isEmpty()) { + // No indexes specified, use the existing ones + indexSpecStr = properties.getTripleIndexes(); + } else if (!reqIndexSpecs.equals(indexSpecs)) { + // Set of indexes needs to be changed + startTransaction(); + reindex(indexSpecs, reqIndexSpecs); + endTransaction(true); + } } - initIndexes(indexSpecs, config.getTripleDBSize()); - } else { - // Read triple properties file and check format version number - properties = loadProperties(propFile); - checkVersion(); - - // Initialize existing indexes - Set indexSpecs = getIndexSpecs(); - initIndexes(indexSpecs, config.getTripleDBSize()); - - // Compare the existing indexes with the requested indexes - Set reqIndexSpecs = parseIndexSpecList(indexSpecStr); - - if (reqIndexSpecs.isEmpty()) { - // No indexes specified, use the existing ones - indexSpecStr = properties.getProperty(INDEXES_KEY); - } else if (!reqIndexSpecs.equals(indexSpecs)) { - // Set of indexes needs to be changed - reindex(indexSpecs, reqIndexSpecs); + if (!indexSpecStr.equals(properties.getTripleIndexes())) { + // Store up-to-date properties + properties.setTripleIndexes(indexSpecStr); } - } - - if (!String.valueOf(SCHEME_VERSION).equals(properties.getProperty(VERSION_KEY)) - || !indexSpecStr.equals(properties.getProperty(INDEXES_KEY))) { - // Store up-to-date properties - properties.setProperty(VERSION_KEY, String.valueOf(SCHEME_VERSION)); - 
properties.setProperty(INDEXES_KEY, indexSpecStr); - storeProperties(propFile); + } catch (IOException | SailException e) { + endTransaction(false); + throw e; } resetAlignedWriteCursorState(); } - private void checkVersion() throws SailException { - // Check version number - String versionStr = properties.getProperty(VERSION_KEY); - if (versionStr == null) { - logger.warn("{} missing in TripleStore's properties file", VERSION_KEY); - } else { - try { - int version = Integer.parseInt(versionStr); - if (version > SCHEME_VERSION) { - throw new SailException("Directory contains data that uses a newer data format"); - } - } catch (NumberFormatException e) { - logger.warn("Malformed version number in TripleStore's properties file"); - } - } - } - private Set getIndexSpecs() throws SailException { - String indexesStr = properties.getProperty(INDEXES_KEY); + String indexesStr = properties.getTripleIndexes(); - if (indexesStr == null) { - throw new SailException(INDEXES_KEY + " missing in TripleStore's properties file"); + if (indexesStr == null || indexesStr.trim().isEmpty()) { + throw new SailException(StoreProperties.INDEXES_KEY + " missing in " + StoreProperties.FILE_NAME + " file"); } Set indexSpecs = parseIndexSpecList(indexesStr); if (indexSpecs.isEmpty()) { - throw new SailException("No " + INDEXES_KEY + " found in TripleStore's properties file"); + throw new SailException( + "Invalid " + StoreProperties.INDEXES_KEY + " found in " + StoreProperties.FILE_NAME + " file"); } return indexSpecs; @@ -338,16 +307,18 @@ private Set parseIndexSpecList(String indexSpecStr) throws SailException return indexes; } - private void initIndexes(Set indexSpecs, long tripleDbSize) throws IOException { + private void initIndexes(Set indexSpecs) throws IOException { for (String fieldSeq : orderIndexSpecs(indexSpecs)) { logger.trace("Initializing index '{}'...", fieldSeq); indexes.add(new TripleIndex(fieldSeq)); } + } + private void initializePageAndMapSize(long tripleDbSize) throws 
IOException { // initialize page size and set map size for env readTransaction(env, (stack, txn) -> { MDBStat stat = MDBStat.malloc(stack); - TripleIndex mainIndex = indexes.get(0); + TripleIndex mainIndex = indexes.getFirst(); mdb_stat(txn, mainIndex.getDB(true), stat); boolean isEmpty = stat.ms_entries() == 0; @@ -543,8 +514,7 @@ private void reindex(Set currentIndexSpecs, Set newIndexSpecs) t if (!addedIndexSpecs.isEmpty()) { TripleIndex sourceIndex = indexes.get(0); for (boolean explicit : new boolean[] { true, false }) { - transaction(env, (stack, txn) -> { - + try (MemoryStack stack = stackPush()) { MDBVal keyValue = MDBVal.calloc(stack); ByteBuffer keyBuf = stack.malloc(MAX_KEY_LENGTH); keyValue.mv_data(keyBuf); @@ -556,7 +526,7 @@ private void reindex(Set currentIndexSpecs, Set newIndexSpecs) t RecordIterator[] sourceIter = { null }; try { sourceIter[0] = new LmdbRecordIterator(sourceIndex, false, -1, -1, -1, -1, - explicit, txnManager.createTxn(txn)); + explicit, txnManager.createReadTxn()); RecordIterator it = sourceIter[0]; long[] quad; @@ -566,7 +536,34 @@ private void reindex(Set currentIndexSpecs, Set newIndexSpecs) t quad[CONTEXT_IDX]); keyBuf.flip(); - E(mdb_put(txn, addedIndex.getDB(explicit), keyValue, dataValue, 0)); + if (requiresResize()) { + endTransaction(true); + + // the lock is just a safety measure if reindex is somehow called outside of the + // constructor + StampedLongAdderLockManager lockManager = txnManager.lockManager(); + long readStamp; + try { + readStamp = lockManager.readLock(); + } catch (InterruptedException e) { + throw new SailException(e); + } + try { + txnManager.deactivate(); + mapSize = LmdbUtil.autoGrowMapSize(mapSize, pageSize, 0); + E(mdb_env_set_mapsize(env, mapSize)); + logger.debug("resized map to {}", mapSize); + } finally { + try { + txnManager.activate(); + } finally { + lockManager.unlockRead(readStamp); + } + } + startTransaction(); + } + + E(mdb_put(writeTxn, addedIndex.getDB(explicit), keyValue, dataValue, 
0)); } } finally { if (sourceIter[0] != null) { @@ -576,9 +573,7 @@ private void reindex(Set currentIndexSpecs, Set newIndexSpecs) t currentIndexes.put(fieldSeq, addedIndex); } - - return null; - }); + } } logger.debug("New index(es) initialized"); @@ -589,19 +584,16 @@ private void reindex(Set currentIndexSpecs, Set newIndexSpecs) t removedIndexSpecs.removeAll(newIndexSpecs); List removedIndexExceptions = new ArrayList<>(); - transaction(env, (stack, txn) -> { - // Delete files for removed indexes - for (String fieldSeq : removedIndexSpecs) { - try { - TripleIndex removedIndex = currentIndexes.remove(fieldSeq); - removedIndex.destroy(txn); - logger.debug("Deleted file(s) for removed {} index", fieldSeq); - } catch (Throwable e) { - removedIndexExceptions.add(e); - } + // Delete files for removed indexes + for (String fieldSeq : removedIndexSpecs) { + try { + TripleIndex removedIndex = currentIndexes.remove(fieldSeq); + removedIndex.destroy(writeTxn); + logger.debug("Deleted file(s) for removed {} index", fieldSeq); + } catch (Throwable e) { + removedIndexExceptions.add(e); } - return null; - }); + } if (!removedIndexExceptions.isEmpty()) { throw new IOException(removedIndexExceptions.get(0)); @@ -744,8 +736,6 @@ protected void filterUsedIds(Collection ids) throws IOException { // TODO currently this does not test for contexts (component == 3) // because in most cases context indexes do not exist for (int component = 0; component <= 2; component++) { - int c = component; - TripleIndex index = getBestIndex(component == 0 ? 1 : -1, component == 1 ? 1 : -1, component == 2 ? 1 : -1, component == 3 ? 
1 : -1); @@ -779,13 +769,19 @@ protected void filterUsedIds(Collection ids) throws IOException { it.remove(); continue; } - if (component != 2 && (id & 1) == 1) { - // id is a literal and can only appear in object position - continue; + if (component != 2) { + // optimization: ensure that literals are only tested if they appear in object + // position + switch (ValueIds.getIdType(id)) { + case ValueIds.T_DOUBLE: + case ValueIds.T_LITERAL: + // id is a literal, don't test it + continue; + } } - long subj = c == 0 ? id : -1, pred = c == 1 ? id : -1, - obj = c == 2 ? id : -1, context = c == 3 ? id : -1; + long subj = component == 0 ? id : -1, pred = component == 1 ? id : -1, + obj = component == 2 ? id : -1, context = component == 3 ? id : -1; GroupMatcher matcher = index.createMatcher(subj, pred, obj, context); @@ -1092,7 +1088,6 @@ public boolean storeTriple(long subj, long pred, long obj, long context, boolean if (stAdded) { for (int i = 1; i < indexes.size(); i++) { - TripleIndex index = indexes.get(i); keyBuf.clear(); index.toKey(keyBuf, subj, pred, obj, context); @@ -1692,20 +1687,6 @@ public void rollback() throws IOException { endTransaction(false); } - private Properties loadProperties(File propFile) throws IOException { - try (InputStream in = new FileInputStream(propFile)) { - Properties properties = new Properties(); - properties.load(in); - return properties; - } - } - - private void storeProperties(File propFile) throws IOException { - try (OutputStream out = new FileOutputStream(propFile)) { - properties.store(out, "triple indexes meta-data, DO NOT EDIT!"); - } - } - class TripleIndex { private final char[] fieldSeq; @@ -1724,8 +1705,8 @@ public TripleIndex(String fieldSeq) throws IOException { this.leadingFieldValueAccessor = this.fieldValueAccessors[0]; this.indexMap = getIndexes(this.fieldSeq); // open database and use native sort order without comparator - dbiExplicit = openDatabase(env, fieldSeq, MDB_CREATE, null); - dbiInferred = openDatabase(env, 
fieldSeq + "-inf", MDB_CREATE, null); + dbiExplicit = openDatabaseWithTxn(writeTxn, fieldSeq, MDB_CREATE); + dbiInferred = openDatabaseWithTxn(writeTxn, fieldSeq + "-inf", MDB_CREATE); } public char[] getFieldSeq() { @@ -1763,24 +1744,14 @@ protected int[] getIndexes(char[] fieldSeq) { int[] indexes = new int[fieldSeq.length]; for (int i = 0; i < fieldSeq.length; i++) { char field = fieldSeq[i]; - int fieldIdx; - switch (field) { - case 's': - fieldIdx = SUBJ_IDX; - break; - case 'p': - fieldIdx = PRED_IDX; - break; - case 'o': - fieldIdx = OBJ_IDX; - break; - case 'c': - fieldIdx = CONTEXT_IDX; - break; - default: - throw new IllegalArgumentException( - "invalid character '" + field + "' in field sequence: " + new String(fieldSeq)); - } + int fieldIdx = switch (field) { + case 's' -> SUBJ_IDX; + case 'p' -> PRED_IDX; + case 'o' -> OBJ_IDX; + case 'c' -> CONTEXT_IDX; + default -> throw new IllegalArgumentException( + "invalid character '" + field + "' in field sequence: " + new String(fieldSeq)); + }; indexes[i] = fieldIdx; } return indexes; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java index 8ad7cbc4604..e6fe51dae32 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java @@ -81,8 +81,8 @@ public TxnRecordCache(File cacheDir) throws IOException { dbDir = Files.createTempDirectory(cacheDir.toPath(), "txncache"); E(mdb_env_open(env, dbDir.toAbsolutePath().toString(), flags, 0664)); - dbiExplicit = openDatabase(env, "quads", MDB_CREATE, null); - dbiInferred = openDatabase(env, "quads-inf", MDB_CREATE, null); + dbiExplicit = openDatabase(env, "quads", MDB_CREATE); + dbiInferred = openDatabase(env, "quads-inf", MDB_CREATE); MDBStat stat = MDBStat.malloc(stack); readTransaction(env, (stack2, txn) -> { diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueIds.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueIds.java new file mode 100644 index 00000000000..421855b11bb --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueIds.java @@ -0,0 +1,113 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb; + +/** + * Constants and functions for working with ids encoded into long values. + */ +public class ValueIds { + /** + * An inlined double value. The least significant bit of the value is set to 1 to distinguish it from other inlined + * values and references. + */ + public static final int T_DOUBLE = -1; + + /** + * Pointer to an arbitrary value in the value store. This is not used as an RDF value. 
+ */ + public static final int T_PTR = 0; + + /** Reference to a URI */ + public static final int T_URI = 1; + /** Reference to a literal */ + public static final int T_LITERAL = 2; + /** Reference to a blank node */ + public static final int T_BNODE = 3; + /** Reference to a triple */ + public static final int T_TRIPLE = 4; + + // inlined values + public static final int T_INTEGER = 16; + public static final int T_DECIMAL = 17; + public static final int T_FLOAT = 18; + public static final int T_DATETIME = 19; + public static final int T_DATETIMESTAMP = 20; + public static final int T_DATE = 21; + public static final int T_BOOLEAN = 22; + public static final int T_SHORTSTRING = 23; + public static final int T_POSITIVE_INTEGER = 24; + public static final int T_NEGATIVE_INTEGER = 25; + public static final int T_NON_NEGATIVE_INTEGER = 26; + public static final int T_NON_POSITIVE_INTEGER = 27; + public static final int T_LONG = 28; + public static final int T_INT = 29; + public static final int T_SHORT = 30; + public static final int T_BYTE = 31; + public static final int T_UNSIGNEDLONG = 32; + public static final int T_UNSIGNEDINT = 33; + public static final int T_UNSIGNEDSHORT = 34; + public static final int T_UNSIGNEDBYTE = 35; + + /** + * Returns the type section of the given id. + * + * @param id The id of which the type should be extracted. + * @return The id's type. + */ + public static int getIdType(long id) { + if (isDouble(id)) { + return T_DOUBLE; + } + return (int) ((id >> 1) & 0x3F); + } + + /** + * Returns the value section of the given id. + * + * @param id The id of which the value should be extracted. + * @return The id's value. + */ + public static long getValue(long id) { + return id >> 7; + } + + /** + * Combines an id type and a value into a single long id. + * + * @param idType The id's type. + * @param value The id's value. + * @return A composite id. 
+ */ + public static long createId(int idType, long value) { + return value << 7 | (long) idType << 1; + } + + /** + * Tests if the given id is an inlined value or a reference. + * + * @param id The id to test + * @return true if the value is inlined, else false + */ + public static boolean isInlined(long id) { + return isDouble(id) || getIdType(id) >= T_INTEGER; + } + + /** + * Tests if the given id is an inlined double value, which is identified by the least significant bit being set to + * 1. + * + * @param value The id's value + * @return true if the value is an inlined double, else false + */ + public static boolean isDouble(long value) { + return (value & 1L) != 0; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java index 894ab212bad..38553e10f2e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java @@ -9,6 +9,7 @@ * SPDX-License-Identifier: BSD-3-Clause *******************************************************************************/ // Some portions generated by Codex +// Some portions generated by Co-Pilot package org.eclipse.rdf4j.sail.lmdb; import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.E; @@ -82,6 +83,7 @@ import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.lmdb.LmdbUtil.Transaction; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.eclipse.rdf4j.sail.lmdb.inlined.Values; import org.eclipse.rdf4j.sail.lmdb.model.LmdbBNode; import org.eclipse.rdf4j.sail.lmdb.model.LmdbIRI; import org.eclipse.rdf4j.sail.lmdb.model.LmdbLiteral; @@ -102,19 +104,23 @@ class ValueStore extends AbstractValueFactory { private final static Logger logger = LoggerFactory.getLogger(ValueStore.class); - private static final byte URI_VALUE = 0x0; // 00 + private static final long VALUE_EVICTION_INTERVAL = 60000; // 
60 seconds - private static final byte LITERAL_VALUE = 0x1; // 01 + private static final byte URI_VALUE = 0; - private static final byte BNODE_VALUE = 0x2; // 10 + private static final byte LITERAL_VALUE = 1; - private static final byte NAMESPACE_VALUE = 0x3; // 11 + private static final byte BNODE_VALUE = 2; - private static final byte ID_KEY = 0x4; + private static final byte TRIPLE_VALUE = 3; - private static final byte HASH_KEY = 0x5; + private static final byte NAMESPACE_VALUE = 4; - private static final byte HASHID_KEY = 0x6; + private static final byte ID_KEY = 5; + + private static final byte HASH_KEY = 6; + + private static final byte HASHID_KEY = 7; /*** * Maximum size of keys before hashing is used (size of two long values) @@ -136,6 +142,13 @@ class ValueStore extends AbstractValueFactory { * Used to do the actual storage of values, once they're translated to byte arrays. */ private final File dir; + + /** + * Properties of the store, such as version and triple indexes specification. These properties are stored in a file + * in the store directory and are loaded when the store is initialized. + */ + private final StoreProperties properties; + /** * Lock for clearing caches when values are removed. */ @@ -159,6 +172,12 @@ class ValueStore extends AbstractValueFactory { * namespace. */ private final ConcurrentCache namespaceIDCache; + /** + * This lock is required to block transactions while auto-growing the map size. + */ + private final ReadWriteLock txnLock = new ReentrantReadWriteLock(); + private final Map refCountsTxCache = new HashMap<>(); + private final ConcurrentHashMap commonVocabulary = new ConcurrentHashMap<>(); /** * Used to do the actual storage of values, once they're translated to byte arrays. */ @@ -179,11 +198,6 @@ class ValueStore extends AbstractValueFactory { private final boolean autoGrow; private boolean invalidateRevisionOnCommit = false; - /** - * This lock is required to block transactions while auto-growing the map size. 
- */ - private final ReadWriteLock txnLock = new ReentrantReadWriteLock(); - /** * An object that indicates the revision of the value store, which is used to check if cached value IDs are still * valid. In order to be valid, the ValueStoreRevision object of a LmdbValue needs to be equal to this object. @@ -194,12 +208,12 @@ class ValueStore extends AbstractValueFactory { * object is GCed then it is safe to finally remove the ID-value associations and to reuse IDs. */ private volatile ValueStoreRevision.Lazy lazyRevision; - /** * The next ID that is associated with a stored value */ private long nextId = 1; private boolean freeIdsAvailable; + private ValueStoreHashFile hashFile; private final Map pendingHashUpdates = new HashMap<>(); @@ -219,15 +233,22 @@ class ValueStore extends AbstractValueFactory { private final long valueEvictionInterval; private final boolean valueHashCacheEnabled; + private final boolean inlineLiterals; ValueStore(File dir, LmdbStoreConfig config) throws IOException { + this(dir, new StoreProperties(dir), config); + } + + ValueStore(File dir, StoreProperties properties, LmdbStoreConfig config) throws IOException { this.dir = dir; + this.properties = properties; this.forceSync = config.getForceSync(); this.noReadahead = config.getNoReadahead(); this.autoGrow = config.getAutoGrow(); this.mapSize = config.getValueDBSize(); this.valueEvictionInterval = config.getValueEvictionInterval(); this.valueHashCacheEnabled = config.getValueHashCacheEnabled(); + this.inlineLiterals = config.getInlineLiterals(); open(); int cacheSize = nextPowerOfTwo(config.getValueCacheSize()); @@ -290,6 +311,14 @@ private void openHashFileQuietly() { } } + private static boolean isCommonVocabulary(IRI nv) { + String string = nv.toString(); + return string.startsWith("http://www.w3.org/") || + string.startsWith("http://purl.org/") || + string.startsWith("http://publications.europa.eu/resource/authority") || + string.startsWith("http://xmlns.com/"); + } + private void 
logValues() throws IOException { readTransaction(env, (stack, txn) -> { long cursor = 0; @@ -347,7 +376,7 @@ private void open() throws IOException { E(mdb_env_open(env, dir.getAbsolutePath(), flags, 0664)); // open main database - dbi = openDatabase(env, null, MDB_CREATE, null); + dbi = openDatabase(env, null, MDB_CREATE); // initialize page size and set map size for env readTransaction(env, (stack, txn) -> { @@ -378,11 +407,11 @@ private void open() throws IOException { }); // open unused IDs database - unusedDbi = openDatabase(env, "unused_ids", MDB_CREATE, null); + unusedDbi = openDatabase(env, "unused_ids", MDB_CREATE); // open free IDs database - freeDbi = openDatabase(env, "free_ids", MDB_CREATE, null); + freeDbi = openDatabase(env, "free_ids", MDB_CREATE); // open ref_counts database - refCountsDbi = openDatabase(env, "ref_counts", MDB_CREATE, null); + refCountsDbi = openDatabase(env, "ref_counts", MDB_CREATE); // check if free IDs are available readTransaction(env, (stack, txn) -> { @@ -404,7 +433,14 @@ private void open() throws IOException { }); } - private long nextId(byte type) throws IOException { + private long nextId(byte valueType) throws IOException { + int idType = switch (valueType) { + case URI_VALUE -> ValueIds.T_URI; + case BNODE_VALUE -> ValueIds.T_BNODE; + case LITERAL_VALUE -> ValueIds.T_LITERAL; + case NAMESPACE_VALUE -> ValueIds.T_PTR; + default -> throw new IllegalArgumentException("Unexpected value type: " + valueType); + }; if (freeIdsAvailable) { // next id from store Long reusedId = writeTransaction((stack, txn) -> { @@ -416,11 +452,13 @@ private long nextId(byte type) throws IOException { MDBVal keyData = MDBVal.calloc(stack); MDBVal valueData = MDBVal.calloc(stack); + if (mdb_cursor_get(cursor, keyData, valueData, MDB_FIRST) == MDB_SUCCESS) { long freedId = data2id(keyData.mv_data()); clearStoredHash(freedId); - // remove lower 2 type bits - long value = freedId >> 2; + + // unpack value from compound id + long value = 
ValueIds.getValue(freedId); // delete entry E(mdb_cursor_del(cursor, 0)); return value; @@ -434,17 +472,12 @@ private long nextId(byte type) throws IOException { } }); if (reusedId != null) { - long result = reusedId; - // encode type in lower 2 bits of id - result = (result << 2) | type; - return result; + return ValueIds.createId(idType, reusedId); } } long result = nextId; nextId++; - // encode type in lower 2 bits of id - result = (result << 2) | type; - return result; + return ValueIds.createId(idType, result); } protected ByteBuffer idBuffer(MemoryStack stack) { @@ -591,15 +624,36 @@ private static int nextPowerOfTwo(int n) { * @throws IOException If an I/O error occurred. */ public LmdbValue getLazyValue(long id) throws IOException { - switch ((byte) (id & 0x3)) { - case URI_VALUE: - return new LmdbIRI(lazyRevision, id); - case LITERAL_VALUE: - return new LmdbLiteral(lazyRevision, id); - case BNODE_VALUE: - return new LmdbBNode(lazyRevision, id); - default: - throw new IOException("Unsupported value with type id " + (id & 0x3)); + long stamp = revisionLock.readLock(); + try { + // Check value cache + LmdbValue resultValue = cachedValue(id); + + if (resultValue == null) { + int idType = ValueIds.getIdType(id); + switch (idType) { + case ValueIds.T_URI: + resultValue = new LmdbIRI(lazyRevision, id); + break; + case ValueIds.T_DOUBLE: + case ValueIds.T_LITERAL: + resultValue = new LmdbLiteral(lazyRevision, id); + break; + case ValueIds.T_BNODE: + resultValue = new LmdbBNode(lazyRevision, id); + break; + default: + if (ValueIds.isInlined(id)) { + resultValue = new LmdbLiteral(lazyRevision, id); + break; + } + throw new IOException("Unsupported value with id=" + id + " and id type " + idType); + } + } + + return resultValue; + } finally { + revisionLock.unlockRead(stamp); } } @@ -617,6 +671,12 @@ public LmdbValue getValue(long id) throws IOException { LmdbValue resultValue = cachedValue(id); if (resultValue == null) { + // unpack inlined values if possible + if 
(ValueIds.isInlined(id)) { + Literal unpacked = Values.unpackLiteral(id, this); + return new LmdbLiteral(revision, unpacked.getLabel(), unpacked.getDatatype(), id); + } + // Value not in cache, fetch it from file byte[] data = getData(id); @@ -641,6 +701,13 @@ public LmdbValue getValue(long id) throws IOException { * @return true if value could be successfully resolved, else false */ public boolean resolveValue(long id, LmdbValue value) { + // unpack inlined values if possible + if (ValueIds.isInlined(id)) { + Literal unpacked = Values.unpackLiteral(id, this); + ((LmdbLiteral) value).setLabel(unpacked.getLabel()); + ((LmdbLiteral) value).setDatatype(unpacked.getDatatype()); + return true; + } // Try to get from cache LmdbValue cached = cachedValue(id); if (cached != null && this.getRevision().getRevisionId() == cached.getValueStoreRevision().getRevisionId()) { @@ -684,7 +751,7 @@ private void resizeMap(long txn, long requiredSize) throws IOException { mdb_txn_reset(txn); } if (activeWriteTxn) { - endTransaction(true); + endTransaction(true, true); } long oldMapSize = mapSize; @@ -709,57 +776,84 @@ private void resizeMap(long txn, long requiredSize) throws IOException { } } - private void incrementRefCount(MemoryStack stack, long writeTxn, byte[] data) throws IOException { + private void incrementRefCount(MemoryStack stack, long writeTxn, byte[] data) { // literals have a datatype id and URIs have a namespace id if (data[0] == LITERAL_VALUE || data[0] == URI_VALUE) { - try { - stack.push(); - ByteBuffer bb = ByteBuffer.wrap(data); - // skip type marker - int idLength = Varint.firstToLength(bb.get(1)); - MDBVal idVal = MDBVal.calloc(stack); - MDBVal dataVal = MDBVal.calloc(stack); - idVal.mv_data(idBuffer(stack).put(ID_KEY).put(data, 1, idLength).flip()); - long newCount = 1; - if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { - // update count - newCount = Varint.readUnsigned(dataVal.mv_data()) + 1; + // skip type marker + long id = 
Varint.readUnsigned(ByteBuffer.wrap(data, 1, data.length - 1));
+			// accumulate the count in the per-transaction cache instead of writing to LMDB on every call
+			refCountsTxCache.compute(id, (k, v) -> {
+				if (v == null) {
+					// first access to this id: seed the cached count from the stored one
+					try {
+						stack.push();
+						MDBVal idVal = MDBVal.calloc(stack);
+						MDBVal dataVal = MDBVal.calloc(stack);
+						// key layout is ID_KEY prefix + varint id, the same layout decrementRefCount and
+						// updateRefCounts use; without the prefix the stored count is never found
+						idVal.mv_data(
+								idBuffer(stack).put(ID_KEY).put(data, 1, Varint.calcLengthUnsigned(id)).flip());
+						long newCount = 1;
+						if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) {
+							// update count
+							newCount = Varint.readUnsigned(dataVal.mv_data()) + 1;
+						}
+						return newCount;
+					} finally {
+						stack.pop();
+					}
+				} else {
+					return v + 1;
 				}
-				// write count
-				ByteBuffer countBb = stack.malloc(Varint.calcLengthUnsigned(newCount));
-				Varint.writeUnsigned(countBb, newCount);
-				dataVal.mv_data(countBb.flip());
-				E(mdb_put(writeTxn, refCountsDbi, idVal, dataVal, 0));
+			});
+		}
+	}
 
-			} finally {
-				stack.pop();
+	/**
+	 * Decrements the cached reference count of the given id. On first access within the transaction the count
+	 * is loaded from the ref_counts database (0 if no entry exists).
+	 *
+	 * @param stack    memory stack used for temporary native buffers
+	 * @param writeTxn the active write transaction
+	 * @param id       the id whose count is decremented
+	 * @return true if the resulting cached count is exactly zero, else false
+	 */
+	private boolean decrementRefCount(MemoryStack stack, long writeTxn, long id) {
+		return refCountsTxCache.compute(id, (k, v) -> {
+			if (v == null) {
+				try {
+					stack.push();
+					MDBVal idVal = MDBVal.calloc(stack);
+					MDBVal dataVal = MDBVal.calloc(stack);
+					ByteBuffer idBb = idBuffer(stack).put(ID_KEY);
+					Varint.writeUnsigned(idBb, id);
+					idVal.mv_data(idBb.flip());
+					long newCount = 0;
+					if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) {
+						// update count
+						newCount = Varint.readUnsigned(dataVal.mv_data()) - 1;
+					}
+					return newCount;
+				} finally {
+					stack.pop();
+				}
+			} else {
+				return v - 1;
 			}
-		}
+		}) == 0;
 	}
 
-	private boolean decrementRefCount(MemoryStack stack, long writeTxn, ByteBuffer idBb) throws IOException {
+	/**
+	 * Writes every count held in refCountsTxCache to the ref_counts database: entries whose count is zero or
+	 * negative are deleted, all others are stored as unsigned varints. The cache itself is not cleared here.
+	 *
+	 * @param stack    memory stack used for temporary native buffers
+	 * @param writeTxn the active write transaction
+	 * @throws IOException if an LMDB operation fails
+	 */
+	private void updateRefCounts(MemoryStack stack, long writeTxn) throws IOException {
 		try {
 			stack.push();
 			MDBVal idVal = MDBVal.calloc(stack);
-			idVal.mv_data(idBb);
+			ByteBuffer idBb = idBuffer(stack);
+			ByteBuffer countBb = stack.malloc(Long.BYTES + 1);
 			MDBVal dataVal = MDBVal.calloc(stack);
-			if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) {
-				// update count
-				long newCount = Varint.readUnsigned(dataVal.mv_data()) - 1;
-				if (newCount <= 0) {
+			for (Map.Entry<Long, Long> entry : refCountsTxCache.entrySet()) {
+				long count = entry.getValue();
+				// both buffers are reused for every entry
+				idBb.clear();
+				idBb.put(ID_KEY);
+				Varint.writeUnsigned(idBb, entry.getKey());
+				idVal.mv_data(idBb.flip());
+				if (count <= 0) {
+					// delete count entry
 					E(mdb_del(writeTxn, refCountsDbi, idVal, null));
-					return true;
 				} else {
-					// write count
-					ByteBuffer countBb = stack.malloc(Varint.calcLengthUnsigned(newCount));
-					Varint.writeUnsigned(countBb, newCount);
+					// update count
+					countBb.clear();
+					Varint.writeUnsigned(countBb, count);
 					dataVal.mv_data(countBb.flip());
 					E(mdb_put(writeTxn, refCountsDbi, idVal, dataVal, 0));
 				}
 			}
-			return false;
 		} finally {
 			stack.pop();
 		}
@@ -954,8 +1048,6 @@ public long getId(Value value) throws IOException {
 		return getId(value, false);
 	}
 
-	private final ConcurrentHashMap commonVocabulary = new ConcurrentHashMap<>();
-
 	/**
 	 * Gets the ID for the specified value.
 	 *
@@ -966,13 +1058,10 @@ public long getId(Value value) throws IOException {
 	public long getId(Value value, boolean create) throws IOException {
 		// Try to get the internal ID from the value itself
 		boolean isOwnValue = isOwnValue(value);
-
 		if (isOwnValue) {
 			LmdbValue lmdbValue = (LmdbValue) value;
-
 			if (revisionIsCurrent(lmdbValue)) {
 				long id = lmdbValue.getInternalID();
-
 				if (id != LmdbValue.UNKNOWN_ID) {
 					return id;
 				}
@@ -989,44 +1078,61 @@ public long getId(Value value, boolean create) throws IOException {
 			if (cachedID != null) {
 				long id = cachedID;
-
 				if (isOwnValue) {
 					// Store id in value for fast access in any consecutive calls
 					((LmdbValue) value).setInternalID(id, revision);
 				}
-
 				return id;
 			}
 
-			// ID not cached, search in file
-			byte[] data = value2data(value, create);
-			if (data == null && value instanceof Literal) {
-				data = literal2legacy((Literal) value);
+			long id = LmdbValue.UNKNOWN_ID;
+			if (inlineLiterals && value instanceof Literal) {
+				// inline value into id if possible
+				try {
+					long packedId =
Values.packLiteral((Literal) value); + if (packedId != 0L) { + Literal unpacked = Values.unpackLiteral(packedId, this); + if (unpacked.equals(value)) { + id = packedId; + } + } + } catch (IllegalArgumentException e) { + // ignore, invalid literal + } } - if (data != null) { - long id = findId(data, create); + if (id == LmdbValue.UNKNOWN_ID) { + // not inlined or ID not cached, search in index + byte[] data = value2data(value, create); + if (data == null && value instanceof Literal) { + data = literal2legacy((Literal) value); + } - if (id != LmdbValue.UNKNOWN_ID) { - if (isOwnValue) { - // Store id in value for fast access in any consecutive calls - ((LmdbValue) value).setInternalID(id, revision); - // Store id in cache - valueIDCache.put((LmdbValue) value, id); - } else { - // Store id in cache - LmdbValue nv = getLmdbValue(value); - nv.setInternalID(id, revision); + if (data != null) { + id = findId(data, create); + } + } - if (nv.isIRI() && isCommonVocabulary(((IRI) nv))) { - commonVocabulary.put(value, id); - } + if (id != LmdbValue.UNKNOWN_ID) { + if (isOwnValue) { + // Store id in value for fast access in any consecutive calls + ((LmdbValue) value).setInternalID(id, revision); + // Store id in cache + valueIDCache.put((LmdbValue) value, id); + } else { + // Store id in cache + LmdbValue nv = getLmdbValue(value); + nv.setInternalID(id, revision); - valueIDCache.put(nv, id); + if (nv.isIRI() && isCommonVocabulary(((IRI) nv))) { + commonVocabulary.put(value, id); } + valueIDCache.put(nv, id); + } + // only store hash for non-inlined values + if (!ValueIds.isInlined(id)) { storeHashIfAbsent(id, value); } - return id; } } finally { @@ -1036,14 +1142,6 @@ public long getId(Value value, boolean create) throws IOException { return LmdbValue.UNKNOWN_ID; } - private static boolean isCommonVocabulary(IRI nv) { - String string = nv.toString(); - return string.startsWith("http://www.w3.org/") || - string.startsWith("http://purl.org/") || - 
string.startsWith("http://publications.europa.eu/resource/authority") || - string.startsWith("http://xmlns.com/"); - } - public void gcIds(Collection ids, Collection nextIds) throws IOException { if (!enableGC()) { return; @@ -1052,6 +1150,8 @@ public void gcIds(Collection ids, Collection nextIds) throws IOExcep if (!ids.isEmpty()) { // wrap into read txn as resizeMap expects an active surrounding read txn readTransaction(env, (stack1, txn1) -> { + // contains IDs for data types and namespaces which are freed by garbage collecting literals and URIs + resizeMap(writeTxn, 2 * ids.size() * (1 + Long.BYTES + 2 + Long.BYTES)); final Collection finalIds = ids; final Collection finalNextIds = nextIds; @@ -1064,16 +1164,19 @@ public void gcIds(Collection ids, Collection nextIds) throws IOExcep Varint.writeUnsigned(revIdBb, revision.getRevisionId()); int revLength = revIdBb.position(); for (Long id : finalIds) { - // contains IDs for data types and namespaces which are freed by garbage collecting literals and - // URIs - resizeMap(writeTxn, 10L * ids.size() * (1L + Long.BYTES + 2L + Long.BYTES)); - revIdBb.position(revLength).limit(revIdBb.capacity()); revIdVal.mv_data(id2data(revIdBb, id).flip()); // check if id has internal references and therefore cannot be deleted idVal.mv_data(revIdBb.slice().position(revLength)); - if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { - continue; + Long refCount = refCountsTxCache.get(id); + if (refCount == null) { + if (mdb_get(writeTxn, refCountsDbi, idVal, dataVal) == MDB_SUCCESS) { + continue; + } + } else { + if (refCount > 0) { + continue; + } } // mark id as unused E(mdb_put(writeTxn, unusedDbi, revIdVal, dataVal, 0)); @@ -1082,7 +1185,7 @@ public void gcIds(Collection ids, Collection nextIds) throws IOExcep deleteValueToIdMappings(stack, writeTxn, finalIds, finalNextIds); - invalidateRevisionOnCommit = enableGC(); + invalidateRevisionOnCommit = true; if (nextValueEvictionTime < 0) { nextValueEvictionTime = 
System.currentTimeMillis() + this.valueEvictionInterval; } @@ -1103,30 +1206,24 @@ protected void deleteValueToIdMappings(MemoryStack stack, long txn, Collection= 0 && System.currentTimeMillis() >= nextValueEvictionTime) { synchronized (unusedRevisionIds) { - MDBStat stat = MDBStat.malloc(stack); - mdb_stat(writeTxn, unusedDbi, stat); + if (!unusedRevisionIds.isEmpty()) { + MDBStat stat = MDBStat.malloc(stack); + mdb_stat(writeTxn, unusedDbi, stat); - if (resize) { - resizeMap(writeTxn, stat.ms_entries() * (2L + Long.BYTES)); - } + if (resize) { + resizeMap(writeTxn, stat.ms_entries() * (2L + Long.BYTES)); + } - freeUnusedIdsAndValues(stack, writeTxn, unusedRevisionIds); - unusedRevisionIds.clear(); + freeUnusedIdsAndValues(stack, writeTxn, unusedRevisionIds); + unusedRevisionIds.clear(); + clearCaches(); + } } nextValueEvictionTime = -1; } @@ -1284,9 +1384,15 @@ public void startTransaction(boolean resize) throws IOException { /** * Closes the snapshot and the DB iterator if any was opened in the current transaction */ - void endTransaction(boolean commit) throws IOException { + void endTransaction(boolean commit, boolean autoGrow) throws IOException { if (writeTxn != 0) { if (commit) { + if (!autoGrow) { + try (MemoryStack stack = stackPush()) { + updateRefCounts(stack, writeTxn); + } + refCountsTxCache.clear(); + } if (invalidateRevisionOnCommit) { long stamp = revisionLock.writeLock(); try { @@ -1311,6 +1417,7 @@ void endTransaction(boolean commit) throws IOException { flushPendingHashUpdates(); } } else { + refCountsTxCache.clear(); mdb_txn_abort(writeTxn); clearPendingHashUpdates(); } @@ -1320,12 +1427,12 @@ void endTransaction(boolean commit) throws IOException { } public void commit() throws IOException { - endTransaction(true); + endTransaction(true, false); threadLocalReadTxn.get().close(); } public void rollback() throws IOException { - endTransaction(false); + endTransaction(false, false); threadLocalReadTxn.get().close(); } @@ -1406,7 +1513,7 @@ public 
void close() throws IOException { flushPendingHashUpdates(); } closeReadTransactions(); - endTransaction(false); + endTransaction(false, false); mdb_env_close(env); env = 0; } @@ -1682,7 +1789,7 @@ private byte[] literal2legacy(Literal literal) throws IOException { private byte[] literal2data(String label, Optional lang, IRI dt, boolean create) throws IOException { // Get datatype ID - long datatypeID = LmdbValue.UNKNOWN_ID; + long datatypeID = 0L; if (dt != null) { datatypeID = getId(dt, create); @@ -1770,7 +1877,8 @@ private LmdbLiteral data2literal(long id, byte[] data, LmdbLiteral value) throws // Get datatype long datatypeID = Varint.readUnsignedHeap(bb); IRI datatype = null; - if (datatypeID != LmdbValue.UNKNOWN_ID) { + // literal without a datatype + if (datatypeID > 0) { datatype = (IRI) getValue(datatypeID); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java index af6632804d6..187a1c14f9a 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java @@ -132,7 +132,9 @@ public static void writeUnsigned(final ByteBuffer bb, final long value) { return; } - if (value <= 240) { + if (value < 0) { + throw new IllegalArgumentException("Negative value can not be encoded as varint: " + value); + } else if (value <= 240) { bb.put((byte) value); } else if (value <= 2287) { // header: 241..248, then 1 payload byte @@ -217,11 +219,13 @@ private static void writeSignificantBits(ByteBuffer bb, long value, int bytes) { /** * Calculates required length in bytes to encode the given long value using variable-length encoding. 
* - * @param value the value value + * @param value the value * @return length in bytes */ public static int calcLengthUnsigned(long value) { - if (value <= 240) { + if (value < 0) { + throw new IllegalArgumentException("Negative value can not be encoded as varint: " + value); + } else if (value <= 240) { return 1; } else if (value <= 2287) { return 2; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java index ff988d23458..290ec3b941b 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfig.java @@ -8,7 +8,7 @@ * * SPDX-License-Identifier: BSD-3-Clause *******************************************************************************/ -// Some portions generated by Codex +// Some portions generated by Co-Pilot package org.eclipse.rdf4j.sail.lmdb.config; import java.time.Duration; @@ -89,6 +89,8 @@ public class LmdbStoreConfig extends BaseSailConfig { private boolean valueHashCacheEnabled = false; + private boolean inlineLiterals = true; + /*--------------* * Constructors * *--------------*/ @@ -240,6 +242,15 @@ public LmdbStoreConfig setValueHashCacheEnabled(boolean valueHashCacheEnabled) { return this; } + public boolean getInlineLiterals() { + return inlineLiterals; + } + + public LmdbStoreConfig setInlineLiterals(boolean inlineLiterals) { + this.inlineLiterals = inlineLiterals; + return this; + } + @Override public Resource export(Model m) { Resource implNode = super.export(m); @@ -288,6 +299,9 @@ public Resource export(Model m) { if (valueHashCacheEnabled) { m.add(implNode, LmdbStoreSchema.VALUE_HASH_CACHE_ENABLED, vf.createLiteral(true)); } + if (!inlineLiterals) { + m.add(implNode, LmdbStoreSchema.INLINE_LITERALS, vf.createLiteral(false)); + } return implNode; } @@ -434,6 +448,17 @@ public void parse(Model 
m, Resource implNode) throws SailConfigException { + " property, found " + lit); } }); + + Models.objectLiteral(m.getStatements(implNode, LmdbStoreSchema.INLINE_LITERALS, null)) + .ifPresent(lit -> { + try { + setInlineLiterals(lit.booleanValue()); + } catch (IllegalArgumentException e) { + throw new SailConfigException( + "Boolean value required for " + LmdbStoreSchema.INLINE_LITERALS + + " property, found " + lit); + } + }); } catch (ModelException e) { throw new SailConfigException(e.getMessage(), e); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java index 317a6790678..e4e08031e91 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreSchema.java @@ -8,7 +8,7 @@ * * SPDX-License-Identifier: BSD-3-Clause *******************************************************************************/ -// Some portions generated by Codex +// Some portions generated by Co-Pilot package org.eclipse.rdf4j.sail.lmdb.config; import org.eclipse.rdf4j.model.IRI; @@ -97,6 +97,11 @@ public class LmdbStoreSchema { */ public final static IRI VALUE_HASH_CACHE_ENABLED; + /** + * http://rdf4j.org/config/sail/lmdb#inlineLiterals + */ + public final static IRI INLINE_LITERALS; + static { ValueFactory factory = SimpleValueFactory.getInstance(); TRIPLE_INDEXES = factory.createIRI(NAMESPACE, "tripleIndexes"); @@ -113,5 +118,6 @@ public class LmdbStoreSchema { PAGE_CARDINALITY_ESTIMATOR = factory.createIRI(NAMESPACE, "pageCardinalityEstimator"); VALUE_EVICTION_INTERVAL = factory.createIRI(NAMESPACE, "valueEvictionInterval"); VALUE_HASH_CACHE_ENABLED = factory.createIRI(NAMESPACE, "valueHashCacheEnabled"); + INLINE_LITERALS = factory.createIRI(NAMESPACE, "inlineLiterals"); } } diff --git 
a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Booleans.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Booleans.java new file mode 100644 index 00000000000..096146618e9 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Booleans.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +public class Booleans { + + static long packBoolean(Literal literal) { + return ValueIds.createId(ValueIds.T_BOOLEAN, literal.booleanValue() ? 1L : 0L); + } + + static Literal unpackBoolean(long value, ValueFactory valueFactory) { + return valueFactory.createLiteral(ValueIds.getValue(value) != 0); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Bytes.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Bytes.java new file mode 100644 index 00000000000..abd68b50d19 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Bytes.java @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +public class Bytes { + + /** + * Packs a byte array into a long value. + * + * @param bytes the byte array to be packed into a long. + * @return the long value representing the packed bytes. + * + * Note: Assumes the length of the byte array is within a reasonable range for packing into a long + * (typically 8 bytes or less, depending on use case). + */ + static long packBytes(byte[] bytes) { + long value = 0; + for (int i = 0; i < bytes.length; i++) { + value = value << 8; + value = value | (bytes[i] & 0xFF); + } + return value; + } + + /** + * Unpacks a long value into a byte array. + * + * @param value the long value to be unpacked. + * @param length the number of bytes to unpack from the long value. + * @return the byte array representing the unpacked bytes. + */ + static byte[] unpackBytes(long value, int length) { + byte[] bytes = new byte[length]; + for (int i = bytes.length - 1; i >= 0; i--) { + bytes[i] = (byte) (value & 0xFF); + value = value >>> 8; + } + return bytes; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Dates.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Dates.java new file mode 100644 index 00000000000..b347bbd0abb --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Dates.java @@ -0,0 +1,168 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.sail.lmdb.inlined;
+
+import javax.xml.datatype.DatatypeConfigurationException;
+import javax.xml.datatype.DatatypeFactory;
+import javax.xml.datatype.XMLGregorianCalendar;
+
+import org.eclipse.rdf4j.model.Literal;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.base.CoreDatatype;
+import org.eclipse.rdf4j.model.base.CoreDatatype.XSD;
+import org.eclipse.rdf4j.sail.lmdb.ValueIds;
+
+/**
+ * Packs xsd:dateTime, xsd:dateTimeStamp and xsd:date literals into inlined value ids and unpacks them again.
+ */
+public class Dates {
+	private static final ThreadLocal<DatatypeFactory> DATATYPE_FACTORY = ThreadLocal.withInitial(() -> {
+		try {
+			return DatatypeFactory.newInstance(); // not guaranteed to be thread-safe
+		} catch (DatatypeConfigurationException e) {
+			throw new RuntimeException("unable to create datatype factory", e);
+		}
+	});
+
+	/**
+	 * Packs an xsd:dateTime literal into an id of type {@link ValueIds#T_DATETIME}.
+	 *
+	 * @param literal the literal to pack
+	 * @return the inlined id, or 0 if the literal cannot be encoded
+	 */
+	static long packDateTime(Literal literal) {
+		try {
+			XMLGregorianCalendar calendar = literal.calendarValue();
+			return ValueIds.createId(ValueIds.T_DATETIME, encodeToLong(calendar, XSD.DATETIME));
+		} catch (IllegalArgumentException iae) {
+			// packing is not possible
+		}
+		return 0L;
+	}
+
+	/**
+	 * Packs an xsd:dateTimeStamp literal into an id of type {@link ValueIds#T_DATETIMESTAMP}.
+	 *
+	 * @param literal the literal to pack
+	 * @return the inlined id, or 0 if the literal cannot be encoded
+	 */
+	static long packDateTimeStamp(Literal literal) {
+		try {
+			XMLGregorianCalendar calendar = literal.calendarValue();
+			// the id type must be T_DATETIMESTAMP; with T_DATETIME the id would identify an xsd:dateTime value
+			return ValueIds.createId(ValueIds.T_DATETIMESTAMP, encodeToLong(calendar, XSD.DATETIMESTAMP));
+		} catch (IllegalArgumentException iae) {
+			// packing is not possible
+		}
+		return 0L;
+	}
+
+	/**
+	 * Packs an xsd:date literal into an id of type {@link ValueIds#T_DATE}.
+	 *
+	 * @param literal the literal to pack
+	 * @return the inlined id, or 0 if the literal cannot be encoded
+	 */
+	static long packDate(Literal literal) {
+		try {
+			XMLGregorianCalendar calendar = literal.calendarValue();
+			return ValueIds.createId(ValueIds.T_DATE, encodeToLong(calendar, XSD.DATE));
+		} catch (IllegalArgumentException iae) {
+			// packing is not possible
+		}
+
return 0L; + } + + static Literal unpackDateTime(long value, ValueFactory valueFactory) { + XMLGregorianCalendar calendar = decodeFromLong(ValueIds.getValue(value), XSD.DATETIME); + return valueFactory.createLiteral(calendar.toXMLFormat(), XSD.DATETIME); + } + + static Literal unpackDateTimeStamp(long value, ValueFactory valueFactory) { + XMLGregorianCalendar calendar = decodeFromLong(ValueIds.getValue(value), XSD.DATETIMESTAMP); + return valueFactory.createLiteral(calendar.toXMLFormat(), XSD.DATETIMESTAMP); + } + + static Literal unpackDate(long value, ValueFactory valueFactory) { + XMLGregorianCalendar calendar = decodeFromLong(ValueIds.getValue(value), XSD.DATE); + return valueFactory.createLiteral(calendar.toXMLFormat(), XSD.DATE); + } + + /** + * Encodes an XSD dateTime/date/time string (with optional millis, timezone) into 7 bytes of a long. Supports: + *
    + *
  • dateTime: "YYYY-MM-DDThh:mm:ss(.SSS)(Z|±hh:mm)"
  • + *
  • date: "YYYY-MM-DD(Z|±hh:mm)"
  • + *
  • time: "hh:mm:ss(.SSS)(Z|±hh:mm)"
  • + *
+ */ + static long encodeToLong(XMLGregorianCalendar calendar, CoreDatatype type) { + int year = calendar.getYear(); + int month = calendar.getMonth(); + int day = calendar.getDay(); + int hour = calendar.getHour(); + int minute = calendar.getMinute(); + int second = calendar.getSecond(); + int milli = calendar.getMinute(); + // in 15-min steps + int tzOffsetStep = calendar.getTimezone() / 15; + + // Range checks + if (type != XSD.TIME) { + if (year < 0 || year > 8191) { + throw new IllegalArgumentException("Year out of range for encoding: " + year); + } + if (month < 1 || month > 12) { + throw new IllegalArgumentException("Month out of range: " + month); + } + if (day < 1 || day > 31) { + throw new IllegalArgumentException("Day out of range: " + day); + } + } + if (type != XSD.DATE) { + if (hour < 0 || hour > 23) { + throw new IllegalArgumentException("Hour out of range: " + hour); + } + if (minute < 0 || minute > 59) { + throw new IllegalArgumentException("Minute out of range: " + minute); + } + if (second < 0 || second > 59) { + throw new IllegalArgumentException("Second out of range: " + second); + } + if (milli < 0 || milli > 999) { + throw new IllegalArgumentException("Millis out of range: " + milli); + } + } + if (tzOffsetStep < -64 || tzOffsetStep > 63) { + throw new IllegalArgumentException("Timezone offset out of encodable range ±15h 45min"); + } + + int tzBits = tzOffsetStep + 64; + + long bits = 0; + bits |= ((long) tzBits & 0x7F) << 49; // 7 bits (most significant) + bits |= ((long) milli & 0x3FF) << 39; // 10 bits + bits |= ((long) second & 0x3F) << 33; // 6 bits + bits |= ((long) minute & 0x3F) << 27; // 6 bits + bits |= ((long) hour & 0x1F) << 22; // 5 bits + bits |= ((long) day & 0x1F) << 17; // 5 bits + bits |= ((long) month & 0x0F) << 13; // 4 bits + bits |= ((long) year & 0x1FFF); // 13 bits (least significant) + + return bits; + } + + /** + * Decodes a 7-byte long back to an XSD dateTime/date/time string (uses 3-digit millis if present). 
+ */ + static XMLGregorianCalendar decodeFromLong(long bits, CoreDatatype type) { + int year = (int) (bits & 0x1FFF); // 13 bits + int month = (int) ((bits >>> 13) & 0x0F); // 4 bits + int day = (int) ((bits >>> 17) & 0x1F); // 5 bits + int hour = (int) ((bits >>> 22) & 0x1F); // 5 bits + int minute = (int) ((bits >>> 27) & 0x3F); // 6 bits + int second = (int) ((bits >>> 33) & 0x3F); // 6 bits + int milli = (int) ((bits >>> 39) & 0x3FF); // 10 bits + int tzBits = (int) ((bits >>> 49) & 0x7F); // 7 bits (most significant) + int tzOffsetStep = tzBits - 64; + int tzOffsetMin = tzOffsetStep * 15; + + XMLGregorianCalendar calendar = DATATYPE_FACTORY.get().newXMLGregorianCalendar(); + calendar.setYear(year); + calendar.setMonth(month); + calendar.setDay(day); + calendar.setHour(hour); + calendar.setMinute(minute); + calendar.setSecond(second); + calendar.setMillisecond(milli); + calendar.setTimezone(tzOffsetMin); + return calendar; + } + +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java new file mode 100644 index 00000000000..ba42e36b336 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java @@ -0,0 +1,145 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +public class Decimals { + + static final int DECIMAL_VALUE_BITS = 48; + static final int DECIMAL_SCALE_BITS = 8; + static final BigInteger MAX_DECIMAL_VALUE = BigInteger.valueOf((1L << (DECIMAL_VALUE_BITS - 1)) - 1); + static final BigInteger MIN_DECIMAL_VALUE = BigInteger.valueOf(-(1L << (DECIMAL_VALUE_BITS - 1))); + static final int MAX_DECIMAL_SCALE = 2 ^ (DECIMAL_SCALE_BITS - 1) - 1; + static final int MIN_DECIMAL_SCALE = -2 ^ (DECIMAL_SCALE_BITS - 1); + static final int DOUBLE_EXPONENT_BITS = 9; + private static final int DOUBLE_EXPONENT_ZERO_OR_SUBNORMAL = 0; + private static final int DOUBLE_EXPONENT_INF_OR_NAN = (1 << DOUBLE_EXPONENT_BITS) - 1; + private static final int DOUBLE_EXPONENT_BIAS = (1 << (DOUBLE_EXPONENT_BITS - 1)) - 1; + private static final int DOUBLE_EXPONENT_MIN_NORMAL = -DOUBLE_EXPONENT_BIAS + 1; + private static final int DOUBLE_EXPONENT_MAX_NORMAL = DOUBLE_EXPONENT_BIAS; + + /** + * Encodes a {@link BigDecimal} in 56 bits [48 bits value, 8 bits scale]. + * + * @param value The decimal value + * @return Encoded value with type marker + */ + static long packDecimal(BigDecimal value) { + BigInteger unscaled = value.unscaledValue(); + if (unscaled.compareTo(MAX_DECIMAL_VALUE) > 0 || unscaled.compareTo(MIN_DECIMAL_VALUE) < 0) { + return 0L; + } + int scale = value.scale(); + if (scale > MAX_DECIMAL_SCALE || scale < MIN_DECIMAL_SCALE) { + return 0L; + } + long encoded = Integers.encodeZigZag(unscaled.longValue()) << DECIMAL_SCALE_BITS | scale; + return ValueIds.createId(ValueIds.T_DECIMAL, encoded); + } + + /** + * Encodes a double exponent into 9 bits if possible. 
Handles special cases for zero/subnormal and NaN/Infinity. + * + * @param exponent11 The original 11-bit exponent. + * @return Encoded 9-bit exponent as int (0-511), or -1 if not encodable. + */ + public static int encodeExponent9Bits(int exponent11) { + if (exponent11 == 0x7FF) { + return DOUBLE_EXPONENT_INF_OR_NAN; + } + + if (exponent11 == 0) { + return DOUBLE_EXPONENT_ZERO_OR_SUBNORMAL; + } + + int unbiasedExp = exponent11 - 1023; + if (unbiasedExp < DOUBLE_EXPONENT_MIN_NORMAL || unbiasedExp > DOUBLE_EXPONENT_MAX_NORMAL) { + return -1; + } + return unbiasedExp + DOUBLE_EXPONENT_BIAS; + } + + /** + * Decodes a 9-bit exponent back to the original 11-bit exponent. + * + * @param encoded 9-bit encoded exponent + * @return 11-bit biased exponent or special values for reserved patterns + */ + public static int decodeExponent9Bits(int encoded) { + if (encoded == DOUBLE_EXPONENT_ZERO_OR_SUBNORMAL) { + return 0; + } + if (encoded == DOUBLE_EXPONENT_INF_OR_NAN) { + return 0x7FF; + } + int unbiased = encoded - DOUBLE_EXPONENT_BIAS; + return unbiased + 1023; + } + + /** + * @deprecated Use {@link #decodeExponent9Bits(int)}. + */ + @Deprecated + public static int decodeExponent10Bits(int encoded) { + return decodeExponent9Bits(encoded); + } + + static long packDouble(double value) { + long valueBits = Double.doubleToRawLongBits(value); + int exponent11 = (int) ((valueBits >>> 52) & 0x7FF); + int exponent9 = encodeExponent9Bits(exponent11); + if (exponent9 >= 0) { + int sign = value < 0 ? 
1 : 0; + long mantissa = valueBits & 0x000fffffffffffffL; + return ((long) exponent9) << 54 | mantissa << 2 | sign << 1 | 1; + } + return 0L; + } + + static long packFloat(float value) { + return ValueIds.createId(ValueIds.T_FLOAT, Integers.encodeZigZag(Float.floatToRawIntBits(value))); + } + + static Literal unpackDecimal(long value, ValueFactory valueFactory) { + long encoded = ValueIds.getValue(value); + int scale = (byte) (encoded & 0xFF); + long unscaled = Integers.decodeZigZag(encoded >>> DECIMAL_SCALE_BITS); + return valueFactory.createLiteral(new BigDecimal(BigInteger.valueOf(unscaled), scale)); + } + + static Literal unpackDouble(long value, ValueFactory valueFactory) { + if ((value & 1L) == 0) { + throw new IllegalArgumentException("Invalid packed double value: zero bit not set."); + } + int sign = (int) ((value >> 1) & 1); + long mantissa = (value >> 2) & 0x000fffffffffffffL; + int exponent9 = (int) ((value >>> 54) & DOUBLE_EXPONENT_INF_OR_NAN); + + int exponent11 = decodeExponent9Bits(exponent9); + + long valueBits = ((long) sign << 63) | + ((long) (exponent11 & 0x7FF) << 52) | + mantissa; + + return valueFactory.createLiteral(Double.longBitsToDouble(valueBits)); + } + + static Literal unpackFloat(long value, ValueFactory valueFactory) { + float floatValue = Float.intBitsToFloat((int) Integers.decodeZigZag(ValueIds.getValue(value))); + return valueFactory.createLiteral(floatValue); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Integers.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Integers.java new file mode 100644 index 00000000000..a36424e5e78 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Integers.java @@ -0,0 +1,183 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import java.math.BigInteger; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +/** + * Functions for inlining of values into long ids. + */ +public class Integers { + + static final int INTEGER_VALUE_BITS = 56; + static final long MAX_INTEGER = (1L << (INTEGER_VALUE_BITS - 1)) - 1; + static final BigInteger MAX_BIG_INTEGER = BigInteger.valueOf(MAX_INTEGER); + static final long MIN_INTEGER = -(1L << (INTEGER_VALUE_BITS - 1)); + static final BigInteger MIN_BIG_INTEGER = BigInteger.valueOf(MIN_INTEGER); + + /** + * Encode a signed long to ZigZag-encoded long. + * + * @param value the long value to be encoded + * @return the encoded long value + */ + static long encodeZigZag(long value) { + return (value << 1) ^ (value >> 63); + } + + /** + * Decode a ZigZag-encoded long back to signed long. 
+ * + * @param encoded the encoded long value + * @return the original long value with proper sign + */ + static long decodeZigZag(long encoded) { + return (encoded >>> 1) ^ -(encoded & 0x1); + } + + private static long packInteger(Literal literal, int idType) { + BigInteger value = literal.integerValue(); + if (value.compareTo(MAX_BIG_INTEGER) > 0 || value.compareTo(MIN_BIG_INTEGER) < 0) { + return 0L; + } + return ValueIds.createId(idType, encodeZigZag(value.longValue())); + } + + static long packInteger(Literal literal) { + return packInteger(literal, ValueIds.T_INTEGER); + } + + static long packLong(Literal literal) { + long value = literal.longValue(); + if (value > MAX_INTEGER || value < MIN_INTEGER) { + return 0L; + } + return ValueIds.createId(ValueIds.T_LONG, encodeZigZag(value)); + } + + static long packInt(Literal literal) { + return ValueIds.createId(ValueIds.T_INT, encodeZigZag(literal.intValue())); + } + + static long packShort(Literal literal) { + return ValueIds.createId(ValueIds.T_SHORT, encodeZigZag(literal.shortValue())); + } + + static long packByte(Literal literal) { + return ValueIds.createId(ValueIds.T_BYTE, 0xFF & literal.byteValue()); + } + + static long packUnsignedLong(Literal literal) { + long value = Long.parseUnsignedLong(literal.getLabel()); + if (value > MAX_INTEGER || value < MIN_INTEGER) { + return 0L; + } + return ValueIds.createId(ValueIds.T_UNSIGNEDLONG, encodeZigZag(value)); + } + + static long packUnsignedInt(Literal literal) { + return ValueIds.createId(ValueIds.T_UNSIGNEDINT, encodeZigZag(literal.longValue())); + } + + static long packUnsignedShort(Literal literal) { + return ValueIds.createId(ValueIds.T_UNSIGNEDSHORT, encodeZigZag(literal.intValue())); + } + + static long packUnsignedByte(Literal literal) { + return ValueIds.createId(ValueIds.T_UNSIGNEDBYTE, literal.intValue()); + } + + static long packPositiveInteger(Literal literal) { + return packInteger(literal, ValueIds.T_POSITIVE_INTEGER); + } + + static long 
packNegativeInteger(Literal literal) { + return packInteger(literal, ValueIds.T_NEGATIVE_INTEGER); + } + + static long packNonNegativeInteger(Literal literal) { + return packInteger(literal, ValueIds.T_NON_NEGATIVE_INTEGER); + } + + static long packNonPositiveInteger(Literal literal) { + return packInteger(literal, ValueIds.T_NON_POSITIVE_INTEGER); + } + + static Literal unpackInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(BigInteger.valueOf(decoded)); + } + + static Literal unpackLong(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(decoded); + } + + static Literal unpackInt(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral((int) decoded); + } + + static Literal unpackShort(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral((short) decoded); + } + + static Literal unpackByte(long value, ValueFactory valueFactory) { + return valueFactory.createLiteral((byte) ValueIds.getValue(value)); + } + + static Literal unpackUnsignedLong(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toUnsignedString(decoded), XSD.UNSIGNED_LONG); + } + + static Literal unpackUnsignedInt(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Integer.toUnsignedString((int) decoded), XSD.UNSIGNED_INT); + } + + static Literal unpackUnsignedShort(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Integer.toUnsignedString((int) decoded), XSD.UNSIGNED_SHORT); + } + + static Literal unpackUnsignedByte(long 
value, ValueFactory valueFactory) { + long decoded = ValueIds.getValue(value); + return valueFactory.createLiteral(Integer.toUnsignedString((int) decoded), XSD.UNSIGNED_BYTE); + } + + static Literal unpackPositiveInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.POSITIVE_INTEGER); + } + + static Literal unpackNegativeInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.NEGATIVE_INTEGER); + } + + static Literal unpackNonNegativeInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.NON_NEGATIVE_INTEGER); + } + + static Literal unpackNonPositiveInteger(long value, ValueFactory valueFactory) { + long decoded = decodeZigZag(ValueIds.getValue(value)); + return valueFactory.createLiteral(Long.toString(decoded), XSD.NON_POSITIVE_INTEGER); + } + +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java new file mode 100644 index 00000000000..3e41900e55c --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java @@ -0,0 +1,44 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import java.nio.charset.StandardCharsets; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +public class Strings { + + static long packString(Literal literal) { + String label = literal.getLabel(); + if (label.length() > Values.MAX_LENGTH) { + // in any case string is longer than maximum encodable length + return 0L; + } + byte[] bytes = label.getBytes(StandardCharsets.UTF_8); + int maxLength = Values.MAX_LENGTH - 1; + if (bytes.length > maxLength) { + // multibyte string is longer than maximum encodable length + return 0L; + } + + return ValueIds.createId(ValueIds.T_SHORTSTRING, Bytes.packBytes(bytes) << 8 | bytes.length); + } + + static Literal unpackString(long value, ValueFactory valueFactory) { + value = ValueIds.getValue(value); + int length = (int) (value & 0xFF); + String strValue = new String(Bytes.unpackBytes(value >>> 8, length), StandardCharsets.UTF_8); + return valueFactory.createLiteral(strValue, XSD.STRING); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Values.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Values.java new file mode 100644 index 00000000000..a42d473c1f0 --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Values.java @@ -0,0 +1,93 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.eclipse.rdf4j.sail.lmdb.inlined.Booleans.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Dates.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Decimals.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Integers.*; +import static org.eclipse.rdf4j.sail.lmdb.inlined.Strings.*; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; + +/** + * Functions for inlining of values into long ids. + */ +public class Values { + /** + * Maximum length of inlined values in bytes. 
+ */ + static int MAX_LENGTH = 7; + + public static long packLiteral(Literal literal) { + XSD xsdDataType = literal.getCoreDatatype().asXSDDatatypeOrNull(); + if (xsdDataType == null) { + return 0L; + } + return switch (xsdDataType) { + case DECIMAL -> packDecimal(literal.decimalValue()); + case DOUBLE -> packDouble(literal.doubleValue()); + case FLOAT -> packFloat(literal.floatValue()); + case INTEGER -> packInteger(literal); + case LONG -> packLong(literal); + case INT -> packInt(literal); + case SHORT -> packShort(literal); + case BYTE -> packByte(literal); + case UNSIGNED_LONG -> packUnsignedLong(literal); + case UNSIGNED_INT -> packUnsignedInt(literal); + case UNSIGNED_SHORT -> packUnsignedShort(literal); + case UNSIGNED_BYTE -> packUnsignedByte(literal); + case POSITIVE_INTEGER -> packPositiveInteger(literal); + case NEGATIVE_INTEGER -> packNegativeInteger(literal); + case NON_NEGATIVE_INTEGER -> packNonNegativeInteger(literal); + case NON_POSITIVE_INTEGER -> packNonPositiveInteger(literal); + case STRING -> packString(literal); + case DATETIME -> packDateTime(literal); + case DATETIMESTAMP -> packDateTimeStamp(literal); + case DATE -> packDate(literal); + case BOOLEAN -> packBoolean(literal); + default -> + // unsupported type + 0L; + }; + } + + public static Literal unpackLiteral(long value, ValueFactory valueFactory) { + int idType = ValueIds.getIdType(value); + return switch (idType) { + case ValueIds.T_DOUBLE -> unpackDouble(value, valueFactory); + case ValueIds.T_DECIMAL -> unpackDecimal(value, valueFactory); + case ValueIds.T_FLOAT -> unpackFloat(value, valueFactory); + case ValueIds.T_INTEGER -> unpackInteger(value, valueFactory); + case ValueIds.T_LONG -> unpackLong(value, valueFactory); + case ValueIds.T_INT -> unpackInt(value, valueFactory); + case ValueIds.T_SHORT -> unpackShort(value, valueFactory); + case ValueIds.T_BYTE -> unpackByte(value, valueFactory); + case ValueIds.T_UNSIGNEDLONG -> unpackUnsignedLong(value, valueFactory); + case 
ValueIds.T_UNSIGNEDINT -> unpackUnsignedInt(value, valueFactory); + case ValueIds.T_UNSIGNEDSHORT -> unpackUnsignedShort(value, valueFactory); + case ValueIds.T_UNSIGNEDBYTE -> unpackUnsignedByte(value, valueFactory); + case ValueIds.T_POSITIVE_INTEGER -> unpackPositiveInteger(value, valueFactory); + case ValueIds.T_NEGATIVE_INTEGER -> unpackNegativeInteger(value, valueFactory); + case ValueIds.T_NON_NEGATIVE_INTEGER -> unpackNonNegativeInteger(value, valueFactory); + case ValueIds.T_NON_POSITIVE_INTEGER -> unpackNonPositiveInteger(value, valueFactory); + case ValueIds.T_SHORTSTRING -> unpackString(value, valueFactory); + case ValueIds.T_DATETIME -> unpackDateTime(value, valueFactory); + case ValueIds.T_DATETIMESTAMP -> unpackDateTimeStamp(value, valueFactory); + case ValueIds.T_DATE -> unpackDate(value, valueFactory); + case ValueIds.T_BOOLEAN -> unpackBoolean(value, valueFactory); + default -> throw new IllegalArgumentException("Invalid packed value " + value + " with id type: " + idType); + }; + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java index 3b19e49e0fc..40cff25af93 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2021 Eclipse RDF4J contributors. + * Copyright (c) 2026 Eclipse RDF4J contributors. * * All rights reserved. 
This program and the accompanying materials * are made available under the terms of the Eclipse Distribution License v1.0 @@ -10,12 +10,9 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.lmdb; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; -import java.util.Properties; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.Test; @@ -25,33 +22,28 @@ public class DefaultIndexTest { @Test public void testDefaultIndex(@TempDir File dir) throws Exception { - TripleStore store = new TripleStore(dir, new LmdbStoreConfig(), null); + StoreProperties properties = new StoreProperties(dir); + TripleStore store = new TripleStore(dir, properties, new LmdbStoreConfig(), null); store.close(); // check that the triple store used the default index - assertEquals("spoc,posc", findIndex(dir)); + assertEquals("spoc,posc", properties.getTripleIndexes()); LmdbTestUtil.deleteDir(dir); } @Test public void testExistingIndex(@TempDir File dir) throws Exception { // set a non-default index - TripleStore store = new TripleStore(dir, new LmdbStoreConfig("spoc,opsc"), null); + StoreProperties properties = new StoreProperties(dir); + TripleStore store = new TripleStore(dir, properties, new LmdbStoreConfig("spoc,opsc"), null); + properties.save(); store.close(); - String before = findIndex(dir); + properties = new StoreProperties(dir); + properties.load(); + String before = properties.getTripleIndexes(); // check that the index is preserved with a null value - store = new TripleStore(dir, new LmdbStoreConfig(null), null); + store = new TripleStore(dir, properties, new LmdbStoreConfig(null), null); store.close(); - assertEquals(before, findIndex(dir)); + assertEquals(before, properties.getTripleIndexes()); LmdbTestUtil.deleteDir(dir); } - - private String 
findIndex(File dir) throws Exception { - Properties properties = new Properties(); - try (InputStream in = new FileInputStream(new File(dir, "triples.prop"))) { - properties.clear(); - properties.load(in); - } - return (String) properties.get("triple-indexes"); - } - } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConsistencyIT.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConsistencyIT.java deleted file mode 100644 index b228eedafc5..00000000000 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConsistencyIT.java +++ /dev/null @@ -1,201 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2021 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. 
- * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.lmdb; - -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.util.List; - -import org.eclipse.rdf4j.common.iteration.Iterations; -import org.eclipse.rdf4j.model.IRI; -import org.eclipse.rdf4j.model.Model; -import org.eclipse.rdf4j.model.Resource; -import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.Value; -import org.eclipse.rdf4j.model.ValueFactory; -import org.eclipse.rdf4j.model.impl.LinkedHashModel; -import org.eclipse.rdf4j.model.impl.SimpleValueFactory; -import org.eclipse.rdf4j.repository.Repository; -import org.eclipse.rdf4j.repository.RepositoryConnection; -import org.eclipse.rdf4j.repository.sail.SailRepository; -import org.eclipse.rdf4j.repository.util.RDFInserter; -import org.eclipse.rdf4j.repository.util.RDFLoader; -import org.eclipse.rdf4j.rio.RDFFormat; -import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Integration tests for checking Lmdb Store index consistency. 
- */ -public class LmdbStoreConsistencyIT { - - private static final Logger logger = LoggerFactory.getLogger(LmdbStoreConsistencyIT.class); - - /*-----------* - * Variables * - *-----------*/ - - /*---------* - * Methods * - *---------*/ - - @Test - public void testSES1867IndexCorruption(@TempDir File dataDir) throws Exception { - try { - ValueFactory vf = SimpleValueFactory.getInstance(); - IRI oldContext = vf.createIRI("http://example.org/oldContext"); - IRI newContext = vf.createIRI("http://example.org/newContext"); - - Repository repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig("spoc,psoc"))); - - try (RepositoryConnection conn = repo.getConnection()) { - // Step1: setup the initial database state - logger.info("Preserving initial state ..."); - conn.begin(); - RDFInserter inserter = new RDFInserter(conn) { - private int count; - - @Override - protected void addStatement(Resource subj, IRI pred, Value obj, Resource ctxt) { - super.addStatement(subj, pred, obj, ctxt); - if (count++ % 1000 == 0) { - con.commit(); - con.begin(); - } - } - }; - RDFLoader loader = new RDFLoader(conn.getParserConfig(), conn.getValueFactory()); - loader.load(getClass().getResourceAsStream("/lmdbstore-testdata/SES-1867/initialState.nq"), "", - RDFFormat.NQUADS, inserter); - conn.commit(); - logger.info("Number of statements: " + conn.size()); - - // Step 2: in a single transaction remove "oldContext", then add - // statements to "newContext" - conn.begin(); - - logger.info("Removing old context"); - conn.remove((Resource) null, (IRI) null, (Value) null, oldContext); - - logger.info("Adding updated context"); - conn.add(getClass().getResourceAsStream("/lmdbstore-testdata/SES-1867/newTriples.nt"), "", - RDFFormat.NTRIPLES, - newContext); - conn.commit(); - - // Step 3: check whether oldContext is actually empty - List stmts = Iterations.asList(conn.getStatements(null, null, null, false, oldContext)); - logger.info("Not deleted statements: " + stmts.size()); - } - 
repo.shutDown(); - - // Step 4: check the repository size with SPOC only - new File(dataDir, "triples/triples.prop").delete(); // delete triples.prop to - // update index usage - repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig("spoc"))); - - Model spocStatements; - try (RepositoryConnection conn = repo.getConnection()) { - logger.info("Repository size with SPOC index only: " + conn.size()); - spocStatements = Iterations.addAll(conn.getStatements(null, null, null, false), new LinkedHashModel()); - } - repo.shutDown(); - - // Step 5: check the repository size with PSOC only - new File(dataDir, "triples/triples.prop").delete(); // delete triples.prop to - // update index usage - repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig("psoc"))); - - Model psocStatements; - try (RepositoryConnection conn = repo.getConnection()) { - logger.info("Repository size with PSOC index only: " + conn.size()); - psocStatements = Iterations.addAll(conn.getStatements(null, null, null, false), new LinkedHashModel()); - } - repo.shutDown(); - - // Step 6: computing the differences of the contents of the indices - logger.info("Computing differences of sets..."); - - Model differenceA = new LinkedHashModel(spocStatements); - differenceA.removeAll(psocStatements); - Model differenceB = new LinkedHashModel(psocStatements); - differenceB.removeAll(spocStatements); - - logger.info("Difference SPOC MINUS PSOC: " + differenceA.size()); - logger.info("Difference PSOC MINUS SPOC: " + differenceB.size()); - - logger.info("Different statements in SPOC MINUS PSOC (Mind the contexts):"); - for (Statement st : differenceA) { - logger.error(" * " + st); - } - - logger.info("Different statements in PSOC MINUS SPOC (Mind the contexts):"); - for (Statement st : differenceB) { - logger.error(" * " + st); - } - - assertEquals(0, differenceA.size()); - assertEquals(0, differenceB.size()); - } finally { - LmdbTestUtil.deleteDir(dataDir); - } - } - - @Test - public void 
testLargeTransactionStaysConsistentAcrossSpocOspcAndPsoc(@TempDir File dataDir) throws Exception { - try { - Model inserted = new LinkedHashModel(); - ValueFactory vf = SimpleValueFactory.getInstance(); - - for (int i = 0; i < 1537; i++) { - Resource context = i % 5 == 0 ? null : vf.createIRI("urn:context:" + (i % 13)); - inserted.add(vf.createStatement( - vf.createIRI("urn:subject:" + i), - vf.createIRI("urn:predicate:" + (i % 31)), - vf.createIRI("urn:object:" + (i % 43)), - context)); - } - - Repository repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig("spoc,ospc,psoc"))); - try (RepositoryConnection conn = repo.getConnection()) { - conn.begin(); - conn.add(inserted); - conn.commit(); - } finally { - repo.shutDown(); - } - - Model spocStatements = readAllStatements(dataDir, "spoc"); - Model ospcStatements = readAllStatements(dataDir, "ospc"); - Model psocStatements = readAllStatements(dataDir, "psoc"); - - assertEquals(inserted, spocStatements); - assertEquals(spocStatements, ospcStatements); - assertEquals(spocStatements, psocStatements); - } finally { - LmdbTestUtil.deleteDir(dataDir); - } - } - - private Model readAllStatements(File dataDir, String indexSpec) { - new File(dataDir, "triples/triples.prop").delete(); - Repository repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig(indexSpec))); - try (RepositoryConnection conn = repo.getConnection()) { - return Iterations.addAll(conn.getStatements(null, null, null, false), new LinkedHashModel()); - } finally { - repo.shutDown(); - } - } -} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreReindexTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreReindexTest.java new file mode 100644 index 00000000000..c0eb65636a5 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreReindexTest.java @@ -0,0 +1,149 @@ +/******************************************************************************* + * 
Copyright (c) 2021 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.util.List; + +import org.eclipse.rdf4j.common.iteration.Iterations; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.LinkedHashModel; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.RepositoryConnection; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.util.RDFInserter; +import org.eclipse.rdf4j.repository.util.RDFLoader; +import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Integration tests for checking Lmdb Store index consistency. 
+ */ +public class LmdbStoreReindexTest { + + private static final Logger logger = LoggerFactory.getLogger(LmdbStoreReindexTest.class); + + /*-----------* + * Variables * + *-----------*/ + + /*---------* + * Methods * + *---------*/ + + @Test + public void testReindex(@TempDir File dataDir) throws Exception { + ValueFactory vf = SimpleValueFactory.getInstance(); + IRI oldContext = vf.createIRI("http://example.org/oldContext"); + IRI newContext = vf.createIRI("http://example.org/newContext"); + + Repository repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig("spoc,psoc"))); + + try (RepositoryConnection conn = repo.getConnection()) { + // Step1: setup the initial database state + logger.info("Preserving initial state ..."); + conn.begin(); + RDFInserter inserter = new RDFInserter(conn) { + private int count; + + @Override + protected void addStatement(Resource subj, IRI pred, Value obj, Resource ctxt) { + super.addStatement(subj, pred, obj, ctxt); + if (count++ % 1000 == 0) { + con.commit(); + con.begin(); + } + } + }; + RDFLoader loader = new RDFLoader(conn.getParserConfig(), conn.getValueFactory()); + loader.load(getClass().getResourceAsStream("/lmdbstore-testdata/SES-1867/initialState.nq"), "", + RDFFormat.NQUADS, inserter); + conn.commit(); + logger.info("Number of statements: " + conn.size()); + + // Step 2: in a single transaction remove "oldContext", then add + // statements to "newContext" + conn.begin(); + + logger.info("Removing old context"); + conn.remove((Resource) null, (IRI) null, (Value) null, oldContext); + + logger.info("Adding updated context"); + conn.add(getClass().getResourceAsStream("/lmdbstore-testdata/SES-1867/newTriples.nt"), "", + RDFFormat.NTRIPLES, + newContext); + conn.commit(); + + // Step 3: check whether oldContext is actually empty + List stmts = Iterations.asList(conn.getStatements(null, null, null, false, oldContext)); + logger.info("Not deleted statements: " + stmts.size()); + } + repo.shutDown(); + + // Step 4: 
check the repository size with SPOC only + // update index usage + repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig("spoc"))); + + Model spocStatements; + try (RepositoryConnection conn = repo.getConnection()) { + logger.info("Repository size with SPOC index only: " + conn.size()); + spocStatements = Iterations.addAll(conn.getStatements(null, null, null, false), new LinkedHashModel()); + } + repo.shutDown(); + + // Step 5: check the repository size with PSOC only + // update index usage + repo = new SailRepository(new LmdbStore(dataDir, new LmdbStoreConfig("psoc"))); + + Model psocStatements; + try (RepositoryConnection conn = repo.getConnection()) { + logger.info("Repository size with PSOC index only: " + conn.size()); + psocStatements = Iterations.addAll(conn.getStatements(null, null, null, false), new LinkedHashModel()); + } + repo.shutDown(); + + // Step 6: computing the differences of the contents of the indices + logger.info("Computing differences of sets..."); + + Model differenceA = new LinkedHashModel(spocStatements); + differenceA.removeAll(psocStatements); + Model differenceB = new LinkedHashModel(psocStatements); + differenceB.removeAll(spocStatements); + + logger.info("Difference SPOC MINUS PSOC: " + differenceA.size()); + logger.info("Difference PSOC MINUS SPOC: " + differenceB.size()); + + logger.info("Different statements in SPOC MINUS PSOC (Mind the contexts):"); + for (Statement st : differenceA) { + logger.error(" * " + st); + } + + logger.info("Different statements in PSOC MINUS SPOC (Mind the contexts):"); + for (Statement st : differenceB) { + logger.error(" * " + st); + } + + assertEquals(0, differenceA.size()); + assertEquals(0, differenceB.size()); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RemoveAddTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RemoveAddTest.java new file mode 100644 index 00000000000..fe66465a472 --- /dev/null +++ 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RemoveAddTest.java @@ -0,0 +1,123 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ + +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.eclipse.rdf4j.common.iteration.Iterations; +import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.vocabulary.FOAF; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.rules.TemporaryFolder; + +/** + * Tests that values and statements correctly exists after (partial) removal and addition. 
+ */ +public class RemoveAddTest { + + private static SailRepository repository; + + public static TemporaryFolder tempDir = new TemporaryFolder(); + static List statementList; + + @BeforeAll + public static void beforeClass() throws IOException { + tempDir.create(); + File file = tempDir.newFolder(); + + LmdbStoreConfig config = new LmdbStoreConfig("spoc,ospc,psoc"); + repository = new SailRepository(new LmdbStore(file, config)); + } + + @AfterAll + public static void afterClass() { + repository.shutDown(); + tempDir.delete(); + tempDir = null; + repository = null; + statementList = null; + } + + @Test + public void removeAndAdd() { + IRI alice; + IRI bob; + int expectedTypeStatements; + + try (SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(IsolationLevels.NONE); + + ValueFactory vf = connection.getValueFactory(); + alice = vf.createIRI("urn:person:alice"); + bob = vf.createIRI("urn:person:bob"); + + connection.add(alice, RDF.TYPE, FOAF.PERSON); + connection.add(alice, FOAF.NAME, vf.createLiteral("Alice")); + connection.add(alice, FOAF.AGE, vf.createLiteral(34)); + connection.add(alice, FOAF.MBOX, vf.createIRI("mailto:alice@example.org")); + connection.add(alice, FOAF.KNOWS, bob); + + connection.add(bob, RDF.TYPE, FOAF.PERSON); + connection.add(bob, FOAF.NAME, vf.createLiteral("Bob")); + connection.add(bob, FOAF.AGE, vf.createLiteral(29)); + connection.add(bob, FOAF.MBOX, vf.createIRI("mailto:bob@example.org")); + + connection.commit(); + + expectedTypeStatements = Iterations.asList(connection.getStatements(null, RDF.TYPE, null, false)).size(); + } + + try (SailRepositoryConnection connection = repository.getConnection()) { + statementList = Iterations.asList(connection.getStatements(null, RDF.TYPE, null, false)); + } + + try (SailRepositoryConnection connection = repository.getConnection()) { + connection.begin(IsolationLevels.NONE); + connection.remove((Resource) null, RDF.TYPE, null); + connection.commit(); + 
connection.begin(IsolationLevels.NONE); + connection.add(statementList); + connection.commit(); + } + + try (SailRepositoryConnection connection = repository.getConnection()) { + int typeStatementsAfterRestore = Iterations + .asList(connection.getStatements(null, RDF.TYPE, null, false)) + .size(); + + assertEquals(expectedTypeStatements, typeStatementsAfterRestore, + "rdf:type statements should be restored after remove-and-add"); + assertTrue(connection.hasStatement(alice, RDF.TYPE, FOAF.PERSON, false)); + assertTrue(connection.hasStatement(alice, FOAF.NAME, null, false)); + assertTrue(connection.hasStatement(alice, FOAF.AGE, null, false)); + assertTrue(connection.hasStatement(alice, FOAF.KNOWS, bob, false)); + assertTrue(connection.hasStatement(bob, RDF.TYPE, FOAF.PERSON, false)); + assertTrue(connection.hasStatement(bob, FOAF.NAME, null, false)); + assertTrue(connection.hasStatement(bob, FOAF.AGE, null, false)); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/SailSourceModelTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/SailSourceModelTest.java index db9eac7791e..92fb034cb7e 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/SailSourceModelTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/SailSourceModelTest.java @@ -60,7 +60,9 @@ public void testGetStatements_ConcurrentModificationOfModel() { protected SailSourceModel getNewModel() { try { File dataDir = Files.createTempDirectory("SailSourceModelTest-").toFile(); - LmdbSailStore store = new LmdbSailStore(dataDir, new LmdbStoreConfig("spoc")); + LmdbSailStore store = new LmdbSailStore(dataDir, + new StoreProperties(), + new LmdbStoreConfig("spoc")); stores.add(store); storeDirs.add(dataDir); return new SailSourceModel(store); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/StorePropertiesTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/StorePropertiesTest.java new file mode 
100644 index 00000000000..9e5a4b62b8c --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/StorePropertiesTest.java @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2026 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Some portions generated by Co-Pilot +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class StorePropertiesTest { + + @Test + void saveAndLoadRoundTripsPersistedValues(@TempDir File dir) { + StoreProperties properties = new StoreProperties(dir) + .setVersion("1") + .setTripleIndexes("spoc,posc"); + + properties.save(); + + StoreProperties loaded = new StoreProperties(dir); + loaded.load(); + + assertTrue(new File(dir, "store.properties").isFile()); + assertEquals("1", loaded.getVersion()); + assertEquals("spoc,posc", loaded.getTripleIndexes()); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TestLmdbStoreUpgrade.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TestLmdbStoreUpgrade.java deleted file mode 100644 index dc105278c28..00000000000 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TestLmdbStoreUpgrade.java +++ /dev/null @@ -1,73 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2021 Eclipse RDF4J contributors. - * - * All rights reserved. 
This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.lmdb; - -import static org.junit.Assert.assertTrue; - -import java.io.File; - -import org.eclipse.rdf4j.common.iteration.CloseableIteration; -import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.ValueFactory; -import org.eclipse.rdf4j.model.vocabulary.RDF; -import org.eclipse.rdf4j.model.vocabulary.RDFS; -import org.eclipse.rdf4j.sail.NotifyingSailConnection; -import org.eclipse.rdf4j.sail.SailException; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -/** - * - */ -public class TestLmdbStoreUpgrade { - - @Test - public void testDevel(@TempDir File dataDir) throws SailException { - LmdbStore store = new LmdbStore(dataDir); - try { - store.init(); - try (NotifyingSailConnection con = store.getConnection()) { - ValueFactory vf = store.getValueFactory(); - con.begin(); - con.addStatement(RDF.VALUE, RDFS.LABEL, vf.createLiteral("value")); - con.commit(); - } - } finally { - store.shutDown(); - } - new File(dataDir, "lmdbrdf.ver").delete(); - try { - assertValue(dataDir); - assertTrue(new File(dataDir, "lmdbrdf.ver").exists()); - } finally { - LmdbTestUtil.deleteDir(dataDir); - } - } - - public void assertValue(File dataDir) throws SailException { - LmdbStore store = new LmdbStore(dataDir); - try { - store.init(); - try (NotifyingSailConnection con = store.getConnection()) { - ValueFactory vf = store.getValueFactory(); - CloseableIteration iter; - iter = con.getStatements(RDF.VALUE, RDFS.LABEL, vf.createLiteral("value"), false); - try { - assertTrue(iter.hasNext()); - } finally { - iter.close(); - } - } - } finally { - 
store.shutDown(); - } - } -} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java index 669d722d10b..9cf4a732718 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java @@ -53,7 +53,9 @@ void spoc_subjectBound_othersWildcard() throws Exception { long obj = Long.MAX_VALUE; long context = Long.MAX_VALUE; + tripleStore.startTransaction(); TripleStore.TripleIndex index = tripleStore.new TripleIndex("spoc"); + tripleStore.endTransaction(true); int len = Varint.calcListLengthUnsigned(subj, pred, obj, context); ByteBuffer actual = ByteBuffer.allocate(len); @@ -79,7 +81,9 @@ void posc_predicateBound_othersWildcard() throws Exception { long obj = Long.MAX_VALUE; long context = Long.MAX_VALUE; + tripleStore.startTransaction(); TripleStore.TripleIndex index = tripleStore.new TripleIndex("posc"); + tripleStore.endTransaction(true); int len = Varint.calcListLengthUnsigned(subj, pred, obj, context); ByteBuffer actual = ByteBuffer.allocate(len); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java index 68310a0a514..e8d8881aae8 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java @@ -8,7 +8,7 @@ * * SPDX-License-Identifier: BSD-3-Clause *******************************************************************************/ -// Some portions generated by Codex +// Some portions generated by Co-Pilot package org.eclipse.rdf4j.sail.lmdb; import static org.junit.Assert.assertEquals; @@ -32,12 +32,16 @@ import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; +import 
org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.util.ModelBuilder; import org.eclipse.rdf4j.model.util.Values; import org.eclipse.rdf4j.model.vocabulary.RDF; import org.eclipse.rdf4j.model.vocabulary.RDFS; import org.eclipse.rdf4j.model.vocabulary.XSD; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreSchema; import org.eclipse.rdf4j.sail.lmdb.model.LmdbBNode; import org.eclipse.rdf4j.sail.lmdb.model.LmdbIRI; import org.eclipse.rdf4j.sail.lmdb.model.LmdbLiteral; @@ -53,6 +57,8 @@ */ public class ValueStoreTest { + private static final IRI INLINE_LITERALS = Values.iri(LmdbStoreSchema.NAMESPACE + "inlineLiterals"); + private ValueStore valueStore; private File dataDir; @@ -74,6 +80,31 @@ private LmdbStoreConfig hashCacheEnabledConfig() { return new LmdbStoreConfig().setValueHashCacheEnabled(true); } + @Test + public void testDisableInlineLiteralsUsesStoredIds() throws Exception { + valueStore.close(); + + LmdbStoreConfig config = new LmdbStoreConfig(); + Resource implNode = Values.bnode(); + Model configModel = new ModelBuilder() + .add(implNode, INLINE_LITERALS, Values.literal(false)) + .build(); + + config.parse(configModel, implNode); + assertFalse("inline literals should be disabled after parsing config", + config.getInlineLiterals()); + + valueStore = createValueStore(config); + + Literal literal = Values.literal("inline"); + valueStore.startTransaction(true); + long id = valueStore.storeValue(literal); + valueStore.commit(); + + assertFalse("small literals should not be inlined when disabled", ValueIds.isInlined(id)); + assertEquals(literal, valueStore.getValue(id)); + } + @Test public void testGcValues() throws Exception { Value values[] = new Value[] { @@ -100,6 +131,7 @@ public void testGcValues() throws Exception { for (int i = 0; i < values.length; i++) { Assert.assertEquals(LmdbValue.UNKNOWN_ID, 
valueStore.getId(values[i])); + // access to value must be ensured as long as revision is not invalidated Assert.assertTrue(valueStore.getValue(ids[i]) != null); } @@ -112,7 +144,8 @@ public void testGcValues() throws Exception { for (int i = 0; i < values.length; i++) { Assert.assertEquals(LmdbValue.UNKNOWN_ID, valueStore.getId(values[i])); - Assert.assertTrue(valueStore.getValue(ids[i]) != null); + // value should be removed after invalidating the revision + Assert.assertTrue(valueStore.getValue(ids[i]) == null); } valueStore.startTransaction(true); @@ -163,33 +196,35 @@ public void testGcValuesAfterRestart() throws Exception { @Test public void testGcDatatypes() throws Exception { - IRI[] types = new IRI[] { XSD.STRING, XSD.INTEGER, XSD.DOUBLE, XSD.DECIMAL, XSD.FLOAT }; + IRI[] types = new IRI[] { XSD.STRING, XSD.INTEGER, XSD.LONG, XSD.DECIMAL }; LmdbValue values[] = new LmdbValue[types.length]; valueStore.startTransaction(true); for (int i = 0; i < values.length; i++) { - values[i] = valueStore.createLiteral("123", types[i]); + // use a value that is large enough to not being inlined + values[i] = valueStore.createLiteral(Long.toString(Long.MAX_VALUE - 1), types[i]); valueStore.storeValue(values[i]); } valueStore.commit(); valueStore.startTransaction(true); List datatypeIds = new LinkedList<>(); - for (int i = 1; i < types.length; i++) { + for (int i = 0; i < types.length; i++) { datatypeIds.add(valueStore.storeValue(types[i])); } valueStore.commit(); valueStore.startTransaction(true); valueStore.gcIds(Collections.singleton(values[0].getInternalID()), new HashSet<>()); - valueStore.gcIds(datatypeIds, new HashSet<>()); + valueStore.gcIds(datatypeIds.subList(1, datatypeIds.size() - 1), new HashSet<>()); valueStore.commit(); // close and recreate store valueStore.close(); valueStore = createValueStore(); + // the first value is directly GCed assertNull(valueStore.getValue(values[0].getInternalID())); - // the first datatype is not directly garbage collected and 
must not be + // the first datatype is not directly GCed and must not be // removed from the store if the related literal is removed assertNotNull(valueStore.getValue(datatypeIds.remove(0))); @@ -208,7 +243,8 @@ public void testGcDatatypes() throws Exception { public void testGcURIs() throws Exception { for (boolean storeAndGcUri : List.of(false, true)) { valueStore.startTransaction(true); - LmdbLiteral literal = valueStore.createLiteral("123", XSD.STRING); + // use a value that is large enough to not being inlined + LmdbLiteral literal = valueStore.createLiteral("123".repeat(5), XSD.STRING); valueStore.storeValue(literal); if (storeAndGcUri) { valueStore.storeValue(XSD.STRING); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java index 813192bc3af..36e340367c7 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/config/LmdbStoreConfigTest.java @@ -8,7 +8,7 @@ * * SPDX-License-Identifier: BSD-3-Clause *******************************************************************************/ -// Some portions generated by Codex +// Some portions generated by Co-Pilot package org.eclipse.rdf4j.sail.lmdb.config; import static org.assertj.core.api.Assertions.assertThat; @@ -38,6 +38,8 @@ class LmdbStoreConfigTest { private static final IRI NO_READAHEAD = Values.iri(LmdbStoreSchema.NAMESPACE + "noReadahead"); + private static final IRI INLINE_LITERALS = Values.iri(LmdbStoreSchema.NAMESPACE + "inlineLiterals"); + @Test void pageCardinalityEstimatorDefaultsToEnabled() { assertThat(new LmdbStoreConfig().getPageCardinalityEstimator()).isTrue(); @@ -58,6 +60,11 @@ void valueHashCacheDefaultsToDisabled() { assertThat(new LmdbStoreConfig().getValueHashCacheEnabled()).isFalse(); } + @Test + void inlineLiteralsDefaultsToEnabled() { + 
assertThat(new LmdbStoreConfig().getInlineLiterals()).isTrue(); + } + @ParameterizedTest @ValueSource(booleans = { true, false }) void testThatLmdbStoreConfigParseAndExportNoReadahead(final boolean noReadahead) { @@ -106,6 +113,18 @@ void testThatLmdbStoreConfigParseAndExportValueHashCacheEnabled(final boolean va ); } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void testThatLmdbStoreConfigParseAndExportInlineLiterals(final boolean inlineLiterals) { + testParseAndExport( + INLINE_LITERALS, + Values.literal(inlineLiterals), + LmdbStoreConfig::getInlineLiterals, + inlineLiterals, + !inlineLiterals + ); + } + @ParameterizedTest @ValueSource(booleans = { true, false }) void testThatLmdbStoreConfigParseAndExportAutoGrow(final boolean autoGrow) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BooleansTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BooleansTest.java new file mode 100644 index 00000000000..7a785e86752 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BooleansTest.java @@ -0,0 +1,60 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.sail.lmdb.ValueIds; +import org.junit.jupiter.api.Test; + +class BooleansTest { + + private final ValueFactory valueFactory = SimpleValueFactory.getInstance(); + + @Test + void testPackBooleanTrue() { + Literal trueLiteral = valueFactory.createLiteral(true); + long expectedId = ValueIds.createId(ValueIds.T_BOOLEAN, 1L); + long actualId = Booleans.packBoolean(trueLiteral); + + assertEquals(expectedId, actualId, "Packing true literal should return the correct ID"); + } + + @Test + void testPackBooleanFalse() { + Literal falseLiteral = valueFactory.createLiteral(false); + long expectedId = ValueIds.createId(ValueIds.T_BOOLEAN, 0L); + long actualId = Booleans.packBoolean(falseLiteral); + + assertEquals(expectedId, actualId, "Packing false literal should return the correct ID"); + } + + @Test + void testUnpackBooleanTrue() { + long trueId = ValueIds.createId(ValueIds.T_BOOLEAN, 1L); + Literal expectedLiteral = valueFactory.createLiteral(true); + Literal actualLiteral = Booleans.unpackBoolean(trueId, valueFactory); + + assertEquals(expectedLiteral, actualLiteral, "Unpacking ID for true should return true literal"); + } + + @Test + void testUnpackBooleanFalse() { + long falseId = ValueIds.createId(ValueIds.T_BOOLEAN, 0L); + Literal expectedLiteral = valueFactory.createLiteral(false); + Literal actualLiteral = Booleans.unpackBoolean(falseId, valueFactory); + + assertEquals(expectedLiteral, actualLiteral, "Unpacking ID for false should return false literal"); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BytesTest.java 
b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BytesTest.java new file mode 100644 index 00000000000..147d69c3b10 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/BytesTest.java @@ -0,0 +1,67 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.Test; + +class BytesTest { + + @Test + void testPackBytesWithValidInput() { + byte[] bytes = { 0x01, 0x02, 0x03, 0x04 }; + long expected = 0x01020304L; + assertEquals(expected, Bytes.packBytes(bytes), "Packing bytes should result in the correct long value."); + assertArrayEquals(bytes, Bytes.unpackBytes(expected, 4), + "Unpacking long should result in the correct byte array."); + } + + @Test + void testPackBytesWithEmptyArray() { + byte[] bytes = {}; + long expected = 0L; + assertEquals(expected, Bytes.packBytes(bytes), "Packing an empty array should result in 0L."); + } + + @Test + void testPackBytesWithSingleByte() { + byte[] bytes = { 0x7F }; + long expected = 0x7FL; + assertEquals(expected, Bytes.packBytes(bytes), + "Packing a single byte should result in its long representation."); + } + + @Test + void testUnpackBytesWithZeroLength() { + long value = 0x01020304L; + byte[] expected = {}; + assertArrayEquals(expected, Bytes.unpackBytes(value, 0), + "Unpacking with zero length should result in an empty array."); + } + + @Test + void testUnpackBytesWithSingleByte() { + 
long value = 0x7FL; + byte[] expected = { 0x7F }; + assertArrayEquals(expected, Bytes.unpackBytes(value, 1), + "Unpacking a single byte should return the correct byte array."); + } + + @Test + void testPackAndUnpackConsistency() { + byte[] originalBytes = { 0x01, 0x02, 0x03, 0x04 }; + long packedValue = Bytes.packBytes(originalBytes); + byte[] unpackedBytes = Bytes.unpackBytes(packedValue, originalBytes.length); + assertArrayEquals(originalBytes, unpackedBytes, + "Packing then unpacking should result in the original byte array."); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java new file mode 100644 index 00000000000..05c0cc445e4 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java @@ -0,0 +1,113 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.junit.jupiter.api.Test; + +public class DecimalsTest { + + @Test + void testPackDecimalValid() { + BigDecimal value = new BigDecimal(BigInteger.valueOf(123456), 2); + long packedValue = Decimals.packDecimal(value); + assertNotEquals(0L, packedValue); + } + + @Test + void testPackDecimalInvalidScale() { + BigDecimal value = new BigDecimal(BigInteger.valueOf(123456), Decimals.MAX_DECIMAL_SCALE + 1); + long packedValue = Decimals.packDecimal(value); + assertEquals(0L, packedValue); + } + + @Test + void testPackDecimalInvalidValue() { + BigDecimal value = new BigDecimal(BigInteger.valueOf(Decimals.MAX_DECIMAL_VALUE.longValue() + 1), 2); + long packedValue = Decimals.packDecimal(value); + assertEquals(0L, packedValue); + } + + @Test + void testUnpackDecimal() { + BigDecimal value = new BigDecimal(BigInteger.valueOf(123456), 2); + long packedValue = Decimals.packDecimal(value); + Literal literal = Decimals.unpackDecimal(packedValue, SimpleValueFactory.getInstance()); + assertEquals(value, literal.decimalValue()); + } + + @Test + void testPackDouble() { + double[] values = { + 123.456, // typical positive + 3.14, // small positive + -123.456, // typical negative + -3.14, // small negative + 0, // positive zero + -0.0, // negative zero + 1, // simple positive + -1, // simple negative + Double.NaN, // not-a-number + Double.POSITIVE_INFINITY, // positive infinity + 
Double.NEGATIVE_INFINITY // negative infinity + }; + + for (double value : values) { + long packedValue = Decimals.packDouble(value); + assertFalse(packedValue < 0, "Packed value should be non-negative for value: " + value); + assertNotEquals(0L, packedValue, "Packing failed for value: " + value); + Literal literal = Decimals.unpackDouble(packedValue, SimpleValueFactory.getInstance()); + if (Double.isNaN(value)) { + assertTrue(Double.isNaN(literal.doubleValue()), "Expected NaN but got: " + literal.doubleValue()); + } else { + assertEquals(value, literal.doubleValue(), 0.0, "Mismatch for value: " + value); + } + } + } + + @Test + void testPackFloat() { + float[] values = { + 123.456f, // typical positive + 3.14f, // small positive + -123.456f, // typical negative + -3.14f, // small negative + 0f, // positive zero + -0.0f, // negative zero + 1f, // simple positive + -1f, // simple negative + Float.NaN, // not-a-number + Float.POSITIVE_INFINITY, // positive infinity + Float.NEGATIVE_INFINITY // negative infinity + }; + + for (float value : values) { + long packedValue = Decimals.packFloat(value); + assertFalse(packedValue < 0, "Packed value should be non-negative for value: " + value); + assertNotEquals(0L, packedValue, "Packing failed for value: " + value); + Literal literal = Decimals.unpackFloat(packedValue, SimpleValueFactory.getInstance()); + if (Float.isNaN(value)) { + assertTrue(Double.isNaN(literal.floatValue()), "Expected NaN but got: " + literal.floatValue()); + } else { + assertEquals(value, literal.floatValue(), 0.0, "Mismatch for value: " + value); + } + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/StringsTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/StringsTest.java new file mode 100644 index 00000000000..7c573431286 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/StringsTest.java @@ -0,0 +1,61 @@ 
+/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.base.CoreDatatype.XSD; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.junit.jupiter.api.Test; + +class StringsTest { + + @Test + void testPackStringWithinMaxLength() { + ValueFactory valueFactory = SimpleValueFactory.getInstance(); + Literal literal = valueFactory.createLiteral("test", XSD.STRING); + long packed = Strings.packString(literal); + assertFalse(packed < 0, "Packed value should be non-negative for value: " + literal); + + // Assert that the packed value is not 0 + assertNotEquals(0L, packed, "Packed value should not be 0 for valid input."); + } + + @Test + void testPackStringExceedsMaxLength() { + ValueFactory valueFactory = SimpleValueFactory.getInstance(); + // Create a string longer than MAX_LENGTH - 1, one byte is used to encode string length + String longString = "a".repeat(Values.MAX_LENGTH); + Literal literal = valueFactory.createLiteral(longString, XSD.STRING); + long packed = Strings.packString(literal); + assertFalse(packed < 0, "Packed value should be non-negative for value: " + literal); + + // Assert that the packed value is 0 + assertEquals(0L, 
packed, "Packed value should be 0 for input exceeding max length."); + } + + @Test + void testUnpackString() { + ValueFactory valueFactory = SimpleValueFactory.getInstance(); + Literal literal = valueFactory.createLiteral("test", XSD.STRING); + long packed = Strings.packString(literal); + assertFalse(packed < 0, "Packed value should be non-negative for value: " + literal); + + Literal unpackedLiteral = Strings.unpackString(packed, valueFactory); + + // Assert that the unpacked value matches the original + assertEquals(literal.getLabel(), unpackedLiteral.getLabel(), "Unpacked label should match original."); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/ValuesTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/ValuesTest.java new file mode 100644 index 00000000000..17725b28a4c --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/ValuesTest.java @@ -0,0 +1,189 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.inlined; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertFalse; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.Arrays; +import java.util.List; + +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.XSD; +import org.eclipse.rdf4j.sail.lmdb.Varint; +import org.junit.jupiter.api.Test; + +class ValuesTest { + + private final ValueFactory vf = SimpleValueFactory.getInstance(); + + private List literals = Arrays.asList( + // DECIMAL + vf.createLiteral(BigDecimal.ZERO), + vf.createLiteral(BigDecimal.ONE.negate()), + vf.createLiteral(new BigDecimal("123456789.987654321")), + vf.createLiteral(new BigDecimal("0.00000000000000000001")), + vf.createLiteral(BigDecimal.valueOf(42.42)), + vf.createLiteral(BigDecimal.TEN), + // DOUBLE + vf.createLiteral(Double.NaN), + vf.createLiteral(Double.POSITIVE_INFINITY), + vf.createLiteral(Double.NEGATIVE_INFINITY), + vf.createLiteral(Double.MIN_VALUE), + vf.createLiteral(Double.MAX_VALUE), + // vf.createLiteral(-0.0d), + vf.createLiteral(3.14159d), + vf.createLiteral(2.0d), + vf.createLiteral(7.11d), + // FLOAT + vf.createLiteral(Float.NaN), + vf.createLiteral(Float.POSITIVE_INFINITY), + vf.createLiteral(Float.NEGATIVE_INFINITY), + vf.createLiteral(Float.MIN_VALUE), + vf.createLiteral(Float.MAX_VALUE), + vf.createLiteral(-0.0f), + vf.createLiteral(1.5f), + vf.createLiteral(0.25f), + // INTEGER + vf.createLiteral(BigInteger.ZERO), + vf.createLiteral(BigInteger.ONE.negate()), + vf.createLiteral(BigInteger.valueOf(Long.MAX_VALUE)), + 
vf.createLiteral(BigInteger.valueOf(Long.MIN_VALUE)), + vf.createLiteral(BigInteger.valueOf(100)), + vf.createLiteral(BigInteger.valueOf(-12345)), + // LONG + vf.createLiteral(Long.MAX_VALUE), + vf.createLiteral(Long.MIN_VALUE), + vf.createLiteral(0L), + vf.createLiteral(123456789L), + // INT + vf.createLiteral(Integer.MAX_VALUE), + vf.createLiteral(Integer.MIN_VALUE), + vf.createLiteral(0), + vf.createLiteral(42), + // SHORT + vf.createLiteral(Short.MAX_VALUE), + vf.createLiteral(Short.MIN_VALUE), + vf.createLiteral((short) 0), + vf.createLiteral((short) 999), + // BYTE + vf.createLiteral(Byte.MAX_VALUE), + vf.createLiteral(Byte.MIN_VALUE), + vf.createLiteral((byte) 0), + vf.createLiteral((byte) 42), + // UNSIGNED_LONG + vf.createLiteral("0", XSD.UNSIGNED_LONG), + vf.createLiteral("18446744073709551615", XSD.UNSIGNED_LONG), // 2^64-1 + vf.createLiteral("123456789", XSD.UNSIGNED_LONG), + // UNSIGNED_INT + vf.createLiteral("0", XSD.UNSIGNED_INT), + vf.createLiteral("4294967295", XSD.UNSIGNED_INT), // 2^32-1 + vf.createLiteral("123456", XSD.UNSIGNED_INT), + // UNSIGNED_SHORT + vf.createLiteral("0", XSD.UNSIGNED_SHORT), + vf.createLiteral("65535", XSD.UNSIGNED_SHORT), // 2^16-1 + vf.createLiteral("12345", XSD.UNSIGNED_SHORT), + // UNSIGNED_BYTE + vf.createLiteral("0", XSD.UNSIGNED_BYTE), + vf.createLiteral("255", XSD.UNSIGNED_BYTE), // 2^8-1 + vf.createLiteral("42", XSD.UNSIGNED_BYTE), + // POSITIVE_INTEGER + vf.createLiteral("1", XSD.POSITIVE_INTEGER), + vf.createLiteral("999999999999999999999999", XSD.POSITIVE_INTEGER), + vf.createLiteral("42", XSD.POSITIVE_INTEGER), + // NEGATIVE_INTEGER + vf.createLiteral("-1", XSD.NEGATIVE_INTEGER), + vf.createLiteral("-999999999999999999999999", XSD.NEGATIVE_INTEGER), + vf.createLiteral("-42", XSD.NEGATIVE_INTEGER), + // NON_NEGATIVE_INTEGER + vf.createLiteral("0", XSD.NON_NEGATIVE_INTEGER), + vf.createLiteral("123456789012345678", XSD.NON_NEGATIVE_INTEGER), + vf.createLiteral("123", XSD.NON_NEGATIVE_INTEGER), + // 
NON_POSITIVE_INTEGER + vf.createLiteral("0", XSD.NON_POSITIVE_INTEGER), + vf.createLiteral("-123456789012345678", XSD.NON_POSITIVE_INTEGER), + vf.createLiteral("-99", XSD.NON_POSITIVE_INTEGER), + // STRING (short string; edge + standard) + vf.createLiteral("", XSD.STRING), + vf.createLiteral("a", XSD.STRING), + vf.createLiteral("abcdefg", XSD.STRING), // max inlined length + vf.createLiteral("RDF4J", XSD.STRING), + vf.createLiteral("test", XSD.STRING), + // DATETIME + vf.createLiteral(LocalDateTime.of(1970, 1, 1, 0, 0, 0)), + vf.createLiteral(LocalDateTime.of(9999, 12, 31, 23, 59, 59)), + vf.createLiteral(LocalDateTime.of(2020, 2, 29, 12, 0, 0)), + vf.createLiteral(LocalDateTime.of(1999, 12, 31, 23, 59, 59)), + // DATETIMESTAMP + // vf.createLiteral(OffsetDateTime.of(1970, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC)), + // vf.createLiteral(OffsetDateTime.of(9999, 12, 31, 23, 59, 59, 0, ZoneOffset.ofHours(14))), + // vf.createLiteral(OffsetDateTime.of(2000, 1, 1, 0, 0, 0, 0, ZoneOffset.ofHours(-5))), + // DATE + vf.createLiteral(LocalDate.of(1970, 1, 1)), + vf.createLiteral(LocalDate.of(9999, 12, 31)), + vf.createLiteral(LocalDate.of(2024, 6, 13)), + // BOOLEAN + vf.createLiteral(true), + vf.createLiteral(false) + ); + + @Test + void testPackAndUnpack_AllLiteralTypesWithEdgeAndStandardCases() { + for (Literal literal : literals) { + try { + long packed = Values.packLiteral(literal); + assertFalse(packed < 0, "Packed value should be non-negative for value: " + literal); + + // If the literal is not inlined, packed==0. Only test roundtrip if it is inlined. 
+ if (packed != 0L) { + Literal unpacked = Values.unpackLiteral(packed, vf); + assertEqualLiterals(unpacked, literal); + } else { + // (optional) ensure non-inlined values can be detected + assertThat(packed).isZero(); + } + } catch (Throwable e) { + throw new AssertionError("Failed to pack/unpack literal: " + literal, e); + } + } + } + + @Test + void testPackAndUnpack_AllLiteralTypesWithVarintConversion() { + ByteBuffer bb = ByteBuffer.allocate(Long.BYTES + 1); + for (Literal literal : literals) { + long packed = Values.packLiteral(literal); + assertFalse(packed < 0, "Packed value should be non-negative for value: " + literal); + + // If the literal is not inlined, packed==0. Only test roundtrip if it is inlined. + if (packed != 0L) { + bb.clear(); + Varint.writeUnsigned(bb, packed); + bb.flip(); + assertThat(Varint.readUnsigned(bb)).isEqualTo(packed); + } else { + // (optional) ensure non-inlined values can be detected + assertThat(packed).isZero(); + } + } + } + + private void assertEqualLiterals(Literal actual, Literal expected) { + assertThat(actual).isEqualTo(expected); + } +}