From 2b2f92910a91fcec36cf1dc8fdce47400521e1b8 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Fri, 20 Feb 2026 19:43:17 -0500 Subject: [PATCH 01/10] feat: add S3 SAIL module with in-memory storage (Phase 1a+1b) Introduce rdf4j-sail-s3, an S3-backed SAIL using LSM-tree architecture adapted from RisingWave's Hummock engine. This commit implements the module skeleton and in-memory storage layer: - Config: S3StoreConfig, S3StoreFactory, S3StoreSchema - Storage: Varint encoding, QuadIndex permutations, MemTable (ConcurrentSkipListMap) - Value/NS: S3ValueStore (ConcurrentHashMap ID mapping), S3NamespaceStore - Core SAIL: S3Store, S3StoreConnection, S3SailStore with SailSource/Sink/Dataset - SPI registration via META-INF/services Co-Authored-By: Claude Opus 4.6 --- core/sail/pom.xml | 1 + core/sail/s3/pom.xml | 75 +++ .../rdf4j/sail/s3/S3EvaluationStatistics.java | 30 + .../rdf4j/sail/s3/S3NamespaceStore.java | 55 ++ .../eclipse/rdf4j/sail/s3/S3SailStore.java | 566 ++++++++++++++++++ .../org/eclipse/rdf4j/sail/s3/S3Store.java | 228 +++++++ .../rdf4j/sail/s3/S3StoreConnection.java | 127 ++++ .../eclipse/rdf4j/sail/s3/S3ValueStore.java | 89 +++ .../rdf4j/sail/s3/config/S3StoreConfig.java | 280 +++++++++ .../rdf4j/sail/s3/config/S3StoreFactory.java | 61 ++ .../rdf4j/sail/s3/config/S3StoreSchema.java | 78 +++ .../rdf4j/sail/s3/storage/MemTable.java | 253 ++++++++ .../rdf4j/sail/s3/storage/QuadIndex.java | 342 +++++++++++ .../eclipse/rdf4j/sail/s3/storage/Varint.java | 406 +++++++++++++ .../org.eclipse.rdf4j.sail.config.SailFactory | 1 + 15 files changed, 2592 insertions(+) create mode 100644 core/sail/s3/pom.xml create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3StoreConnection.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Varint.java create mode 100644 core/sail/s3/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory diff --git a/core/sail/pom.xml b/core/sail/pom.xml index c31538770d8..9386f6bd5ac 100644 --- a/core/sail/pom.xml +++ b/core/sail/pom.xml @@ -19,6 +19,7 @@ model shacl lmdb + s3 lucene-api lucene elasticsearch diff --git a/core/sail/s3/pom.xml b/core/sail/s3/pom.xml new file mode 100644 index 00000000000..481535c3268 --- /dev/null +++ b/core/sail/s3/pom.xml @@ -0,0 +1,75 @@ + + + 4.0.0 + + org.eclipse.rdf4j + rdf4j-sail + 6.0.0-SNAPSHOT + + rdf4j-sail-s3 + RDF4J: S3Store + Sail implementation that stores data on S3-compatible object storage using an LSM-tree. 
+ + + ${project.groupId} + rdf4j-sail-base + ${project.version} + + + ${project.groupId} + rdf4j-queryalgebra-evaluation + ${project.version} + + + ${project.groupId} + rdf4j-queryalgebra-model + ${project.version} + + + ${project.groupId} + rdf4j-query + ${project.version} + + + ${project.groupId} + rdf4j-model + ${project.version} + + + io.minio + minio + 8.5.7 + + + org.slf4j + slf4j-api + + + com.google.guava + guava + + + ${project.groupId} + rdf4j-sail-testsuite + ${project.version} + test + + + ${project.groupId} + rdf4j-repository-testsuite + ${project.version} + test + + + ${project.groupId} + rdf4j-repository-sail + ${project.version} + test + + + org.junit.jupiter + junit-jupiter-params + test + + + diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java new file mode 100644 index 00000000000..7660bb5de72 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java @@ -0,0 +1,30 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; + +/** + * Evaluation statistics for the S3 sail. Currently uses the base class's default cardinality estimation. This can be + * enhanced later to query the actual storage for more accurate estimates. + */ +class S3EvaluationStatistics extends EvaluationStatistics { + + @Override + protected CardinalityCalculator createCardinalityCalculator() { + return new S3CardinalityCalculator(); + } + + protected class S3CardinalityCalculator extends CardinalityCalculator { + // Uses the default cardinality estimation from the base class. + // Can be enhanced to consult S3ValueStore and storage for accurate estimates. + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java new file mode 100644 index 00000000000..2b1a114b066 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.eclipse.rdf4j.model.impl.SimpleNamespace; + +/** + * In-memory store for namespace prefix information. All operations are synchronized for thread safety. 
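+ *
+ * <p>
+ * A minimal usage sketch (illustrative only; the methods shown are the ones defined below):
+ *
+ * <pre>{@code
+ * S3NamespaceStore ns = new S3NamespaceStore();
+ * ns.setNamespace("ex", "http://example.org/");
+ * String name = ns.getNamespace("ex"); // "http://example.org/"
+ * ns.removeNamespace("ex");            // getNamespace("ex") now returns null
+ * }</pre>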
+ */ +class S3NamespaceStore implements Iterable { + + private final Map namespacesMap = new LinkedHashMap<>(16); + + public synchronized String getNamespace(String prefix) { + SimpleNamespace namespace = namespacesMap.get(prefix); + return namespace != null ? namespace.getName() : null; + } + + public synchronized void setNamespace(String prefix, String name) { + SimpleNamespace ns = namespacesMap.get(prefix); + if (ns != null) { + if (!ns.getName().equals(name)) { + ns.setName(name); + } + } else { + namespacesMap.put(prefix, new SimpleNamespace(prefix, name)); + } + } + + public synchronized void removeNamespace(String prefix) { + namespacesMap.remove(prefix); + } + + @Override + public synchronized Iterator iterator() { + // return a snapshot to avoid ConcurrentModificationException + return new LinkedHashMap<>(namespacesMap).values().iterator(); + } + + public synchronized void clear() { + namespacesMap.clear(); + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java new file mode 100644 index 00000000000..c3791412f35 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -0,0 +1,566 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.locks.ReentrantLock; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; +import org.eclipse.rdf4j.common.iteration.ConvertingIteration; +import org.eclipse.rdf4j.common.iteration.EmptyIteration; +import org.eclipse.rdf4j.common.iteration.FilterIteration; +import org.eclipse.rdf4j.common.iteration.UnionIteration; +import org.eclipse.rdf4j.common.order.StatementOrder; +import org.eclipse.rdf4j.common.transaction.IsolationLevel; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Namespace; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.base.BackingSailSource; +import org.eclipse.rdf4j.sail.base.SailDataset; +import org.eclipse.rdf4j.sail.base.SailSink; +import org.eclipse.rdf4j.sail.base.SailSource; +import org.eclipse.rdf4j.sail.base.SailStore; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.sail.s3.storage.MemTable; +import org.eclipse.rdf4j.sail.s3.storage.QuadIndex; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * In-memory {@link SailStore} implementation that stores RDF quads in {@link MemTable}s. Each configured index + * permutation gets its own MemTable for efficient query patterns. 
+ * + *

+ * This is the Phase 1b in-memory-only implementation. Later phases will add SSTable persistence and S3 integration. + *

+ */ +class S3SailStore implements SailStore { + + final Logger logger = LoggerFactory.getLogger(S3SailStore.class); + + private final S3ValueStore valueStore; + private final S3NamespaceStore namespaceStore; + private final List indexes; + private final List memTables; + private volatile boolean mayHaveInferred; + + /** + * A lock to control concurrent access by {@link S3SailSink} to the stores. + */ + private final ReentrantLock sinkStoreAccessLock = new ReentrantLock(); + + S3SailStore(S3StoreConfig config) { + this.valueStore = new S3ValueStore(); + this.namespaceStore = new S3NamespaceStore(); + + // Parse index specifications from config + String indexSpec = config.getQuadIndexes(); + Set indexSpecs = QuadIndex.parseIndexSpecList(indexSpec); + this.indexes = new ArrayList<>(indexSpecs.size()); + this.memTables = new ArrayList<>(indexSpecs.size()); + for (String spec : indexSpecs) { + QuadIndex qi = new QuadIndex(spec); + indexes.add(qi); + memTables.add(new MemTable(qi)); + } + + if (indexes.isEmpty()) { + // Fallback: always ensure at least one index + QuadIndex defaultIndex = new QuadIndex("spoc"); + indexes.add(defaultIndex); + memTables.add(new MemTable(defaultIndex)); + } + } + + @Override + public ValueFactory getValueFactory() { + return valueStore; + } + + @Override + public EvaluationStatistics getEvaluationStatistics() { + return new S3EvaluationStatistics(); + } + + @Override + public SailSource getExplicitSailSource() { + return new S3SailSource(true); + } + + @Override + public SailSource getInferredSailSource() { + return new S3SailSource(false); + } + + @Override + public void close() throws SailException { + valueStore.close(); + for (MemTable mt : memTables) { + mt.clear(); + } + } + + /** + * Selects the best MemTable for the given query pattern. + */ + private int getBestIndex(long subj, long pred, long obj, long context) { + int bestScore = -1; + int bestIdx = 0; + for (int i = 0; i < indexes.size(); i++) { + int score = indexes.get(i).getPatternScore(subj, pred, obj, context); + if (score > bestScore) { + bestScore = score; + bestIdx = i; + } + } + return bestIdx; + } + + /** + * Creates a statement iterator for the given pattern. + */ + CloseableIteration createStatementIterator( + Resource subj, IRI pred, Value obj, boolean explicit, Resource... contexts) { + + if (!explicit && !mayHaveInferred) { + return new EmptyIteration<>(); + } + + long subjID = S3ValueStore.UNKNOWN_ID; + if (subj != null) { + subjID = valueStore.getId(subj); + if (subjID == S3ValueStore.UNKNOWN_ID) { + return new EmptyIteration<>(); + } + } + + long predID = S3ValueStore.UNKNOWN_ID; + if (pred != null) { + predID = valueStore.getId(pred); + if (predID == S3ValueStore.UNKNOWN_ID) { + return new EmptyIteration<>(); + } + } + + long objID = S3ValueStore.UNKNOWN_ID; + if (obj != null) { + objID = valueStore.getId(obj); + if (objID == S3ValueStore.UNKNOWN_ID) { + return new EmptyIteration<>(); + } + } + + List contextIDList = new ArrayList<>(contexts.length == 0 ? 1 : contexts.length); + if (contexts.length == 0) { + contextIDList.add(S3ValueStore.UNKNOWN_ID); + } else { + for (Resource context : contexts) { + if (context == null) { + contextIDList.add(0L); + } else if (!context.isTriple()) { + long contextID = valueStore.getId(context); + if (contextID != S3ValueStore.UNKNOWN_ID) { + contextIDList.add(contextID); + } + } + } + } + + if (contextIDList.isEmpty()) { + return new EmptyIteration<>(); + } + + int bestIdx = getBestIndex(subjID, predID, objID, + contextIDList.size() == 1 ? 
contextIDList.get(0) : S3ValueStore.UNKNOWN_ID); + MemTable bestTable = memTables.get(bestIdx); + + ArrayList> perContextIterList = new ArrayList<>(contextIDList.size()); + + for (long contextID : contextIDList) { + Iterator quads = bestTable.scan(subjID, predID, objID, contextID, explicit); + perContextIterList.add(new QuadToStatementIteration(quads, valueStore)); + } + + if (perContextIterList.size() == 1) { + return perContextIterList.get(0); + } else { + return new UnionIteration<>(perContextIterList); + } + } + + // ========================================================================= + // Inner classes + // ========================================================================= + + private final class S3SailSource extends BackingSailSource { + + private final boolean explicit; + + S3SailSource(boolean explicit) { + this.explicit = explicit; + } + + @Override + public SailSource fork() { + throw new UnsupportedOperationException("This store does not support multiple datasets"); + } + + @Override + public SailSink sink(IsolationLevel level) throws SailException { + return new S3SailSink(explicit); + } + + @Override + public SailDataset dataset(IsolationLevel level) throws SailException { + return new S3SailDataset(explicit); + } + } + + private final class S3SailSink implements SailSink { + + private final boolean explicit; + + S3SailSink(boolean explicit) { + this.explicit = explicit; + } + + @Override + public void close() { + // no-op + } + + @Override + public void prepare() throws SailException { + // serializable is not supported at this level + } + + @Override + public void flush() throws SailException { + sinkStoreAccessLock.lock(); + try { + // In-memory only: nothing to persist yet. + // In later phases this will flush MemTable to SSTable and upload to S3. + } finally { + sinkStoreAccessLock.unlock(); + } + } + + @Override + public void setNamespace(String prefix, String name) throws SailException { + sinkStoreAccessLock.lock(); + try { + namespaceStore.setNamespace(prefix, name); + } finally { + sinkStoreAccessLock.unlock(); + } + } + + @Override + public void removeNamespace(String prefix) throws SailException { + sinkStoreAccessLock.lock(); + try { + namespaceStore.removeNamespace(prefix); + } finally { + sinkStoreAccessLock.unlock(); + } + } + + @Override + public void clearNamespaces() throws SailException { + sinkStoreAccessLock.lock(); + try { + namespaceStore.clear(); + } finally { + sinkStoreAccessLock.unlock(); + } + } + + @Override + public void observe(Resource subj, IRI pred, Value obj, Resource... contexts) throws SailException { + // serializable is not supported at this level + } + + @Override + public void clear(Resource... contexts) throws SailException { + removeStatements(null, null, null, explicit, contexts); + } + + @Override + public void approve(Resource subj, IRI pred, Value obj, Resource ctx) throws SailException { + addStatement(subj, pred, obj, explicit, ctx); + } + + @Override + public void approveAll(Set approved, Set approvedContexts) { + sinkStoreAccessLock.lock(); + try { + for (Statement statement : approved) { + Resource subj = statement.getSubject(); + IRI pred = statement.getPredicate(); + Value obj = statement.getObject(); + Resource context = statement.getContext(); + + long s = valueStore.storeValue(subj); + long p = valueStore.storeValue(pred); + long o = valueStore.storeValue(obj); + long c = context == null ? 
0 : valueStore.storeValue(context); + + if (!explicit) { + mayHaveInferred = true; + } + + for (MemTable mt : memTables) { + mt.put(s, p, o, c, explicit); + } + } + } finally { + sinkStoreAccessLock.unlock(); + } + } + + @Override + public void deprecate(Statement statement) throws SailException { + removeStatements(statement.getSubject(), statement.getPredicate(), statement.getObject(), explicit, + statement.getContext()); + } + + @Override + public boolean deprecateByQuery(Resource subj, IRI pred, Value obj, Resource[] contexts) { + return removeStatements(subj, pred, obj, explicit, contexts) > 0; + } + + @Override + public boolean supportsDeprecateByQuery() { + return true; + } + + private void addStatement(Resource subj, IRI pred, Value obj, boolean explicit, Resource context) { + sinkStoreAccessLock.lock(); + try { + long s = valueStore.storeValue(subj); + long p = valueStore.storeValue(pred); + long o = valueStore.storeValue(obj); + long c = context == null ? 0 : valueStore.storeValue(context); + + if (!explicit) { + mayHaveInferred = true; + } + + for (MemTable mt : memTables) { + mt.put(s, p, o, c, explicit); + } + } finally { + sinkStoreAccessLock.unlock(); + } + } + + private long removeStatements(Resource subj, IRI pred, Value obj, boolean explicit, Resource... contexts) { + Objects.requireNonNull(contexts, + "contexts argument may not be null; either the value should be cast to Resource or an empty array should be supplied"); + + sinkStoreAccessLock.lock(); + try { + final long subjID; + if (subj != null) { + subjID = valueStore.getId(subj); + if (subjID == S3ValueStore.UNKNOWN_ID) { + return 0; + } + } else { + subjID = S3ValueStore.UNKNOWN_ID; + } + + final long predID; + if (pred != null) { + predID = valueStore.getId(pred); + if (predID == S3ValueStore.UNKNOWN_ID) { + return 0; + } + } else { + predID = S3ValueStore.UNKNOWN_ID; + } + + final long objID; + if (obj != null) { + objID = valueStore.getId(obj); + if (objID == S3ValueStore.UNKNOWN_ID) { + return 0; + } + } else { + objID = S3ValueStore.UNKNOWN_ID; + } + + final long[] contextIds; + if (contexts.length == 0) { + contextIds = new long[] { S3ValueStore.UNKNOWN_ID }; + } else { + contextIds = new long[contexts.length]; + for (int i = 0; i < contexts.length; i++) { + Resource context = contexts[i]; + if (context == null) { + contextIds[i] = 0; + } else { + long id = valueStore.getId(context); + contextIds[i] = (id != S3ValueStore.UNKNOWN_ID) ? id : Long.MAX_VALUE; + } + } + } + + // Use the first MemTable as the source of truth for scanning, then remove from all + int bestIdx = getBestIndex(subjID, predID, objID, + contextIds.length == 1 ? 
contextIds[0] : S3ValueStore.UNKNOWN_ID); + MemTable scanTable = memTables.get(bestIdx); + + long removeCount = 0; + for (long contextId : contextIds) { + Iterator iter = scanTable.scan(subjID, predID, objID, contextId, explicit); + List toRemove = new ArrayList<>(); + while (iter.hasNext()) { + toRemove.add(iter.next()); + } + for (long[] quad : toRemove) { + for (MemTable mt : memTables) { + mt.remove(quad[0], quad[1], quad[2], quad[3], explicit); + } + removeCount++; + } + } + + return removeCount; + } finally { + sinkStoreAccessLock.unlock(); + } + } + } + + private final class S3SailDataset implements SailDataset { + + private final boolean explicit; + + S3SailDataset(boolean explicit) { + this.explicit = explicit; + } + + @Override + public void close() { + // no-op for in-memory implementation + } + + @Override + public String getNamespace(String prefix) throws SailException { + return namespaceStore.getNamespace(prefix); + } + + @Override + public CloseableIteration getNamespaces() { + return new CloseableIteratorIteration<>(namespaceStore.iterator()); + } + + @Override + public CloseableIteration getContextIDs() throws SailException { + // Scan all quads and collect distinct non-null contexts + MemTable table = memTables.get(0); + Iterator allQuads = table.scan(-1, -1, -1, -1, explicit); + + return new FilterIteration( + new ConvertingIteration( + new CloseableIteratorIteration<>(allQuads)) { + @Override + protected Resource convert(long[] quad) { + if (quad[3] == 0) { + return null; + } + Value val = valueStore.getValue(quad[3]); + return val instanceof Resource ? (Resource) val : null; + } + }) { + private final java.util.Set seen = new java.util.HashSet<>(); + + @Override + protected boolean accept(Resource ctx) { + return ctx != null && seen.add(ctx); + } + + @Override + protected void handleClose() { + // no-op + } + }; + } + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) throws SailException { + return createStatementIterator(subj, pred, obj, explicit, contexts); + } + + @Override + public CloseableIteration getStatements(StatementOrder statementOrder, Resource subj, + IRI pred, Value obj, Resource... contexts) throws SailException { + throw new UnsupportedOperationException("Not implemented yet"); + } + + @Override + public Set getSupportedOrders(Resource subj, IRI pred, Value obj, Resource... contexts) { + return Set.of(); + } + + @Override + public Comparator getComparator() { + return null; + } + } + + /** + * Converts quad ID arrays from MemTable iteration into Statement objects by resolving IDs through the ValueStore. + */ + private static final class QuadToStatementIteration implements CloseableIteration { + + private final Iterator quads; + private final S3ValueStore valueStore; + + QuadToStatementIteration(Iterator quads, S3ValueStore valueStore) { + this.quads = quads; + this.valueStore = valueStore; + } + + @Override + public boolean hasNext() { + return quads.hasNext(); + } + + @Override + public Statement next() { + long[] quad = quads.next(); + Resource subj = (Resource) valueStore.getValue(quad[0]); + IRI pred = (IRI) valueStore.getValue(quad[1]); + Value obj = valueStore.getValue(quad[2]); + Resource ctx = quad[3] == 0 ? 
null : (Resource) valueStore.getValue(quad[3]); + return valueStore.createStatement(subj, pred, obj, ctx); + } + + @Override + public void close() { + // no-op: MemTable iterators don't hold resources + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java new file mode 100644 index 00000000000..d7c84ad0d22 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java @@ -0,0 +1,228 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import java.util.concurrent.locks.ReentrantLock; + +import org.eclipse.rdf4j.common.annotation.Experimental; +import org.eclipse.rdf4j.common.concurrent.locks.Lock; +import org.eclipse.rdf4j.common.concurrent.locks.LockManager; +import org.eclipse.rdf4j.common.transaction.IsolationLevel; +import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategyFactory; +import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolver; +import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolverClient; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategyFactory; +import org.eclipse.rdf4j.repository.sparql.federation.SPARQLServiceResolver; +import org.eclipse.rdf4j.sail.InterruptedSailException; +import org.eclipse.rdf4j.sail.NotifyingSailConnection; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.base.SailSource; +import org.eclipse.rdf4j.sail.base.SailStore; +import org.eclipse.rdf4j.sail.base.SnapshotSailStore; +import org.eclipse.rdf4j.sail.helpers.AbstractNotifyingSail; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A SAIL implementation that stores RDF data on S3-compatible object storage using an LSM-tree architecture. + * + *

+ * Phase 1b: In-memory only. Data is stored in sorted MemTables and is not yet persisted to S3. This enables passing the + * SAIL compliance tests with the core storage engine. + *

+ * + * @implNote the S3 store is in an experimental state: its existence, signature or behavior may change without warning + * from one release to the next. + */ +@Experimental +public class S3Store extends AbstractNotifyingSail implements FederatedServiceResolverClient { + + private static final Logger logger = LoggerFactory.getLogger(S3Store.class); + + private final S3StoreConfig config; + private SailStore store; + private S3SailStore backingStore; + private EvaluationStrategyFactory evalStratFactory; + + /** + * independent life cycle + */ + private FederatedServiceResolver serviceResolver; + + /** + * dependent life cycle + */ + private SPARQLServiceResolver dependentServiceResolver; + + /** + * Lock manager used to prevent concurrent {@link #getTransactionLock(IsolationLevel)} calls. + */ + private final ReentrantLock txnLockManager = new ReentrantLock(); + + /** + * Holds locks for all isolated transactions. + */ + private final LockManager isolatedLockManager = new LockManager(debugEnabled()); + + /** + * Holds locks for all {@link IsolationLevels#NONE} isolation transactions. + */ + private final LockManager disabledIsolationLockManager = new LockManager(debugEnabled()); + + public S3Store() { + this(new S3StoreConfig()); + } + + public S3Store(S3StoreConfig config) { + super(); + this.config = config; + setSupportedIsolationLevels(IsolationLevels.NONE, IsolationLevels.READ_COMMITTED, + IsolationLevels.SNAPSHOT_READ, IsolationLevels.SNAPSHOT, IsolationLevels.SERIALIZABLE); + setDefaultIsolationLevel(IsolationLevels.SNAPSHOT_READ); + config.getDefaultQueryEvaluationMode().ifPresent(this::setDefaultQueryEvaluationMode); + EvaluationStrategyFactory evalFactory = config.getEvaluationStrategyFactory(); + if (evalFactory != null) { + setEvaluationStrategyFactory(evalFactory); + } + } + + public synchronized EvaluationStrategyFactory getEvaluationStrategyFactory() { + if (evalStratFactory == null) { + evalStratFactory = new StrictEvaluationStrategyFactory(getFederatedServiceResolver()); + } + evalStratFactory.setQuerySolutionCacheThreshold(getIterationCacheSyncThreshold()); + evalStratFactory.setTrackResultSize(isTrackResultSize()); + return evalStratFactory; + } + + public synchronized void setEvaluationStrategyFactory(EvaluationStrategyFactory factory) { + evalStratFactory = factory; + } + + public synchronized FederatedServiceResolver getFederatedServiceResolver() { + if (serviceResolver == null) { + if (dependentServiceResolver == null) { + dependentServiceResolver = new SPARQLServiceResolver(); + } + setFederatedServiceResolver(dependentServiceResolver); + } + return serviceResolver; + } + + @Override + public synchronized void setFederatedServiceResolver(FederatedServiceResolver resolver) { + this.serviceResolver = resolver; + if (resolver != null && evalStratFactory instanceof FederatedServiceResolverClient) { + ((FederatedServiceResolverClient) evalStratFactory).setFederatedServiceResolver(resolver); + } + } + + @Override + protected void initializeInternal() throws SailException { + logger.debug("Initializing S3Store..."); + + try { + backingStore = new S3SailStore(config); + this.store = new SnapshotSailStore(backingStore, () -> new org.eclipse.rdf4j.model.impl.LinkedHashModel()) { + + @Override + public SailSource getExplicitSailSource() { + if (isIsolationDisabled()) { + return backingStore.getExplicitSailSource(); + } else { + return super.getExplicitSailSource(); + } + } + + @Override + public SailSource getInferredSailSource() { + if (isIsolationDisabled()) { + return 
backingStore.getInferredSailSource(); + } else { + return super.getInferredSailSource(); + } + } + }; + } catch (Throwable e) { + throw new SailException(e); + } + + logger.debug("S3Store initialized"); + } + + @Override + protected void shutDownInternal() throws SailException { + logger.debug("Shutting down S3Store..."); + + try { + store.close(); + } finally { + if (dependentServiceResolver != null) { + dependentServiceResolver.shutDown(); + } + } + + logger.debug("S3Store shut down"); + } + + @Override + public boolean isWritable() { + return true; + } + + @Override + protected NotifyingSailConnection getConnectionInternal() throws SailException { + return new S3StoreConnection(this); + } + + @Override + public ValueFactory getValueFactory() { + return store.getValueFactory(); + } + + /** + * This call will block when {@link IsolationLevels#NONE} is provided when there are active transactions with a + * higher isolation and block when a higher isolation is provided when there are active transactions with + * {@link IsolationLevels#NONE} isolation. + */ + Lock getTransactionLock(IsolationLevel level) throws SailException { + txnLockManager.lock(); + try { + if (IsolationLevels.NONE.isCompatibleWith(level)) { + isolatedLockManager.waitForActiveLocks(); + return disabledIsolationLockManager.createLock(level.toString()); + } else { + disabledIsolationLockManager.waitForActiveLocks(); + return isolatedLockManager.createLock(level.toString()); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new InterruptedSailException(e); + } finally { + txnLockManager.unlock(); + } + } + + boolean isIsolationDisabled() { + return disabledIsolationLockManager.isActiveLock(); + } + + SailStore getSailStore() { + return store; + } + + S3SailStore getBackingStore() { + return backingStore; + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3StoreConnection.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3StoreConnection.java new file mode 100644 index 00000000000..4c1d5e46aef --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3StoreConnection.java @@ -0,0 +1,127 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import org.eclipse.rdf4j.common.concurrent.locks.Lock; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.base.SailSourceConnection; +import org.eclipse.rdf4j.sail.helpers.DefaultSailChangedEvent; + +/** + * Connection to an {@link S3Store}. + */ +public class S3StoreConnection extends SailSourceConnection { + + protected final S3Store s3Store; + + private volatile DefaultSailChangedEvent sailChangedEvent; + + /** + * The transaction lock held by this connection during transactions. 
+ */ + private volatile Lock txnLock; + + protected S3StoreConnection(S3Store sail) { + super(sail, sail.getSailStore(), sail.getEvaluationStrategyFactory()); + this.s3Store = sail; + sailChangedEvent = new DefaultSailChangedEvent(sail); + } + + @Override + protected void startTransactionInternal() throws SailException { + boolean releaseLock = true; + try { + if (txnLock == null || !txnLock.isActive()) { + txnLock = s3Store.getTransactionLock(getTransactionIsolation()); + if (s3Store.isIsolationDisabled()) { + releaseLock = false; + } + } + super.startTransactionInternal(); + } finally { + if (releaseLock && txnLock != null && txnLock.isActive()) { + txnLock.release(); + } + } + } + + @Override + protected void commitInternal() throws SailException { + try { + super.commitInternal(); + } finally { + if (txnLock != null && txnLock.isActive()) { + txnLock.release(); + } + } + + s3Store.notifySailChanged(sailChangedEvent); + + // create a fresh event object + sailChangedEvent = new DefaultSailChangedEvent(s3Store); + } + + @Override + protected void rollbackInternal() throws SailException { + try { + super.rollbackInternal(); + } finally { + if (txnLock != null && txnLock.isActive()) { + txnLock.release(); + } + } + // create a fresh event object + sailChangedEvent = new DefaultSailChangedEvent(s3Store); + } + + @Override + protected void addStatementInternal(Resource subj, IRI pred, Value obj, Resource... contexts) + throws SailException { + sailChangedEvent.setStatementsAdded(true); + } + + @Override + public boolean addInferredStatement(Resource subj, IRI pred, Value obj, Resource... contexts) + throws SailException { + boolean ret = super.addInferredStatement(subj, pred, obj, contexts); + sailChangedEvent.setStatementsAdded(true); + return ret; + } + + @Override + protected void removeStatementsInternal(Resource subj, IRI pred, Value obj, Resource... contexts) + throws SailException { + sailChangedEvent.setStatementsRemoved(true); + } + + @Override + public boolean removeInferredStatement(Resource subj, IRI pred, Value obj, Resource... contexts) + throws SailException { + boolean ret = super.removeInferredStatement(subj, pred, obj, contexts); + sailChangedEvent.setStatementsRemoved(true); + return ret; + } + + @Override + protected void clearInternal(Resource... contexts) throws SailException { + super.clearInternal(contexts); + sailChangedEvent.setStatementsRemoved(true); + } + + @Override + public void clearInferred(Resource... contexts) throws SailException { + super.clearInferred(contexts); + sailChangedEvent.setStatementsRemoved(true); + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java new file mode 100644 index 00000000000..594d8fb1b8d --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java @@ -0,0 +1,89 @@ +/******************************************************************************* + * Copyright (c) 2024 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.base.AbstractValueFactory; + +/** + * In-memory value store that maps RDF {@link Value} objects to long IDs and vice-versa. Uses {@link ConcurrentHashMap} + * for thread-safe bidirectional lookup. + */ +class S3ValueStore extends AbstractValueFactory { + + static final long UNKNOWN_ID = -1; + + private final ConcurrentHashMap valueToId = new ConcurrentHashMap<>(); + private final ConcurrentHashMap idToValue = new ConcurrentHashMap<>(); + private final AtomicLong nextId = new AtomicLong(1); + + /** + * Stores the supplied value and returns the ID assigned to it. If the value already exists, returns the existing + * ID. + * + * @param value the value to store + * @return the ID assigned to the value + */ + public long storeValue(Value value) { + Long existing = valueToId.get(value); + if (existing != null) { + return existing; + } + long id = nextId.getAndIncrement(); + Long previous = valueToId.putIfAbsent(value, id); + if (previous != null) { + // another thread stored it first + return previous; + } + idToValue.put(id, value); + return id; + } + + /** + * Gets the ID for the specified value. + * + * @param value a value + * @return the ID for the value, or {@link #UNKNOWN_ID} if not found + */ + public long getId(Value value) { + Long id = valueToId.get(value); + return id != null ? id : UNKNOWN_ID; + } + + /** + * Gets the value for the specified ID. + * + * @param id a value ID + * @return the value, or {@code null} if not found + */ + public Value getValue(long id) { + return idToValue.get(id); + } + + /** + * Removes all stored values and resets the ID counter. + */ + public void clear() { + valueToId.clear(); + idToValue.clear(); + nextId.set(1); + } + + /** + * Closes the value store, releasing all resources. + */ + public void close() { + clear(); + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java new file mode 100644 index 00000000000..3908f0b5958 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java @@ -0,0 +1,280 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.config; + +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.util.ModelException; +import org.eclipse.rdf4j.model.util.Models; +import org.eclipse.rdf4j.sail.base.config.BaseSailConfig; +import org.eclipse.rdf4j.sail.config.SailConfigException; + +/** + * Configuration for S3-backed SAIL store. 
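+ *
+ * <p>
+ * All setters are fluent, and unset values fall back to the defaults declared below. A configuration sketch (in this
+ * in-memory phase only the quad indexes are consumed by the store; the size and cache settings are parsed and
+ * exported but not yet acted on):
+ *
+ * <pre>{@code
+ * S3StoreConfig config = new S3StoreConfig("spoc,posc")
+ *         .setMemTableSize(32 * 1024 * 1024) // 32 MiB
+ *         .setBlockSize(1024 * 1024)         // 1 MiB
+ *         .setDiskCachePath("/var/cache/rdf4j-s3");
+ * }</pre>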
+ */ +public class S3StoreConfig extends BaseSailConfig { + + /** + * The default quad indexes. + */ + public static final String DEFAULT_QUAD_INDEXES = "spoc,posc"; + + /** + * The default memtable size (64 MiB). + */ + public static final long DEFAULT_MEM_TABLE_SIZE = 67_108_864; + + /** + * The default block size (4 MiB). + */ + public static final int DEFAULT_BLOCK_SIZE = 4_194_304; + + /** + * The default memory cache size (256 MiB). + */ + public static final long DEFAULT_MEMORY_CACHE_SIZE = 268_435_456; + + /** + * The default disk cache size (10 GiB). + */ + public static final long DEFAULT_DISK_CACHE_SIZE = 10_737_418_240L; + + /** + * The default value cache size. + */ + public static final int DEFAULT_VALUE_CACHE_SIZE = 512; + + /** + * The default value id cache size. + */ + public static final int DEFAULT_VALUE_ID_CACHE_SIZE = 128; + + private String quadIndexes; + + private long memTableSize = -1; + + private int blockSize = -1; + + private long memoryCacheSize = -1; + + private long diskCacheSize = -1; + + private String diskCachePath; + + private int valueCacheSize = -1; + + private int valueIdCacheSize = -1; + + /*--------------* + * Constructors * + *--------------*/ + + public S3StoreConfig() { + super(S3StoreFactory.SAIL_TYPE); + } + + public S3StoreConfig(String quadIndexes) { + this(); + setQuadIndexes(quadIndexes); + } + + /*---------* + * Methods * + *---------*/ + + public String getQuadIndexes() { + return quadIndexes != null ? quadIndexes : DEFAULT_QUAD_INDEXES; + } + + public S3StoreConfig setQuadIndexes(String quadIndexes) { + this.quadIndexes = quadIndexes; + return this; + } + + public long getMemTableSize() { + return memTableSize >= 0 ? memTableSize : DEFAULT_MEM_TABLE_SIZE; + } + + public S3StoreConfig setMemTableSize(long memTableSize) { + this.memTableSize = memTableSize; + return this; + } + + public int getBlockSize() { + return blockSize >= 0 ? blockSize : DEFAULT_BLOCK_SIZE; + } + + public S3StoreConfig setBlockSize(int blockSize) { + this.blockSize = blockSize; + return this; + } + + public long getMemoryCacheSize() { + return memoryCacheSize >= 0 ? memoryCacheSize : DEFAULT_MEMORY_CACHE_SIZE; + } + + public S3StoreConfig setMemoryCacheSize(long memoryCacheSize) { + this.memoryCacheSize = memoryCacheSize; + return this; + } + + public long getDiskCacheSize() { + return diskCacheSize >= 0 ? diskCacheSize : DEFAULT_DISK_CACHE_SIZE; + } + + public S3StoreConfig setDiskCacheSize(long diskCacheSize) { + this.diskCacheSize = diskCacheSize; + return this; + } + + public String getDiskCachePath() { + return diskCachePath; + } + + public S3StoreConfig setDiskCachePath(String diskCachePath) { + this.diskCachePath = diskCachePath; + return this; + } + + public int getValueCacheSize() { + return valueCacheSize >= 0 ? valueCacheSize : DEFAULT_VALUE_CACHE_SIZE; + } + + public S3StoreConfig setValueCacheSize(int valueCacheSize) { + this.valueCacheSize = valueCacheSize; + return this; + } + + public int getValueIdCacheSize() { + return valueIdCacheSize >= 0 ? 
valueIdCacheSize : DEFAULT_VALUE_ID_CACHE_SIZE; + } + + public S3StoreConfig setValueIdCacheSize(int valueIdCacheSize) { + this.valueIdCacheSize = valueIdCacheSize; + return this; + } + + @Override + public Resource export(Model m) { + Resource implNode = super.export(m); + ValueFactory vf = SimpleValueFactory.getInstance(); + + m.setNamespace("s3", S3StoreSchema.NAMESPACE); + if (quadIndexes != null) { + m.add(implNode, S3StoreSchema.QUAD_INDEXES, vf.createLiteral(quadIndexes)); + } + if (memTableSize >= 0) { + m.add(implNode, S3StoreSchema.MEM_TABLE_SIZE, vf.createLiteral(memTableSize)); + } + if (blockSize >= 0) { + m.add(implNode, S3StoreSchema.BLOCK_SIZE, vf.createLiteral(blockSize)); + } + if (memoryCacheSize >= 0) { + m.add(implNode, S3StoreSchema.MEMORY_CACHE_SIZE, vf.createLiteral(memoryCacheSize)); + } + if (diskCacheSize >= 0) { + m.add(implNode, S3StoreSchema.DISK_CACHE_SIZE, vf.createLiteral(diskCacheSize)); + } + if (diskCachePath != null) { + m.add(implNode, S3StoreSchema.DISK_CACHE_PATH, vf.createLiteral(diskCachePath)); + } + if (valueCacheSize >= 0) { + m.add(implNode, S3StoreSchema.VALUE_CACHE_SIZE, vf.createLiteral(valueCacheSize)); + } + if (valueIdCacheSize >= 0) { + m.add(implNode, S3StoreSchema.VALUE_ID_CACHE_SIZE, vf.createLiteral(valueIdCacheSize)); + } + return implNode; + } + + @Override + public void parse(Model m, Resource implNode) throws SailConfigException { + super.parse(m, implNode); + + try { + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.QUAD_INDEXES, null)) + .ifPresent(lit -> setQuadIndexes(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.MEM_TABLE_SIZE, null)) + .ifPresent(lit -> { + try { + setMemTableSize(lit.longValue()); + } catch (NumberFormatException e) { + throw new SailConfigException( + "Long value required for " + S3StoreSchema.MEM_TABLE_SIZE + + " property, found " + lit); + } + }); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.BLOCK_SIZE, null)) + .ifPresent(lit -> { + try { + setBlockSize(lit.intValue()); + } catch (NumberFormatException e) { + throw new SailConfigException( + "Integer value required for " + S3StoreSchema.BLOCK_SIZE + + " property, found " + lit); + } + }); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.MEMORY_CACHE_SIZE, null)) + .ifPresent(lit -> { + try { + setMemoryCacheSize(lit.longValue()); + } catch (NumberFormatException e) { + throw new SailConfigException( + "Long value required for " + S3StoreSchema.MEMORY_CACHE_SIZE + + " property, found " + lit); + } + }); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.DISK_CACHE_SIZE, null)) + .ifPresent(lit -> { + try { + setDiskCacheSize(lit.longValue()); + } catch (NumberFormatException e) { + throw new SailConfigException( + "Long value required for " + S3StoreSchema.DISK_CACHE_SIZE + + " property, found " + lit); + } + }); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.DISK_CACHE_PATH, null)) + .ifPresent(lit -> setDiskCachePath(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.VALUE_CACHE_SIZE, null)) + .ifPresent(lit -> { + try { + setValueCacheSize(lit.intValue()); + } catch (NumberFormatException e) { + throw new SailConfigException( + "Integer value required for " + S3StoreSchema.VALUE_CACHE_SIZE + + " property, found " + lit); + } + }); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.VALUE_ID_CACHE_SIZE, null)) + .ifPresent(lit -> { + try { + setValueIdCacheSize(lit.intValue()); + } catch 
(NumberFormatException e) { + throw new SailConfigException( + "Integer value required for " + S3StoreSchema.VALUE_ID_CACHE_SIZE + + " property, found " + lit); + } + }); + } catch (ModelException e) { + throw new SailConfigException(e.getMessage(), e); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java new file mode 100644 index 00000000000..3b1544f8e35 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java @@ -0,0 +1,61 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.config; + +import org.eclipse.rdf4j.sail.Sail; +import org.eclipse.rdf4j.sail.config.SailConfigException; +import org.eclipse.rdf4j.sail.config.SailFactory; +import org.eclipse.rdf4j.sail.config.SailImplConfig; +import org.eclipse.rdf4j.sail.s3.S3Store; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link SailFactory} that creates {@link S3Store}s based on RDF configuration data. + */ +public class S3StoreFactory implements SailFactory { + + private static final Logger logger = LoggerFactory.getLogger(S3StoreFactory.class); + + /** + * The type of repositories that are created by this factory. + * + * @see SailFactory#getSailType() + */ + public static final String SAIL_TYPE = "rdf4j:S3Store"; + + /** + * Returns the Sail's type: rdf4j:S3Store. + */ + @Override + public String getSailType() { + return SAIL_TYPE; + } + + @Override + public SailImplConfig getConfig() { + return new S3StoreConfig(); + } + + @Override + public Sail getSail(SailImplConfig config) throws SailConfigException { + if (!SAIL_TYPE.equals(config.getType())) { + throw new SailConfigException("Invalid Sail type: " + config.getType()); + } + + if (config instanceof S3StoreConfig) { + return new S3Store((S3StoreConfig) config); + } else { + logger.warn("Config is instance of {} is not S3StoreConfig.", config.getClass().getName()); + return new S3Store(); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java new file mode 100644 index 00000000000..076cd1ba771 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java @@ -0,0 +1,78 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.config; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; + +/** + * Defines constants for the S3Store schema which is used by {@link S3StoreFactory}s to initialize S3Stores. + */ +public class S3StoreSchema { + + /** + * The S3Store schema namespace (http://rdf4j.org/config/sail/s3#). + */ + public static final String NAMESPACE = "http://rdf4j.org/config/sail/s3#"; + + /** + * http://rdf4j.org/config/sail/s3#quadIndexes + */ + public final static IRI QUAD_INDEXES; + + /** + * http://rdf4j.org/config/sail/s3#memTableSize + */ + public final static IRI MEM_TABLE_SIZE; + + /** + * http://rdf4j.org/config/sail/s3#blockSize + */ + public final static IRI BLOCK_SIZE; + + /** + * http://rdf4j.org/config/sail/s3#memoryCacheSize + */ + public final static IRI MEMORY_CACHE_SIZE; + + /** + * http://rdf4j.org/config/sail/s3#diskCacheSize + */ + public final static IRI DISK_CACHE_SIZE; + + /** + * http://rdf4j.org/config/sail/s3#diskCachePath + */ + public final static IRI DISK_CACHE_PATH; + + /** + * http://rdf4j.org/config/sail/s3#valueCacheSize + */ + public final static IRI VALUE_CACHE_SIZE; + + /** + * http://rdf4j.org/config/sail/s3#valueIdCacheSize + */ + public final static IRI VALUE_ID_CACHE_SIZE; + + static { + ValueFactory factory = SimpleValueFactory.getInstance(); + QUAD_INDEXES = factory.createIRI(NAMESPACE, "quadIndexes"); + MEM_TABLE_SIZE = factory.createIRI(NAMESPACE, "memTableSize"); + BLOCK_SIZE = factory.createIRI(NAMESPACE, "blockSize"); + MEMORY_CACHE_SIZE = factory.createIRI(NAMESPACE, "memoryCacheSize"); + DISK_CACHE_SIZE = factory.createIRI(NAMESPACE, "diskCacheSize"); + DISK_CACHE_PATH = factory.createIRI(NAMESPACE, "diskCachePath"); + VALUE_CACHE_SIZE = factory.createIRI(NAMESPACE, "valueCacheSize"); + VALUE_ID_CACHE_SIZE = factory.createIRI(NAMESPACE, "valueIdCacheSize"); + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java new file mode 100644 index 00000000000..098312ca278 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java @@ -0,0 +1,253 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.concurrent.ConcurrentNavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * In-memory sorted store for quads using a {@link ConcurrentSkipListMap}. Stores quads as varint-encoded byte[] keys + * (in the order defined by a {@link QuadIndex}) with a 1-byte flag value: + *
+ * <ul>
+ * <li>{@code 0x01} = explicit</li>
+ * <li>{@code 0x02} = inferred</li>
+ * <li>{@code 0x00} = tombstone (deleted)</li>
+ * </ul>
+ *
+ * <p>
+ * The unsigned byte comparison on keys preserves the varint lexicographic ordering, which in turn preserves the numeric + * ordering of the encoded IDs.
+ */ +public class MemTable { + + static final byte FLAG_TOMBSTONE = 0x00; + static final byte FLAG_EXPLICIT = 0x01; + static final byte FLAG_INFERRED = 0x02; + + private static final byte[] VALUE_EXPLICIT = new byte[] { FLAG_EXPLICIT }; + private static final byte[] VALUE_INFERRED = new byte[] { FLAG_INFERRED }; + private static final byte[] VALUE_TOMBSTONE = new byte[] { FLAG_TOMBSTONE }; + + private final QuadIndex index; + private final ConcurrentSkipListMap data; + private final AtomicBoolean frozen = new AtomicBoolean(false); + + /** + * Creates a new MemTable backed by the given index for key encoding. + * + * @param index the QuadIndex that determines key encoding order + */ + public MemTable(QuadIndex index) { + this.index = index; + this.data = new ConcurrentSkipListMap<>(Arrays::compareUnsigned); + } + + /** + * Creates a frozen (immutable) MemTable from an existing data map. Used internally by {@link #freeze()}. + */ + private MemTable(QuadIndex index, ConcurrentSkipListMap data, boolean frozen) { + this.index = index; + this.data = data; + this.frozen.set(frozen); + } + + /** + * Stores a quad in the table. + * + * @param s subject ID + * @param p predicate ID + * @param o object ID + * @param c context ID + * @param explicit true for explicit, false for inferred + * @throws IllegalStateException if the table is frozen + */ + public void put(long s, long p, long o, long c, boolean explicit) { + checkNotFrozen(); + byte[] key = index.toKeyBytes(s, p, o, c); + data.put(key, explicit ? VALUE_EXPLICIT : VALUE_INFERRED); + } + + /** + * Removes a quad by writing a tombstone. + * + * @param s subject ID + * @param p predicate ID + * @param o object ID + * @param c context ID + * @param explicit true for explicit, false for inferred (currently unused; tombstone applies to either) + * @throws IllegalStateException if the table is frozen + */ + public void remove(long s, long p, long o, long c, boolean explicit) { + checkNotFrozen(); + byte[] key = index.toKeyBytes(s, p, o, c); + data.put(key, VALUE_TOMBSTONE); + } + + /** + * Checks if a quad exists (is not a tombstone). + * + * @param s subject ID + * @param p predicate ID + * @param o object ID + * @param c context ID + * @param explicit true to check explicit, false for inferred + * @return true if the quad exists with the matching flag + */ + public boolean get(long s, long p, long o, long c, boolean explicit) { + byte[] key = index.toKeyBytes(s, p, o, c); + byte[] value = data.get(key); + if (value == null || value[0] == FLAG_TOMBSTONE) { + return false; + } + return explicit ? value[0] == FLAG_EXPLICIT : value[0] == FLAG_INFERRED; + } + + /** + * Returns an iterator over matching quads using range scan. Bound components (>= 0) form a prefix; unbound + * components (-1) are wildcards. + * + * @param s subject ID, or -1 for wildcard + * @param p predicate ID, or -1 for wildcard + * @param o object ID, or -1 for wildcard + * @param c context ID, or -1 for wildcard + * @param explicit true for explicit, false for inferred + * @return an iterator over matching quads as long[4] arrays in SPOC order + */ + public Iterator scan(long s, long p, long o, long c, boolean explicit) { + byte[] minKey = index.getMinKeyBytes(s, p, o, c); + byte[] maxKey = index.getMaxKeyBytes(s, p, o, c); + byte expectedFlag = explicit ? 
FLAG_EXPLICIT : FLAG_INFERRED; + + ConcurrentNavigableMap range = data.subMap(minKey, true, maxKey, true); + + return new ScanIterator(range, index, expectedFlag); + } + + /** + * Returns the number of entries in the table (including tombstones). + */ + public int size() { + return data.size(); + } + + /** + * Returns a rough estimate of memory consumption in bytes. + */ + public long approximateSizeInBytes() { + long size = 0; + for (Map.Entry entry : data.entrySet()) { + // key array overhead (16 bytes) + key data + value array overhead (16 bytes) + value data + // + map entry overhead (~64 bytes for skip list node) + size += 16 + entry.getKey().length + 16 + entry.getValue().length + 64; + } + return size; + } + + /** + * Returns a frozen (immutable) snapshot of this table. After freezing, no further writes are accepted on this + * instance. + * + * @return this MemTable, now frozen + */ + public MemTable freeze() { + frozen.set(true); + return this; + } + + /** + * Returns whether this table is frozen (immutable). + */ + public boolean isFrozen() { + return frozen.get(); + } + + /** + * Clears all entries from the table. + * + * @throws IllegalStateException if the table is frozen + */ + public void clear() { + checkNotFrozen(); + data.clear(); + } + + /** + * Returns the QuadIndex used by this table. + */ + public QuadIndex getIndex() { + return index; + } + + /** + * Returns an unmodifiable view of the underlying data map. + */ + public Map getData() { + return Collections.unmodifiableMap(data); + } + + private void checkNotFrozen() { + if (frozen.get()) { + throw new IllegalStateException("MemTable is frozen and cannot accept writes"); + } + } + + /** + * Iterator that filters range scan results by flag value and skips tombstones. Returns quads in SPOC order. + */ + private static class ScanIterator implements Iterator { + private final Iterator> delegate; + private final QuadIndex quadIndex; + private final byte expectedFlag; + private long[] next; + + ScanIterator(ConcurrentNavigableMap range, QuadIndex quadIndex, byte expectedFlag) { + this.delegate = range.entrySet().iterator(); + this.quadIndex = quadIndex; + this.expectedFlag = expectedFlag; + advance(); + } + + private void advance() { + next = null; + while (delegate.hasNext()) { + Map.Entry entry = delegate.next(); + byte flag = entry.getValue()[0]; + if (flag == expectedFlag) { + next = new long[4]; + quadIndex.keyToQuad(entry.getKey(), next); + return; + } + } + } + + @Override + public boolean hasNext() { + return next != null; + } + + @Override + public long[] next() { + if (next == null) { + throw new NoSuchElementException(); + } + long[] result = next; + advance(); + return result; + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java new file mode 100644 index 00000000000..bb0aef89ec6 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java @@ -0,0 +1,342 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.nio.ByteBuffer; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.StringTokenizer; + +/** + * Manages index permutations for quad (S, P, O, C) storage. Each QuadIndex defines a field ordering (e.g. "spoc", + * "posc") and provides methods to encode/decode keys in that order, compute pattern scores for query optimization, and + * construct range scan boundaries. + * + *
+ * <p>
+ * Based on the TripleStore.TripleIndex pattern from the LMDB SAIL module.
+ * </p>
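+ * <p>
+ * Illustrative scoring (hypothetical IDs): with subject and predicate bound, an "spoc" index scores 2, while an
+ * index whose leading field is unbound scores 0 and would force a full scan:
+ * </p>
+ * <pre>
+ * QuadIndex spoc = new QuadIndex("spoc");
+ * spoc.getPatternScore(5, 7, -1, -1);  // 2: two leading bound components
+ * spoc.getPatternScore(-1, 7, -1, -1); // 0: leading subject unbound
+ * </pre>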
+ */ +public class QuadIndex { + + static final int SUBJ_IDX = 0; + static final int PRED_IDX = 1; + static final int OBJ_IDX = 2; + static final int CONTEXT_IDX = 3; + + static final int MAX_KEY_LENGTH = 4 * 9; // 4 varints, max 9 bytes each + + private final char[] fieldSeq; + private final int[] indexMap; + + /** + * Creates a new QuadIndex with the given field sequence. + * + * @param fieldSeq a 4-character string consisting of 's', 'p', 'o', 'c' in any order + * @throws IllegalArgumentException if the field sequence is invalid + */ + public QuadIndex(String fieldSeq) { + if (fieldSeq == null || fieldSeq.length() != 4) { + throw new IllegalArgumentException("Field sequence must be exactly 4 characters: " + fieldSeq); + } + this.fieldSeq = fieldSeq.toCharArray(); + this.indexMap = buildIndexMap(this.fieldSeq); + } + + /** + * Returns the field sequence for this index. + */ + public char[] getFieldSeq() { + return fieldSeq; + } + + /** + * Returns the field sequence as a String. + */ + public String getFieldSeqString() { + return new String(fieldSeq); + } + + /** + * Determines the 'score' of this index on the supplied pattern. The higher the score, the better the index is + * suited for matching the pattern. Score equals the number of leading bound components. Lowest score is 0, meaning + * a sequential scan. + * + * @param subj subject ID, or -1 for wildcard + * @param pred predicate ID, or -1 for wildcard + * @param obj object ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard + * @return pattern score (0-4) + */ + public int getPatternScore(long subj, long pred, long obj, long context) { + int score = 0; + for (char field : fieldSeq) { + switch (field) { + case 's': + if (subj >= 0) { + score++; + } else { + return score; + } + break; + case 'p': + if (pred >= 0) { + score++; + } else { + return score; + } + break; + case 'o': + if (obj >= 0) { + score++; + } else { + return score; + } + break; + case 'c': + if (context >= 0) { + score++; + } else { + return score; + } + break; + default: + throw new IllegalStateException( + "Invalid character '" + field + "' in field sequence: " + new String(fieldSeq)); + } + } + return score; + } + + /** + * Writes a quad as varints in index order into the given buffer. + * + * @param bb buffer for writing bytes + * @param subj subject ID + * @param pred predicate ID + * @param obj object ID + * @param context context ID + */ + public void toKey(ByteBuffer bb, long subj, long pred, long obj, long context) { + for (char field : fieldSeq) { + switch (field) { + case 's': + Varint.writeUnsigned(bb, subj); + break; + case 'p': + Varint.writeUnsigned(bb, pred); + break; + case 'o': + Varint.writeUnsigned(bb, obj); + break; + case 'c': + Varint.writeUnsigned(bb, context); + break; + } + } + } + + /** + * Encodes a quad as a byte array key in index order. + * + * @param subj subject ID + * @param pred predicate ID + * @param obj object ID + * @param context context ID + * @return encoded byte array key + */ + public byte[] toKeyBytes(long subj, long pred, long obj, long context) { + int length = Varint.calcListLengthUnsigned( + getValueForField(fieldSeq[0], subj, pred, obj, context), + getValueForField(fieldSeq[1], subj, pred, obj, context), + getValueForField(fieldSeq[2], subj, pred, obj, context), + getValueForField(fieldSeq[3], subj, pred, obj, context)); + ByteBuffer bb = ByteBuffer.allocate(length); + toKey(bb, subj, pred, obj, context); + return bb.array(); + } + + /** + * Reads a key back to quad values in SPOC order. 
+ * + * @param key buffer positioned at the start of the key + * @param quad array of length 4 to receive [subj, pred, obj, context] + */ + public void keyToQuad(ByteBuffer key, long[] quad) { + Varint.readQuadUnsigned(key, indexMap, quad); + } + + /** + * Reads a key from a byte array back to quad values in SPOC order. + * + * @param key byte array containing the encoded key + * @param quad array of length 4 to receive [subj, pred, obj, context] + */ + public void keyToQuad(byte[] key, long[] quad) { + ByteBuffer bb = ByteBuffer.wrap(key); + Varint.readQuadUnsigned(bb, indexMap, quad); + } + + /** + * Constructs the minimum key for a range scan. Unbound components (-1 or 0) become 0. + * + * @param bb buffer for writing bytes + * @param subj subject ID, or -1 for wildcard + * @param pred predicate ID, or -1 for wildcard + * @param obj object ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard + */ + public void getMinKey(ByteBuffer bb, long subj, long pred, long obj, long context) { + toKey(bb, + subj <= 0 ? 0 : subj, + pred <= 0 ? 0 : pred, + obj <= 0 ? 0 : obj, + context <= 0 ? 0 : context); + } + + /** + * Constructs the minimum key as a byte array for a range scan. + */ + public byte[] getMinKeyBytes(long subj, long pred, long obj, long context) { + return toKeyBytes( + subj <= 0 ? 0 : subj, + pred <= 0 ? 0 : pred, + obj <= 0 ? 0 : obj, + context <= 0 ? 0 : context); + } + + /** + * Constructs the maximum key for a range scan. Unbound components become Long.MAX_VALUE. + * + * @param bb buffer for writing bytes + * @param subj subject ID, or -1 for wildcard + * @param pred predicate ID, or -1 for wildcard + * @param obj object ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard + */ + public void getMaxKey(ByteBuffer bb, long subj, long pred, long obj, long context) { + toKey(bb, + subj <= 0 ? Long.MAX_VALUE : subj, + pred <= 0 ? Long.MAX_VALUE : pred, + obj <= 0 ? Long.MAX_VALUE : obj, + context < 0 ? Long.MAX_VALUE : context); + } + + /** + * Constructs the maximum key as a byte array for a range scan. + */ + public byte[] getMaxKeyBytes(long subj, long pred, long obj, long context) { + return toKeyBytes( + subj <= 0 ? Long.MAX_VALUE : subj, + pred <= 0 ? Long.MAX_VALUE : pred, + obj <= 0 ? Long.MAX_VALUE : obj, + context < 0 ? Long.MAX_VALUE : context); + } + + /** + * Finds the best index from the given list for a query pattern by choosing the index with the highest pattern + * score. + * + * @param indexes list of available indexes + * @param subj subject ID, or -1 for wildcard + * @param pred predicate ID, or -1 for wildcard + * @param obj object ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard + * @return the best matching index + */ + public static QuadIndex getBestIndex(List indexes, long subj, long pred, long obj, long context) { + int bestScore = -1; + QuadIndex bestIndex = null; + + for (QuadIndex index : indexes) { + int score = index.getPatternScore(subj, pred, obj, context); + if (score > bestScore) { + bestScore = score; + bestIndex = index; + } + } + + return bestIndex; + } + + /** + * Parses a comma/whitespace-separated list of index specifications. Each spec must consist of 4 characters: 's', + * 'p', 'o' and 'c'. 
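+ * <p>
+ * For example, {@code parseIndexSpecList("spoc, posc")} returns the set {@code {"spoc", "posc"}}, while a spec
+ * such as {@code "spox"} is rejected with an {@link IllegalArgumentException}.
+ * </p>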
+ * + * @param indexSpecStr a string like "spoc, posc, cosp" + * @return a set of parsed index specifications + * @throws IllegalArgumentException if any spec is invalid + */ + public static Set parseIndexSpecList(String indexSpecStr) { + Set indexes = new HashSet<>(); + + if (indexSpecStr != null) { + StringTokenizer tok = new StringTokenizer(indexSpecStr, ", \t"); + while (tok.hasMoreTokens()) { + String index = tok.nextToken().toLowerCase(); + + if (index.length() != 4 || index.indexOf('s') == -1 || index.indexOf('p') == -1 + || index.indexOf('o') == -1 || index.indexOf('c') == -1) { + throw new IllegalArgumentException( + "Invalid value '" + index + "' in index specification: " + indexSpecStr); + } + + indexes.add(index); + } + } + + return indexes; + } + + @Override + public String toString() { + return new String(fieldSeq); + } + + private static int[] buildIndexMap(char[] fieldSeq) { + int[] indexes = new int[fieldSeq.length]; + for (int i = 0; i < fieldSeq.length; i++) { + switch (fieldSeq[i]) { + case 's': + indexes[i] = SUBJ_IDX; + break; + case 'p': + indexes[i] = PRED_IDX; + break; + case 'o': + indexes[i] = OBJ_IDX; + break; + case 'c': + indexes[i] = CONTEXT_IDX; + break; + default: + throw new IllegalArgumentException( + "Invalid character '" + fieldSeq[i] + "' in field sequence: " + new String(fieldSeq)); + } + } + return indexes; + } + + private static long getValueForField(char field, long subj, long pred, long obj, long context) { + switch (field) { + case 's': + return subj; + case 'p': + return pred; + case 'o': + return obj; + case 'c': + return context; + default: + throw new IllegalArgumentException("Invalid field: " + field); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Varint.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Varint.java new file mode 100644 index 00000000000..69988c616fc --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Varint.java @@ -0,0 +1,406 @@ +/******************************************************************************* + * Copyright (c) 2021 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * Encodes and decodes unsigned values using variable-length encoding. + * + *
+ * <p>
+ * Uses the variable-length encoding of SQLite, which preserves lexicographic ordering: smaller values use fewer
+ * bytes, and lexicographic byte order matches numeric order.
+ * </p>
+ *
+ * <p>
+ * Adapted from the LMDB Varint implementation with LMDB-specific dependencies removed (no SignificantBytesBE, no
+ * GroupMatcher). All reads use heap-based byte-by-byte decoding.
+ * </p>
+ */ +public final class Varint { + + static final byte[] ENCODED_LONG_MAX = new byte[] { + (byte) 0xFF, // header: 8 payload bytes + 0x7F, // MSB of Long.MAX_VALUE + (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF + }; + + static final byte[] ENCODED_LONG_MAX_QUAD = new byte[] { + (byte) 0xFF, 0x7F, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, 0x7F, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, 0x7F, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, 0x7F, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, + (byte) 0xFF + }; + + static final byte[] ALL_ZERO_QUAD = new byte[] { 0, 0, 0, 0 }; + + private Varint() { + } + + /** + * Encodes a value using the variable-length encoding of SQLite. + * + *
+ * <p>
+ * The encoding has the following properties:
+ * </p>
+ * <ol>
+ * <li>Smaller (and more common) values use fewer bytes.</li>
+ * <li>The length of any varint can be determined by looking at just the first byte.</li>
+ * <li>Lexicographical and numeric ordering for varints are the same.</li>
+ * </ol>
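+ * <p>
+ * Byte-level sketch of the resulting encodings (derived from the rules above):
+ * </p>
+ * <pre>
+ * writeUnsigned(bb, 100);    // 1 byte:  0x64
+ * writeUnsigned(bb, 1000);   // 2 bytes: 0xF3 0xF8  (240 + 2*256 + 0xF8 = 1000)
+ * writeUnsigned(bb, 70000);  // 4 bytes: 0xFA 0x01 0x11 0x70
+ * </pre>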
+ * + * @param bb buffer for writing bytes + * @param value value to encode + */ + public static void writeUnsigned(final ByteBuffer bb, final long value) { + if (value == Long.MAX_VALUE) { + final ByteOrder prev = bb.order(); + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(ByteOrder.BIG_ENDIAN); + } + try { + bb.put(ENCODED_LONG_MAX); + } finally { + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(prev); + } + } + return; + } + + if (value <= 240) { + bb.put((byte) value); + } else if (value <= 2287) { + long v = value - 240; + final ByteOrder prev = bb.order(); + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(ByteOrder.BIG_ENDIAN); + } + try { + int hi = (int) (v >>> 8) + 241; + int lo = (int) (v & 0xFF); + bb.putShort((short) ((hi << 8) | lo)); + } finally { + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(prev); + } + } + } else if (value <= 67823) { + long v = value - 2288; + bb.put((byte) 249); + final ByteOrder prev = bb.order(); + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(ByteOrder.BIG_ENDIAN); + } + try { + bb.putShort((short) v); + } finally { + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(prev); + } + } + } else { + int bytes = descriptor(value) + 1; + bb.put((byte) (250 + (bytes - 3))); + writeSignificantBits(bb, value, bytes); + } + } + + private static void writeSignificantBits(ByteBuffer bb, long value, int bytes) { + final ByteOrder prev = bb.order(); + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(ByteOrder.BIG_ENDIAN); + } + try { + int i = bytes; + + if ((i & 1) != 0) { + bb.put((byte) (value >>> ((i - 1) * 8))); + i--; + } + + if (i == 8) { + bb.putLong(value); + return; + } + + if (i >= 4) { + int shift = (i - 4) * 8; + bb.putInt((int) (value >>> shift)); + i -= 4; + } + + while (i >= 2) { + int shift = (i - 2) * 8; + bb.putShort((short) (value >>> shift)); + i -= 2; + } + } finally { + if (prev != ByteOrder.BIG_ENDIAN) { + bb.order(prev); + } + } + } + + /** + * Calculates required length in bytes to encode the given long value. + * + * @param value the value + * @return length in bytes + */ + public static int calcLengthUnsigned(long value) { + if (value <= 240) { + return 1; + } else if (value <= 2287) { + return 2; + } else if (value <= 67823) { + return 3; + } else { + int bytes = descriptor(value) + 1; + return 1 + bytes; + } + } + + /** + * Calculates required length in bytes to encode a list of four long values. + * + * @param a first value + * @param b second value + * @param c third value + * @param d fourth value + * @return length in bytes + */ + public static int calcListLengthUnsigned(long a, long b, long c, long d) { + return calcLengthUnsigned(a) + calcLengthUnsigned(b) + calcLengthUnsigned(c) + calcLengthUnsigned(d); + } + + /** + * The number of bytes required to represent the given number minus one. + */ + private static byte descriptor(long value) { + return value == 0 ? 0 : (byte) (7 - Long.numberOfLeadingZeros(value) / 8); + } + + /** Lookup: lead byte (0..255) -> number of additional bytes (0..8). */ + private static final byte[] VARINT_EXTRA_BYTES = buildVarintExtraBytes(); + + private static byte[] buildVarintExtraBytes() { + final byte[] t = new byte[256]; + for (int i = 0; i <= 240; i++) { + t[i] = 0; + } + for (int i = 241; i <= 248; i++) { + t[i] = 1; + } + t[249] = 2; + for (int i = 250; i <= 255; i++) { + t[i] = (byte) (i - 247); + } + return t; + } + + /** + * Decodes a value using SQLite's variable-length integer encoding. 
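+ * <p>
+ * Decoding mirrors the encoding rules: a lead byte of 241-248 announces one extra byte, 249 announces two, and
+ * 250-255 announce a 3- to 8-byte big-endian payload.
+ * </p>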
+ * + * @param bb buffer for reading bytes + * @return decoded value + * @throws IllegalArgumentException if encoded varint is longer than 9 bytes + */ + public static long readUnsigned(ByteBuffer bb) throws IllegalArgumentException { + int a0 = bb.get() & 0xFF; + + if (a0 <= 240) { + return a0; + } else if (a0 <= 248) { + int a1 = bb.get() & 0xFF; + return 240L + ((long) (a0 - 241) << 8) + a1; + } else if (a0 == 249) { + int a1 = bb.get() & 0xFF; + int a2 = bb.get() & 0xFF; + return 2288L + ((long) a1 << 8) + a2; + } else { + int bytes = a0 - 250 + 3; + return readSignificantBits(bb, bytes); + } + } + + /** + * Skips over a single varint in the buffer. + * + * @param bb buffer to advance + */ + public static void skipUnsigned(ByteBuffer bb) { + final int a0 = bb.get() & 0xFF; + if (a0 <= 240) { + return; + } + final int extra = VARINT_EXTRA_BYTES[a0]; + bb.position(bb.position() + extra); + } + + /** + * Decodes a value at an absolute position without advancing the buffer position. + * + * @param bb buffer for reading bytes + * @param pos absolute position in the buffer + * @return decoded value + */ + public static long readUnsigned(ByteBuffer bb, int pos) throws IllegalArgumentException { + int a0 = bb.get(pos) & 0xFF; + if (a0 <= 240) { + return a0; + } else if (a0 <= 248) { + int a1 = bb.get(pos + 1) & 0xFF; + return 240 + 256 * (a0 - 241) + a1; + } else if (a0 == 249) { + int a1 = bb.get(pos + 1) & 0xFF; + int a2 = bb.get(pos + 2) & 0xFF; + return 2288 + 256 * a1 + a2; + } else { + int bytes = a0 - 250 + 3; + return readSignificantBitsAbsolute(bb, pos + 1, bytes); + } + } + + private static final int[] FIRST_TO_LENGTH = buildFirstToLength(); + + private static int[] buildFirstToLength() { + int[] t = new int[256]; + for (int i = 0; i <= 240; i++) { + t[i] = 1; + } + for (int i = 241; i <= 248; i++) { + t[i] = 2; + } + t[249] = 3; + for (int i = 250; i <= 255; i++) { + t[i] = i - 246; + } + return t; + } + + /** + * Determines length of an encoded varint value by inspecting the first byte. + * + * @param a0 first byte of varint value + * @return total length in bytes + */ + public static int firstToLength(byte a0) { + return FIRST_TO_LENGTH[a0 & 0xFF]; + } + + /** + * Decodes a single element of a list of variable-length long values from a buffer. + * + * @param bb buffer for reading bytes + * @param index the element's index + * @return the decoded value + */ + public static long readListElementUnsigned(ByteBuffer bb, int index) { + int pos = 0; + for (int i = 0; i < index; i++) { + pos += firstToLength(bb.get(pos)); + } + return readUnsigned(bb, pos); + } + + /** + * Encodes multiple values using variable-length encoding into the given buffer. + * + * @param bb buffer for writing bytes + * @param values array with values to write + */ + public static void writeListUnsigned(final ByteBuffer bb, final long[] values) { + for (long value : values) { + writeUnsigned(bb, value); + } + } + + /** + * Decodes multiple values using variable-length encoding from the given buffer. + * + * @param bb buffer for reading bytes + * @param values array for the result values + */ + public static void readListUnsigned(ByteBuffer bb, long[] values) { + for (int i = 0; i < values.length; i++) { + values[i] = readUnsigned(bb); + } + } + + /** + * Decodes exactly 4 values (a quad) from the given buffer. 
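+ * <p>
+ * The {@code indexMap} variants below restore SPOC positions; for a "posc" index the map is {@code {1, 2, 0, 3}},
+ * so the first varint read (the predicate) lands in {@code values[1]}.
+ * </p>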
+ * + * @param bb buffer for reading bytes + * @param values array of length 4 for the result values + */ + public static void readQuadUnsigned(ByteBuffer bb, long[] values) { + values[0] = readUnsigned(bb); + values[1] = readUnsigned(bb); + values[2] = readUnsigned(bb); + values[3] = readUnsigned(bb); + } + + /** + * Decodes multiple values using variable-length encoding, placing each value into the position specified by the + * index map. + * + * @param bb buffer for reading bytes + * @param indexMap map for indexes of values within values array + * @param values array for the result values + */ + public static void readListUnsigned(ByteBuffer bb, int[] indexMap, long[] values) { + for (int i = 0; i < values.length; i++) { + values[indexMap[i]] = readUnsigned(bb); + } + } + + /** + * Decodes exactly 4 values (a quad) from the given buffer, placing each value at the index specified by the map. + * + * @param bb buffer for reading bytes + * @param indexMap map for indexes of values within values array + * @param values array of length 4 for the result values + */ + public static void readQuadUnsigned(ByteBuffer bb, int[] indexMap, long[] values) { + values[indexMap[0]] = readUnsigned(bb); + values[indexMap[1]] = readUnsigned(bb); + values[indexMap[2]] = readUnsigned(bb); + values[indexMap[3]] = readUnsigned(bb); + } + + /** + * Reads n significant bytes from the buffer in big-endian order (byte-by-byte, heap-safe). + */ + private static long readSignificantBits(ByteBuffer bb, int n) { + long value = 0; + for (int i = 0; i < n; i++) { + value = (value << 8) | (bb.get() & 0xFFL); + } + return value; + } + + /** + * Reads n significant bytes from the buffer at an absolute position in big-endian order. + */ + private static long readSignificantBitsAbsolute(ByteBuffer bb, int pos, int bytes) { + long value = 0; + for (int i = 0; i < bytes; i++) { + value = (value << 8) | (bb.get(pos + i) & 0xFFL); + } + return value; + } +} diff --git a/core/sail/s3/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory b/core/sail/s3/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory new file mode 100644 index 00000000000..599d4243971 --- /dev/null +++ b/core/sail/s3/src/main/resources/META-INF/services/org.eclipse.rdf4j.sail.config.SailFactory @@ -0,0 +1 @@ +org.eclipse.rdf4j.sail.s3.config.S3StoreFactory From e5379d58ca4621da824b2da7dc15d003762087a3 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sun, 22 Feb 2026 20:28:19 -0500 Subject: [PATCH 02/10] feat: add SSTable format and S3 persistence (Phase 1c) Add persistence layer so MemTables flush to immutable SSTable files on S3-compatible storage. When s3Bucket is not configured the store stays in pure in-memory mode and all existing tests remain unaffected. Key additions: - ObjectStore interface with S3ObjectStore (MinIO) and FileSystemObjectStore (test double) - SSTableWriter/SSTable: binary format with block index for range scans - MergeIterator: K-way merge across MemTable + SSTables with deduplication and tombstone suppression - Manifest: versioned JSON manifest tracking SSTables on S3 - S3ValueStore and S3NamespaceStore serialization/deserialization - S3StoreConfig: S3 connectivity properties (bucket, endpoint, etc.) 
- S3SailStore: flush path, merged read path, startup loading - 29 new tests (unit + persistence); 541 total tests pass --- core/sail/s3/pom.xml | 16 + .../rdf4j/sail/s3/S3NamespaceStore.java | 39 ++ .../eclipse/rdf4j/sail/s3/S3SailStore.java | 253 +++++++++++-- .../eclipse/rdf4j/sail/s3/S3ValueStore.java | 163 +++++++++ .../rdf4j/sail/s3/config/S3StoreConfig.java | 120 +++++++ .../rdf4j/sail/s3/config/S3StoreSchema.java | 21 ++ .../rdf4j/sail/s3/storage/Manifest.java | 197 +++++++++++ .../rdf4j/sail/s3/storage/MemTable.java | 82 ++++- .../rdf4j/sail/s3/storage/MergeIterator.java | 143 ++++++++ .../rdf4j/sail/s3/storage/ObjectStore.java | 30 ++ .../rdf4j/sail/s3/storage/RawEntrySource.java | 26 ++ .../rdf4j/sail/s3/storage/S3ObjectStore.java | 146 ++++++++ .../rdf4j/sail/s3/storage/SSTable.java | 332 ++++++++++++++++++ .../rdf4j/sail/s3/storage/SSTableWriter.java | 176 ++++++++++ .../sail/s3/S3EvaluationStrategyTest.java | 23 ++ .../rdf4j/sail/s3/S3PersistenceMinioIT.java | 112 ++++++ .../rdf4j/sail/s3/S3PersistenceTest.java | 213 +++++++++++ .../rdf4j/sail/s3/S3SparqlOrderByTest.java | 24 ++ .../rdf4j/sail/s3/S3StoreConnectionTest.java | 26 ++ .../sail/s3/S3StoreIsolationLevelTest.java | 24 ++ .../rdf4j/sail/s3/S3StoreRepositoryTest.java | 24 ++ .../eclipse/rdf4j/sail/s3/S3StoreTest.java | 26 ++ .../s3/S3ValueStoreSerializationTest.java | 134 +++++++ .../s3/storage/FileSystemObjectStore.java | 108 ++++++ .../rdf4j/sail/s3/storage/ManifestTest.java | 87 +++++ .../sail/s3/storage/MergeIteratorTest.java | 178 ++++++++++ .../s3/storage/SSTableWriterReaderTest.java | 183 ++++++++++ 27 files changed, 2876 insertions(+), 30 deletions(-) create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ObjectStore.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStrategyTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java create mode 100644 
core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java diff --git a/core/sail/s3/pom.xml b/core/sail/s3/pom.xml index 481535c3268..601b0c3eb5e 100644 --- a/core/sail/s3/pom.xml +++ b/core/sail/s3/pom.xml @@ -48,6 +48,10 @@ com.google.guava guava + + com.fasterxml.jackson.core + jackson-databind + ${project.groupId} rdf4j-sail-testsuite @@ -71,5 +75,17 @@ junit-jupiter-params test + + org.testcontainers + testcontainers + ${testcontainers.version} + test + + + org.testcontainers + testcontainers-junit-jupiter + ${testcontainers.version} + test + diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java index 2b1a114b066..02e85883117 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java @@ -10,11 +10,17 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3; +import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import org.eclipse.rdf4j.model.impl.SimpleNamespace; +import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; + +import com.fasterxml.jackson.databind.ObjectMapper; /** * In-memory store for namespace prefix information. All operations are synchronized for thread safety. @@ -52,4 +58,37 @@ public synchronized Iterator iterator() { public synchronized void clear() { namespacesMap.clear(); } + + @SuppressWarnings("unchecked") + synchronized void deserialize(ObjectStore objectStore, ObjectMapper mapper) { + byte[] data = objectStore.get("namespaces/current"); + if (data == null) { + return; + } + try { + List> entries = mapper.readValue(data, List.class); + for (Map entry : entries) { + String prefix = entry.get("prefix"); + String name = entry.get("name"); + namespacesMap.put(prefix, new SimpleNamespace(prefix, name)); + } + } catch (IOException e) { + throw new UncheckedIOException("Failed to deserialize namespaces", e); + } + } + + synchronized void serialize(ObjectStore objectStore, ObjectMapper mapper) { + try { + List> entries = new java.util.ArrayList<>(); + for (SimpleNamespace ns : namespacesMap.values()) { + Map entry = new LinkedHashMap<>(); + entry.put("prefix", ns.getPrefix()); + entry.put("name", ns.getName()); + entries.add(entry); + } + objectStore.put("namespaces/current", mapper.writeValueAsBytes(entries)); + } catch (IOException e) { + throw new UncheckedIOException("Failed to serialize namespaces", e); + } + } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index c3791412f35..0c92e9359cb 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -10,12 +10,14 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3; +import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import org.eclipse.rdf4j.common.iteration.CloseableIteration; @@ -40,18 +42,23 @@ 
import org.eclipse.rdf4j.sail.base.SailSource; import org.eclipse.rdf4j.sail.base.SailStore; import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.sail.s3.storage.Manifest; import org.eclipse.rdf4j.sail.s3.storage.MemTable; +import org.eclipse.rdf4j.sail.s3.storage.MergeIterator; +import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; import org.eclipse.rdf4j.sail.s3.storage.QuadIndex; +import org.eclipse.rdf4j.sail.s3.storage.RawEntrySource; +import org.eclipse.rdf4j.sail.s3.storage.S3ObjectStore; +import org.eclipse.rdf4j.sail.s3.storage.SSTable; +import org.eclipse.rdf4j.sail.s3.storage.SSTableWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; + /** - * In-memory {@link SailStore} implementation that stores RDF quads in {@link MemTable}s. Each configured index - * permutation gets its own MemTable for efficient query patterns. - * - *
- * <p>
- * This is the Phase 1b in-memory-only implementation. Later phases will add SSTable persistence and S3 integration.
- * </p>
+ * {@link SailStore} implementation that stores RDF quads in {@link MemTable}s with optional persistence to + * S3-compatible object storage via SSTables. When S3 is not configured, operates in pure in-memory mode. */ class S3SailStore implements SailStore { @@ -60,17 +67,38 @@ class S3SailStore implements SailStore { private final S3ValueStore valueStore; private final S3NamespaceStore namespaceStore; private final List indexes; - private final List memTables; + private List memTables; private volatile boolean mayHaveInferred; + // Persistence fields (null when S3 is not configured) + private final ObjectStore objectStore; + private final ObjectMapper jsonMapper; + private Manifest manifest; + private final List> sstablesByIndex; // per-index list, newest first + private final AtomicLong epochCounter; + private final long memTableFlushSize; + /** * A lock to control concurrent access by {@link S3SailSink} to the stores. */ private final ReentrantLock sinkStoreAccessLock = new ReentrantLock(); S3SailStore(S3StoreConfig config) { + this(config, config.isS3Configured() + ? new S3ObjectStore(config.getS3Bucket(), config.getS3Endpoint(), config.getS3Region(), + config.getS3Prefix(), config.getS3AccessKey(), config.getS3SecretKey(), + config.isS3ForcePathStyle()) + : null); + } + + /** + * Package-private constructor for testing with a custom ObjectStore. + */ + S3SailStore(S3StoreConfig config, ObjectStore objectStore) { this.valueStore = new S3ValueStore(); this.namespaceStore = new S3NamespaceStore(); + this.objectStore = objectStore; + this.memTableFlushSize = config.getMemTableSize(); // Parse index specifications from config String indexSpec = config.getQuadIndexes(); @@ -84,11 +112,63 @@ class S3SailStore implements SailStore { } if (indexes.isEmpty()) { - // Fallback: always ensure at least one index QuadIndex defaultIndex = new QuadIndex("spoc"); indexes.add(defaultIndex); memTables.add(new MemTable(defaultIndex)); } + + // Initialize persistence + if (objectStore != null) { + this.jsonMapper = new ObjectMapper(); + this.manifest = Manifest.load(objectStore, jsonMapper); + this.epochCounter = new AtomicLong(computeMaxEpoch(manifest) + 1); + this.sstablesByIndex = new ArrayList<>(indexes.size()); + for (int i = 0; i < indexes.size(); i++) { + sstablesByIndex.add(new ArrayList<>()); + } + + // Deserialize value store and namespaces + if (manifest.getNextValueId() > 0) { + valueStore.deserialize(objectStore, manifest.getNextValueId()); + } + namespaceStore.deserialize(objectStore, jsonMapper); + + // Load existing SSTables from manifest + for (Manifest.SSTableInfo info : manifest.getSstables()) { + int idxPos = findIndexByName(info.getIndexName()); + if (idxPos >= 0) { + byte[] sstData = objectStore.get(info.getS3Key()); + if (sstData != null) { + SSTable sst = new SSTable(sstData, indexes.get(idxPos)); + sstablesByIndex.get(idxPos).add(sst); + } + } + } + } else { + this.jsonMapper = null; + this.manifest = null; + this.epochCounter = null; + this.sstablesByIndex = null; + } + } + + private static long computeMaxEpoch(Manifest manifest) { + long max = 0; + for (Manifest.SSTableInfo info : manifest.getSstables()) { + if (info.getEpoch() > max) { + max = info.getEpoch(); + } + } + return max; + } + + private int findIndexByName(String indexName) { + for (int i = 0; i < indexes.size(); i++) { + if (indexes.get(i).getFieldSeqString().equals(indexName)) { + return i; + } + } + return -1; } @Override @@ -113,6 +193,14 @@ public SailSource getInferredSailSource() { @Override public void 
close() throws SailException { + try { + if (objectStore != null) { + flushToObjectStore(); + objectStore.close(); + } + } catch (IOException e) { + throw new SailException(e); + } valueStore.close(); for (MemTable mt : memTables) { mt.clear(); @@ -135,6 +223,79 @@ private int getBestIndex(long subj, long pred, long obj, long context) { return bestIdx; } + /** + * Flushes active MemTables to SSTables on the object store. + */ + private void flushToObjectStore() { + if (objectStore == null) { + return; + } + + // Check if any MemTable has data + boolean hasMemTableData = false; + for (MemTable mt : memTables) { + if (mt.size() > 0) { + hasMemTableData = true; + break; + } + } + + long epoch = epochCounter.getAndIncrement(); + + List newInfos = new ArrayList<>(); + + if (hasMemTableData) { + // Freeze active MemTables and swap in fresh ones + List frozenTables = memTables; + List newTables = new ArrayList<>(indexes.size()); + for (int i = 0; i < indexes.size(); i++) { + frozenTables.get(i).freeze(); + newTables.add(new MemTable(indexes.get(i))); + } + memTables = newTables; + + // Write each frozen MemTable as an SSTable + for (int i = 0; i < indexes.size(); i++) { + MemTable frozen = frozenTables.get(i); + if (frozen.size() == 0) { + continue; + } + String indexName = indexes.get(i).getFieldSeqString(); + String s3Key = "sstables/L0-" + epoch + "-" + indexName + ".sst"; + + byte[] sstData = SSTableWriter.write(frozen); + objectStore.put(s3Key, sstData); + + SSTable sst = new SSTable(sstData, indexes.get(i)); + sstablesByIndex.get(i).add(0, sst); // prepend (newest first) + + newInfos.add(new Manifest.SSTableInfo( + s3Key, 0, indexName, + bytesToHex(sst.getMinKey()), bytesToHex(sst.getMaxKey()), + sst.getEntryCount(), epoch)); + } + } + + // Always persist value store and namespaces + valueStore.serialize(objectStore); + namespaceStore.serialize(objectStore, jsonMapper); + + // Update and save manifest + List allInfos = new ArrayList<>(newInfos); + allInfos.addAll(manifest.getSstables()); + manifest.setSstables(allInfos); + manifest.setNextValueId(valueStore.getNextId()); + manifest.save(objectStore, jsonMapper, epoch); + } + + private static String bytesToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + sb.append(String.format("%02x", b & 0xFF)); + } + return sb.toString(); + } + /** * Creates a statement iterator for the given pattern. */ @@ -191,12 +352,26 @@ CloseableIteration createStatementIterator( int bestIdx = getBestIndex(subjID, predID, objID, contextIDList.size() == 1 ? contextIDList.get(0) : S3ValueStore.UNKNOWN_ID); - MemTable bestTable = memTables.get(bestIdx); + + boolean hasSSTables = sstablesByIndex != null && !sstablesByIndex.get(bestIdx).isEmpty(); ArrayList> perContextIterList = new ArrayList<>(contextIDList.size()); for (long contextID : contextIDList) { - Iterator quads = bestTable.scan(subjID, predID, objID, contextID, explicit); + Iterator quads; + if (hasSSTables) { + // Build merged source: MemTable (newest) + SSTables (newest first) + List sources = new ArrayList<>(); + sources.add(memTables.get(bestIdx).asRawSource(subjID, predID, objID, contextID)); + for (SSTable sst : sstablesByIndex.get(bestIdx)) { + sources.add(sst.asRawSource(subjID, predID, objID, contextID)); + } + byte expectedFlag = explicit ? 
MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; + quads = new MergeIterator(sources, indexes.get(bestIdx), expectedFlag, + subjID, predID, objID, contextID); + } else { + quads = memTables.get(bestIdx).scan(subjID, predID, objID, contextID, explicit); + } perContextIterList.add(new QuadToStatementIteration(quads, valueStore)); } @@ -257,8 +432,7 @@ public void prepare() throws SailException { public void flush() throws SailException { sinkStoreAccessLock.lock(); try { - // In-memory only: nothing to persist yet. - // In later phases this will flush MemTable to SSTable and upload to S3. + flushToObjectStore(); } finally { sinkStoreAccessLock.unlock(); } @@ -332,6 +506,17 @@ public void approveAll(Set approved, Set approvedContexts) mt.put(s, p, o, c, explicit); } } + + // Size-triggered flush + if (objectStore != null) { + long totalSize = 0; + for (MemTable mt : memTables) { + totalSize += mt.approximateSizeInBytes(); + } + if (totalSize >= memTableFlushSize) { + flushToObjectStore(); + } + } } finally { sinkStoreAccessLock.unlock(); } @@ -425,14 +610,28 @@ private long removeStatements(Resource subj, IRI pred, Value obj, boolean explic } } - // Use the first MemTable as the source of truth for scanning, then remove from all int bestIdx = getBestIndex(subjID, predID, objID, contextIds.length == 1 ? contextIds[0] : S3ValueStore.UNKNOWN_ID); MemTable scanTable = memTables.get(bestIdx); long removeCount = 0; for (long contextId : contextIds) { - Iterator iter = scanTable.scan(subjID, predID, objID, contextId, explicit); + // When SSTables exist, use merged iterator for remove scan + Iterator iter; + boolean hasSSTables = sstablesByIndex != null && !sstablesByIndex.get(bestIdx).isEmpty(); + if (hasSSTables) { + List sources = new ArrayList<>(); + sources.add(scanTable.asRawSource(subjID, predID, objID, contextId)); + for (SSTable sst : sstablesByIndex.get(bestIdx)) { + sources.add(sst.asRawSource(subjID, predID, objID, contextId)); + } + byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; + iter = new MergeIterator(sources, indexes.get(bestIdx), expectedFlag, + subjID, predID, objID, contextId); + } else { + iter = scanTable.scan(subjID, predID, objID, contextId, explicit); + } + List toRemove = new ArrayList<>(); while (iter.hasNext()) { toRemove.add(iter.next()); @@ -462,7 +661,7 @@ private final class S3SailDataset implements SailDataset { @Override public void close() { - // no-op for in-memory implementation + // no-op } @Override @@ -478,8 +677,22 @@ public CloseableIteration getNamespaces() { @Override public CloseableIteration getContextIDs() throws SailException { // Scan all quads and collect distinct non-null contexts - MemTable table = memTables.get(0); - Iterator allQuads = table.scan(-1, -1, -1, -1, explicit); + // Use the merged read path (createStatementIterator covers this) + int bestIdx = 0; // use first index for full scan + boolean hasSSTables = sstablesByIndex != null && !sstablesByIndex.get(bestIdx).isEmpty(); + + Iterator allQuads; + if (hasSSTables) { + List sources = new ArrayList<>(); + sources.add(memTables.get(bestIdx).asRawSource(-1, -1, -1, -1)); + for (SSTable sst : sstablesByIndex.get(bestIdx)) { + sources.add(sst.asRawSource(-1, -1, -1, -1)); + } + byte expectedFlag = explicit ? 
MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; + allQuads = new MergeIterator(sources, indexes.get(bestIdx), expectedFlag, -1, -1, -1, -1); + } else { + allQuads = memTables.get(bestIdx).scan(-1, -1, -1, -1, explicit); + } return new FilterIteration( new ConvertingIteration( @@ -531,9 +744,9 @@ public Comparator getComparator() { } /** - * Converts quad ID arrays from MemTable iteration into Statement objects by resolving IDs through the ValueStore. + * Converts quad ID arrays from iteration into Statement objects by resolving IDs through the ValueStore. */ - private static final class QuadToStatementIteration implements CloseableIteration { + static final class QuadToStatementIteration implements CloseableIteration { private final Iterator quads; private final S3ValueStore valueStore; @@ -560,7 +773,7 @@ public Statement next() { @Override public void close() { - // no-op: MemTable iterators don't hold resources + // no-op } } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java index 594d8fb1b8d..324b3f379f7 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java @@ -10,11 +10,23 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.base.AbstractValueFactory; +import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; +import org.eclipse.rdf4j.sail.s3.storage.Varint; /** * In-memory value store that maps RDF {@link Value} objects to long IDs and vice-versa. Uses {@link ConcurrentHashMap} @@ -80,6 +92,157 @@ public void clear() { nextId.set(1); } + /** + * Returns the next value ID that would be assigned. + */ + long getNextId() { + return nextId.get(); + } + + /** + * Serializes the value store to the object store. 
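+ * <p>
+ * Layout sketch: a varint entry count, then per entry a varint id, one type byte (0 = IRI, 1 = Literal,
+ * 2 = BNode) and varint-length-prefixed UTF-8 payloads (label, datatype and language tag for literals).
+ * </p>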
+ */ + void serialize(ObjectStore objectStore) { + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + + int count = idToValue.size(); + // Write count as varint + ByteBuffer countBuf = ByteBuffer.allocate(9); + Varint.writeUnsigned(countBuf, count); + out.write(countBuf.array(), 0, countBuf.position()); + + for (Map.Entry entry : idToValue.entrySet()) { + long id = entry.getKey(); + Value val = entry.getValue(); + + // Write id as varint + ByteBuffer idBuf = ByteBuffer.allocate(9); + Varint.writeUnsigned(idBuf, id); + out.write(idBuf.array(), 0, idBuf.position()); + + if (val instanceof IRI) { + out.writeByte(0); // type = IRI + byte[] payload = val.stringValue().getBytes(StandardCharsets.UTF_8); + ByteBuffer lenBuf = ByteBuffer.allocate(9); + Varint.writeUnsigned(lenBuf, payload.length); + out.write(lenBuf.array(), 0, lenBuf.position()); + out.write(payload); + } else if (val instanceof Literal) { + out.writeByte(1); // type = Literal + Literal lit = (Literal) val; + byte[] label = lit.getLabel().getBytes(StandardCharsets.UTF_8); + byte[] dt = lit.getDatatype().stringValue().getBytes(StandardCharsets.UTF_8); + String langStr = lit.getLanguage().orElse(""); + byte[] lang = langStr.getBytes(StandardCharsets.UTF_8); + + ByteBuffer buf = ByteBuffer.allocate(9); + + buf.clear(); + Varint.writeUnsigned(buf, label.length); + out.write(buf.array(), 0, buf.position()); + out.write(label); + + buf.clear(); + Varint.writeUnsigned(buf, dt.length); + out.write(buf.array(), 0, buf.position()); + out.write(dt); + + buf.clear(); + Varint.writeUnsigned(buf, lang.length); + out.write(buf.array(), 0, buf.position()); + out.write(lang); + } else if (val instanceof BNode) { + out.writeByte(2); // type = BNode + byte[] payload = ((BNode) val).getID().getBytes(StandardCharsets.UTF_8); + ByteBuffer lenBuf = ByteBuffer.allocate(9); + Varint.writeUnsigned(lenBuf, payload.length); + out.write(lenBuf.array(), 0, lenBuf.position()); + out.write(payload); + } else { + throw new IllegalStateException("Unsupported value type: " + val.getClass()); + } + } + + out.flush(); + objectStore.put("values/current", baos.toByteArray()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Deserializes the value store from the object store. 
+ */ + void deserialize(ObjectStore objectStore, long nextValueId) { + byte[] data = objectStore.get("values/current"); + if (data == null) { + return; + } + + try { + ByteBuffer bb = ByteBuffer.wrap(data); + long count = Varint.readUnsigned(bb); + + for (long i = 0; i < count; i++) { + long id = Varint.readUnsigned(bb); + int type = bb.get() & 0xFF; + + Value val; + switch (type) { + case 0: { // IRI + int len = (int) Varint.readUnsigned(bb); + byte[] payload = new byte[len]; + bb.get(payload); + val = createIRI(new String(payload, StandardCharsets.UTF_8)); + break; + } + case 1: { // Literal + int labelLen = (int) Varint.readUnsigned(bb); + byte[] labelBytes = new byte[labelLen]; + bb.get(labelBytes); + String label = new String(labelBytes, StandardCharsets.UTF_8); + + int dtLen = (int) Varint.readUnsigned(bb); + byte[] dtBytes = new byte[dtLen]; + bb.get(dtBytes); + String dt = new String(dtBytes, StandardCharsets.UTF_8); + + int langLen = (int) Varint.readUnsigned(bb); + byte[] langBytes = new byte[langLen]; + bb.get(langBytes); + String lang = new String(langBytes, StandardCharsets.UTF_8); + + IRI datatypeIRI = createIRI(dt); + if (!lang.isEmpty()) { + val = createLiteral(label, lang); + } else { + val = createLiteral(label, datatypeIRI); + } + break; + } + case 2: { // BNode + int len = (int) Varint.readUnsigned(bb); + byte[] payload = new byte[len]; + bb.get(payload); + val = createBNode(new String(payload, StandardCharsets.UTF_8)); + break; + } + default: + throw new IllegalStateException("Unknown value type: " + type); + } + + valueToId.put(val, id); + idToValue.put(id, val); + } + + nextId.set(nextValueId); + } catch (Exception e) { + throw new RuntimeException("Failed to deserialize value store", e); + } + } + /** * Closes the value store, releasing all resources. */ diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java index 3908f0b5958..d1c3d364efb 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java @@ -75,6 +75,20 @@ public class S3StoreConfig extends BaseSailConfig { private int valueIdCacheSize = -1; + private String s3Bucket; + + private String s3Endpoint; + + private String s3Region; + + private String s3Prefix; + + private String s3AccessKey; + + private String s3SecretKey; + + private boolean s3ForcePathStyle = true; + /*--------------* * Constructors * *--------------*/ @@ -164,6 +178,73 @@ public S3StoreConfig setValueIdCacheSize(int valueIdCacheSize) { return this; } + public String getS3Bucket() { + return s3Bucket; + } + + public S3StoreConfig setS3Bucket(String s3Bucket) { + this.s3Bucket = s3Bucket; + return this; + } + + public String getS3Endpoint() { + return s3Endpoint; + } + + public S3StoreConfig setS3Endpoint(String s3Endpoint) { + this.s3Endpoint = s3Endpoint; + return this; + } + + public String getS3Region() { + return s3Region != null ? s3Region : "us-east-1"; + } + + public S3StoreConfig setS3Region(String s3Region) { + this.s3Region = s3Region; + return this; + } + + public String getS3Prefix() { + return s3Prefix != null ? 
s3Prefix : ""; + } + + public S3StoreConfig setS3Prefix(String s3Prefix) { + this.s3Prefix = s3Prefix; + return this; + } + + public String getS3AccessKey() { + return s3AccessKey; + } + + public S3StoreConfig setS3AccessKey(String s3AccessKey) { + this.s3AccessKey = s3AccessKey; + return this; + } + + public String getS3SecretKey() { + return s3SecretKey; + } + + public S3StoreConfig setS3SecretKey(String s3SecretKey) { + this.s3SecretKey = s3SecretKey; + return this; + } + + public boolean isS3ForcePathStyle() { + return s3ForcePathStyle; + } + + public S3StoreConfig setS3ForcePathStyle(boolean s3ForcePathStyle) { + this.s3ForcePathStyle = s3ForcePathStyle; + return this; + } + + public boolean isS3Configured() { + return s3Bucket != null && !s3Bucket.isEmpty(); + } + @Override public Resource export(Model m) { Resource implNode = super.export(m); @@ -194,6 +275,25 @@ public Resource export(Model m) { if (valueIdCacheSize >= 0) { m.add(implNode, S3StoreSchema.VALUE_ID_CACHE_SIZE, vf.createLiteral(valueIdCacheSize)); } + if (s3Bucket != null) { + m.add(implNode, S3StoreSchema.S3_BUCKET, vf.createLiteral(s3Bucket)); + } + if (s3Endpoint != null) { + m.add(implNode, S3StoreSchema.S3_ENDPOINT, vf.createLiteral(s3Endpoint)); + } + if (s3Region != null) { + m.add(implNode, S3StoreSchema.S3_REGION, vf.createLiteral(s3Region)); + } + if (s3Prefix != null) { + m.add(implNode, S3StoreSchema.S3_PREFIX, vf.createLiteral(s3Prefix)); + } + if (s3AccessKey != null) { + m.add(implNode, S3StoreSchema.S3_ACCESS_KEY, vf.createLiteral(s3AccessKey)); + } + if (s3SecretKey != null) { + m.add(implNode, S3StoreSchema.S3_SECRET_KEY, vf.createLiteral(s3SecretKey)); + } + m.add(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, vf.createLiteral(s3ForcePathStyle)); return implNode; } @@ -273,6 +373,26 @@ public void parse(Model m, Resource implNode) throws SailConfigException { + " property, found " + lit); } }); + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_BUCKET, null)) + .ifPresent(lit -> setS3Bucket(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_ENDPOINT, null)) + .ifPresent(lit -> setS3Endpoint(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_REGION, null)) + .ifPresent(lit -> setS3Region(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_PREFIX, null)) + .ifPresent(lit -> setS3Prefix(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_ACCESS_KEY, null)) + .ifPresent(lit -> setS3AccessKey(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_SECRET_KEY, null)) + .ifPresent(lit -> setS3SecretKey(lit.getLabel())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, null)) + .ifPresent(lit -> setS3ForcePathStyle(lit.booleanValue())); } catch (ModelException e) { throw new SailConfigException(e.getMessage(), e); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java index 076cd1ba771..cc0d2f7730f 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java @@ -64,6 +64,20 @@ public class S3StoreSchema { */ public final static IRI VALUE_ID_CACHE_SIZE; + public final static IRI S3_BUCKET; + + public final static IRI S3_ENDPOINT; + + public final static IRI 
S3_REGION; + + public final static IRI S3_PREFIX; + + public final static IRI S3_ACCESS_KEY; + + public final static IRI S3_SECRET_KEY; + + public final static IRI S3_FORCE_PATH_STYLE; + static { ValueFactory factory = SimpleValueFactory.getInstance(); QUAD_INDEXES = factory.createIRI(NAMESPACE, "quadIndexes"); @@ -74,5 +88,12 @@ public class S3StoreSchema { DISK_CACHE_PATH = factory.createIRI(NAMESPACE, "diskCachePath"); VALUE_CACHE_SIZE = factory.createIRI(NAMESPACE, "valueCacheSize"); VALUE_ID_CACHE_SIZE = factory.createIRI(NAMESPACE, "valueIdCacheSize"); + S3_BUCKET = factory.createIRI(NAMESPACE, "s3Bucket"); + S3_ENDPOINT = factory.createIRI(NAMESPACE, "s3Endpoint"); + S3_REGION = factory.createIRI(NAMESPACE, "s3Region"); + S3_PREFIX = factory.createIRI(NAMESPACE, "s3Prefix"); + S3_ACCESS_KEY = factory.createIRI(NAMESPACE, "s3AccessKey"); + S3_SECRET_KEY = factory.createIRI(NAMESPACE, "s3SecretKey"); + S3_FORCE_PATH_STYLE = factory.createIRI(NAMESPACE, "s3ForcePathStyle"); } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java new file mode 100644 index 00000000000..0e72137bcd3 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java @@ -0,0 +1,197 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * JSON manifest tracking which SSTables exist in the object store. + * + *

<h2>S3 Layout</h2>
+ *
+ * <pre>
+ * manifest/current           -> plain text "v{epoch}.json"
+ * manifest/v{epoch}.json     -> JSON manifest
+ * sstables/L0-{epoch}-{indexName}.sst
+ * values/current             -> serialized value store
+ * namespaces/current         -> JSON namespace map
+ * </pre>
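+ *
+ * A minimal usage sketch (assumes an {@link ObjectStore} and a Jackson {@code ObjectMapper} are available; the
+ * key names and epoch value are illustrative only):
+ *
+ * <pre>{@code
+ * ObjectMapper mapper = new ObjectMapper();
+ * Manifest manifest = Manifest.load(store, mapper); // empty manifest if none has been written yet
+ * manifest.getSstables().add(new Manifest.SSTableInfo("sstables/L0-7-spoc.sst", 0, "spoc", "00", "ff", 1000L, 7L));
+ * manifest.save(store, mapper, 7L); // writes manifest/v7.json, then repoints manifest/current
+ * }</pre>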
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class Manifest { + + @JsonProperty("version") + private int version = 1; + + @JsonProperty("nextValueId") + private long nextValueId; + + @JsonProperty("sstables") + private List sstables = new ArrayList<>(); + + public Manifest() { + } + + public int getVersion() { + return version; + } + + public void setVersion(int version) { + this.version = version; + } + + public long getNextValueId() { + return nextValueId; + } + + public void setNextValueId(long nextValueId) { + this.nextValueId = nextValueId; + } + + public List getSstables() { + return sstables; + } + + public void setSstables(List sstables) { + this.sstables = sstables; + } + + public static Manifest load(ObjectStore store, ObjectMapper mapper) { + byte[] pointer = store.get("manifest/current"); + if (pointer == null) { + return new Manifest(); + } + String manifestKey = "manifest/" + new String(pointer, StandardCharsets.UTF_8).trim(); + byte[] json = store.get(manifestKey); + if (json == null) { + return new Manifest(); + } + try { + return mapper.readValue(json, Manifest.class); + } catch (IOException e) { + throw new UncheckedIOException("Failed to parse manifest", e); + } + } + + public void save(ObjectStore store, ObjectMapper mapper, long epoch) { + try { + String versionedKey = "v" + epoch + ".json"; + byte[] json = mapper.writerWithDefaultPrettyPrinter().writeValueAsBytes(this); + store.put("manifest/" + versionedKey, json); + store.put("manifest/current", versionedKey.getBytes(StandardCharsets.UTF_8)); + } catch (IOException e) { + throw new UncheckedIOException("Failed to save manifest", e); + } + } + + @JsonIgnoreProperties(ignoreUnknown = true) + public static class SSTableInfo { + + @JsonProperty("s3Key") + private String s3Key; + + @JsonProperty("level") + private int level; + + @JsonProperty("indexName") + private String indexName; + + @JsonProperty("minKeyHex") + private String minKeyHex; + + @JsonProperty("maxKeyHex") + private String maxKeyHex; + + @JsonProperty("entryCount") + private long entryCount; + + @JsonProperty("epoch") + private long epoch; + + public SSTableInfo() { + } + + public SSTableInfo(String s3Key, int level, String indexName, String minKeyHex, String maxKeyHex, + long entryCount, long epoch) { + this.s3Key = s3Key; + this.level = level; + this.indexName = indexName; + this.minKeyHex = minKeyHex; + this.maxKeyHex = maxKeyHex; + this.entryCount = entryCount; + this.epoch = epoch; + } + + public String getS3Key() { + return s3Key; + } + + public void setS3Key(String s3Key) { + this.s3Key = s3Key; + } + + public int getLevel() { + return level; + } + + public void setLevel(int level) { + this.level = level; + } + + public String getIndexName() { + return indexName; + } + + public void setIndexName(String indexName) { + this.indexName = indexName; + } + + public String getMinKeyHex() { + return minKeyHex; + } + + public void setMinKeyHex(String minKeyHex) { + this.minKeyHex = minKeyHex; + } + + public String getMaxKeyHex() { + return maxKeyHex; + } + + public void setMaxKeyHex(String maxKeyHex) { + this.maxKeyHex = maxKeyHex; + } + + public long getEntryCount() { + return entryCount; + } + + public void setEntryCount(long entryCount) { + this.entryCount = entryCount; + } + + public long getEpoch() { + return epoch; + } + + public void setEpoch(long epoch) { + this.epoch = epoch; + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java 
b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java
index 098312ca278..d0ce15a7155 100644
--- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java
+++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java
@@ -35,9 +35,9 @@
  */
 public class MemTable {
 
-    static final byte FLAG_TOMBSTONE = 0x00;
-    static final byte FLAG_EXPLICIT = 0x01;
-    static final byte FLAG_INFERRED = 0x02;
+    public static final byte FLAG_TOMBSTONE = 0x00;
+    public static final byte FLAG_EXPLICIT = 0x01;
+    public static final byte FLAG_INFERRED = 0x02;
 
     private static final byte[] VALUE_EXPLICIT = new byte[] { FLAG_EXPLICIT };
     private static final byte[] VALUE_INFERRED = new byte[] { FLAG_INFERRED };
@@ -135,7 +135,7 @@ public Iterator<long[]> scan(long s, long p, long o, long c, boolean explicit)
 
         ConcurrentNavigableMap<byte[], byte[]> range = data.subMap(minKey, true, maxKey, true);
 
-        return new ScanIterator(range, index, expectedFlag);
+        return new ScanIterator(range, index, expectedFlag, s, p, o, c);
     }
 
     /**
@@ -200,6 +200,53 @@ public Map<byte[], byte[]> getData() {
         return Collections.unmodifiableMap(data);
     }
 
+    /**
+     * Returns a {@link RawEntrySource} over the given key range. Includes tombstones (no flag filtering). Used by
+     * {@link MergeIterator}.
+     */
+    public RawEntrySource asRawSource(long s, long p, long o, long c) {
+        byte[] minKey = index.getMinKeyBytes(s, p, o, c);
+        byte[] maxKey = index.getMaxKeyBytes(s, p, o, c);
+        ConcurrentNavigableMap<byte[], byte[]> range = data.subMap(minKey, true, maxKey, true);
+        return new RawSourceImpl(range);
+    }
+
+    private static class RawSourceImpl implements RawEntrySource {
+        private final Iterator<Map.Entry<byte[], byte[]>> delegate;
+        private Map.Entry<byte[], byte[]> current;
+
+        RawSourceImpl(ConcurrentNavigableMap<byte[], byte[]> range) {
+            this.delegate = range.entrySet().iterator();
+            if (delegate.hasNext()) {
+                current = delegate.next();
+            }
+        }
+
+        @Override
+        public boolean hasNext() {
+            return current != null;
+        }
+
+        @Override
+        public byte[] peekKey() {
+            return current.getKey();
+        }
+
+        @Override
+        public byte peekFlag() {
+            return current.getValue()[0];
+        }
+
+        @Override
+        public void advance() {
+            if (delegate.hasNext()) {
+                current = delegate.next();
+            } else {
+                current = null;
+            }
+        }
+    }
+
     private void checkNotFrozen() {
         if (frozen.get()) {
             throw new IllegalStateException("MemTable is frozen and cannot accept writes");
@@ -207,18 +254,25 @@ private void checkNotFrozen() {
     }
 
     /**
-     * Iterator that filters range scan results by flag value and skips tombstones. Returns quads in SPOC order.
+     * Iterator that filters range scan results by flag value and pattern match. Skips tombstones and entries where
+     * bound components don't match the query pattern. Returns quads in SPOC order.
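+     * For example, {@code memTable.scan(sId, -1, -1, -1, true)} returns an instance of this iterator that yields
+     * every explicit quad whose subject is {@code sId} ({@code -1} marks a wildcard; {@code sId} is illustrative).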
     */
    private static class ScanIterator implements Iterator<long[]> {
        private final Iterator<Map.Entry<byte[], byte[]>> delegate;
        private final QuadIndex quadIndex;
        private final byte expectedFlag;
+        private final long patternS, patternP, patternO, patternC;
        private long[] next;

-        ScanIterator(ConcurrentNavigableMap<byte[], byte[]> range, QuadIndex quadIndex, byte expectedFlag) {
+        ScanIterator(ConcurrentNavigableMap<byte[], byte[]> range, QuadIndex quadIndex, byte expectedFlag,
+                long s, long p, long o, long c) {
            this.delegate = range.entrySet().iterator();
            this.quadIndex = quadIndex;
            this.expectedFlag = expectedFlag;
+            this.patternS = s;
+            this.patternP = p;
+            this.patternO = o;
+            this.patternC = c;
            advance();
        }

@@ -227,11 +281,19 @@ private void advance() {
            while (delegate.hasNext()) {
                Map.Entry<byte[], byte[]> entry = delegate.next();
                byte flag = entry.getValue()[0];
-                if (flag == expectedFlag) {
-                    next = new long[4];
-                    quadIndex.keyToQuad(entry.getKey(), next);
-                    return;
+                if (flag != expectedFlag) {
+                    continue;
+                }
+                long[] quad = new long[4];
+                quadIndex.keyToQuad(entry.getKey(), quad);
+                if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS)
+                        || (patternP >= 0 && quad[QuadIndex.PRED_IDX] != patternP)
+                        || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO)
+                        || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) {
+                    continue;
                }
+                next = quad;
+                return;
            }
        }
diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java
new file mode 100644
index 00000000000..f94893f7834
--- /dev/null
+++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java
@@ -0,0 +1,143 @@
+/*******************************************************************************
+ * Copyright (c) 2025 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.sail.s3.storage;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.PriorityQueue;
+
+/**
+ * K-way merge iterator over multiple {@link RawEntrySource} instances ordered newest-to-oldest. Deduplicates entries
+ * with the same key (newest wins), suppresses tombstones, and filters by expected flag and pattern match.
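+ *
+ * A usage sketch (assumes the caller already holds the live MemTable plus the SSTables listed in the manifest;
+ * {@code sstablesNewestFirst} is a hypothetical local variable):
+ *
+ * <pre>{@code
+ * List<RawEntrySource> sources = new ArrayList<>();
+ * sources.add(memTable.asRawSource(s, p, o, c)); // index 0 = newest
+ * for (SSTable sst : sstablesNewestFirst) {
+ *     sources.add(sst.asRawSource(s, p, o, c));
+ * }
+ * Iterator<long[]> quads = new MergeIterator(sources, quadIndex, MemTable.FLAG_EXPLICIT, s, p, o, c);
+ * }</pre>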
+ */ +public class MergeIterator implements Iterator { + + private final QuadIndex quadIndex; + private final byte expectedFlag; + private final long patternS, patternP, patternO, patternC; + private final PriorityQueue heap; + private long[] next; + + /** + * @param sources list of sources ordered newest-to-oldest (index 0 = newest) + * @param quadIndex the quad index for decoding keys + * @param expectedFlag the flag to match (FLAG_EXPLICIT or FLAG_INFERRED) + * @param s subject pattern, or -1 for wildcard + * @param p predicate pattern, or -1 for wildcard + * @param o object pattern, or -1 for wildcard + * @param c context pattern, or -1 for wildcard + */ + public MergeIterator(List sources, QuadIndex quadIndex, byte expectedFlag, + long s, long p, long o, long c) { + this.quadIndex = quadIndex; + this.expectedFlag = expectedFlag; + this.patternS = s; + this.patternP = p; + this.patternO = o; + this.patternC = c; + this.heap = new PriorityQueue<>(); + + for (int i = 0; i < sources.size(); i++) { + RawEntrySource src = sources.get(i); + if (src.hasNext()) { + heap.add(new SourceCursor(src, i)); + } + } + + advance(); + } + + private void advance() { + next = null; + while (!heap.isEmpty()) { + // Pop minimum key + SourceCursor min = heap.poll(); + byte[] winningKey = min.source.peekKey().clone(); + byte winningFlag = min.source.peekFlag(); + + // Advance the winning source + min.source.advance(); + if (min.source.hasNext()) { + heap.add(min); + } + + // Drain all sources with the same key (deduplication) + while (!heap.isEmpty() && Arrays.compareUnsigned(heap.peek().source.peekKey(), winningKey) == 0) { + SourceCursor dup = heap.poll(); + dup.source.advance(); + if (dup.source.hasNext()) { + heap.add(dup); + } + } + + // Tombstone suppression + if (winningFlag == MemTable.FLAG_TOMBSTONE) { + continue; + } + + // Flag filter + if (winningFlag != expectedFlag) { + continue; + } + + // Decode key and verify pattern + long[] quad = new long[4]; + quadIndex.keyToQuad(winningKey, quad); + + if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS) + || (patternP >= 0 && quad[QuadIndex.PRED_IDX] != patternP) + || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO) + || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) { + continue; + } + + next = quad; + return; + } + } + + @Override + public boolean hasNext() { + return next != null; + } + + @Override + public long[] next() { + if (next == null) { + throw new NoSuchElementException(); + } + long[] result = next; + advance(); + return result; + } + + private static class SourceCursor implements Comparable { + final RawEntrySource source; + final int sourceIndex; // lower = newer + + SourceCursor(RawEntrySource source, int sourceIndex) { + this.source = source; + this.sourceIndex = sourceIndex; + } + + @Override + public int compareTo(SourceCursor other) { + int keyCmp = Arrays.compareUnsigned(this.source.peekKey(), other.source.peekKey()); + if (keyCmp != 0) { + return keyCmp; + } + // Ties broken by source index: lower = newer = wins (poll first) + return Integer.compare(this.sourceIndex, other.sourceIndex); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ObjectStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ObjectStore.java new file mode 100644 index 00000000000..b62924d4554 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ObjectStore.java @@ -0,0 +1,30 @@ 
+/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.Closeable; +import java.util.List; + +/** + * Abstraction over object storage (S3-compatible or filesystem). + */ +public interface ObjectStore extends Closeable { + + void put(String key, byte[] data); + + byte[] get(String key); + + byte[] getRange(String key, long offset, long length); + + void delete(String key); + + List list(String subPrefix); +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java new file mode 100644 index 00000000000..4a77d548753 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +/** + * A source of raw key/flag entries for the {@link MergeIterator}. Both {@link MemTable} and {@link SSTable} expose this + * interface over a key range. + */ +public interface RawEntrySource { + + boolean hasNext(); + + byte[] peekKey(); + + byte peekFlag(); + + void advance(); +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java new file mode 100644 index 00000000000..3e9bff4591b --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java @@ -0,0 +1,146 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.sail.s3.storage;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import io.minio.GetObjectArgs;
+import io.minio.ListObjectsArgs;
+import io.minio.MinioClient;
+import io.minio.PutObjectArgs;
+import io.minio.RemoveObjectArgs;
+import io.minio.Result;
+import io.minio.errors.ErrorResponseException;
+import io.minio.messages.Item;
+
+/**
+ * {@link ObjectStore} implementation backed by an S3-compatible service via the MinIO client.
+ */
+public class S3ObjectStore implements ObjectStore {
+
+    private final MinioClient client;
+    private final String bucket;
+    private final String prefix;
+
+    public S3ObjectStore(String bucket, String endpoint, String region, String prefix,
+            String accessKey, String secretKey, boolean forcePathStyle) {
+        this.bucket = bucket;
+        this.prefix = (prefix != null && !prefix.isEmpty() && !prefix.endsWith("/")) ? prefix + "/"
+                : (prefix != null ? prefix : "");
+
+        MinioClient.Builder builder = MinioClient.builder()
+                .endpoint(endpoint)
+                .credentials(accessKey, secretKey)
+                .region(region);
+        this.client = builder.build();
+        // NOTE: forcePathStyle is currently not forwarded to the client; the MinIO client picks its own
+        // addressing style (path-style for custom endpoints). The flag is accepted here to stay in sync
+        // with S3StoreConfig.
+    }
+
+    private String resolve(String key) {
+        return prefix + key;
+    }
+
+    @Override
+    public void put(String key, byte[] data) {
+        try {
+            ByteArrayInputStream bais = new ByteArrayInputStream(data);
+            client.putObject(PutObjectArgs.builder()
+                    .bucket(bucket)
+                    .object(resolve(key))
+                    .stream(bais, data.length, -1)
+                    .build());
+        } catch (Exception e) {
+            throw new UncheckedIOException(new IOException("Failed to put " + key, e));
+        }
+    }
+
+    @Override
+    public byte[] get(String key) {
+        try (InputStream is = client.getObject(GetObjectArgs.builder()
+                .bucket(bucket)
+                .object(resolve(key))
+                .build())) {
+            return is.readAllBytes();
+        } catch (ErrorResponseException e) {
+            if ("NoSuchKey".equals(e.errorResponse().code())) {
+                return null;
+            }
+            throw new UncheckedIOException(new IOException("Failed to get " + key, e));
+        } catch (Exception e) {
+            throw new UncheckedIOException(new IOException("Failed to get " + key, e));
+        }
+    }
+
+    @Override
+    public byte[] getRange(String key, long offset, long length) {
+        try (InputStream is = client.getObject(GetObjectArgs.builder()
+                .bucket(bucket)
+                .object(resolve(key))
+                .offset(offset)
+                .length(length)
+                .build())) {
+            return is.readAllBytes();
+        } catch (ErrorResponseException e) {
+            if ("NoSuchKey".equals(e.errorResponse().code())) {
+                return null;
+            }
+            throw new UncheckedIOException(new IOException("Failed to getRange " + key, e));
+        } catch (Exception e) {
+            throw new UncheckedIOException(new IOException("Failed to getRange " + key, e));
+        }
+    }
+
+    @Override
+    public void delete(String key) {
+        try {
+            client.removeObject(RemoveObjectArgs.builder()
+                    .bucket(bucket)
+                    .object(resolve(key))
+                    .build());
+        } catch (Exception e) {
+            throw new UncheckedIOException(new IOException("Failed to delete " + key, e));
+        }
+    }
+
+    @Override
+    public List<String> list(String subPrefix) {
+        List<String> keys = new ArrayList<>();
+        String fullPrefix = resolve(subPrefix);
+        Iterable<Result<Item>> results = client.listObjects(ListObjectsArgs.builder()
+                .bucket(bucket)
+                .prefix(fullPrefix)
+                .recursive(true)
+                .build());
+        try {
+            for (Result<Item> result : results) {
+                String objectKey = result.get().objectName();
+                // Strip the store prefix to return
relative keys + if (objectKey.startsWith(prefix)) { + keys.add(objectKey.substring(prefix.length())); + } else { + keys.add(objectKey); + } + } + } catch (Exception e) { + throw new UncheckedIOException(new IOException("Failed to list " + subPrefix, e)); + } + return keys; + } + + @Override + public void close() { + // MinioClient doesn't need explicit close + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java new file mode 100644 index 00000000000..984617245c2 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java @@ -0,0 +1,332 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * Reads and queries an immutable SSTable from its binary representation. The entire SSTable byte[] is held in memory + * (Phase 1c). The block index enables binary search to find the starting block for range scans. + */ +public class SSTable { + + private final byte[] raw; + private final QuadIndex quadIndex; + + // Parsed from footer + private final long blockIndexOffset; + private final int blockIndexLength; + private final long statsOffset; + private final int statsLength; + + // Parsed from block index + private final int blockCount; + private final byte[][] blockFirstKeys; + private final long[] blockOffsets; + private final int[] blockLengths; + + // Parsed from stats + private final byte[] minKey; + private final byte[] maxKey; + private final long entryCount; + + public SSTable(byte[] raw, QuadIndex quadIndex) { + this.raw = raw; + this.quadIndex = quadIndex; + + // Parse footer (last 32 bytes) + ByteBuffer footer = ByteBuffer.wrap(raw, raw.length - SSTableWriter.FOOTER_SIZE, SSTableWriter.FOOTER_SIZE); + int magic = footer.getInt(); + if (magic != SSTableWriter.MAGIC) { + throw new IllegalArgumentException("Invalid SSTable magic: 0x" + Integer.toHexString(magic)); + } + int version = footer.getInt(); + if (version != SSTableWriter.VERSION) { + throw new IllegalArgumentException("Unsupported SSTable version: " + version); + } + this.blockIndexOffset = footer.getLong(); + this.blockIndexLength = footer.getInt(); + this.statsOffset = footer.getLong(); + this.statsLength = footer.getInt(); + + // Parse block index + ByteBuffer biBuffer = ByteBuffer.wrap(raw, (int) blockIndexOffset, blockIndexLength); + this.blockCount = biBuffer.getInt(); + this.blockFirstKeys = new byte[blockCount][]; + this.blockOffsets = new long[blockCount]; + this.blockLengths = new int[blockCount]; + for (int i = 0; i < blockCount; i++) { + int keyLen = (int) Varint.readUnsigned(biBuffer); + blockFirstKeys[i] = new byte[keyLen]; + biBuffer.get(blockFirstKeys[i]); + blockOffsets[i] = biBuffer.getLong(); + blockLengths[i] = biBuffer.getInt(); + } + + // Parse stats + ByteBuffer statsBuffer = ByteBuffer.wrap(raw, (int) statsOffset, 
statsLength); + int minKeyLen = (int) Varint.readUnsigned(statsBuffer); + this.minKey = new byte[minKeyLen]; + statsBuffer.get(this.minKey); + int maxKeyLen = (int) Varint.readUnsigned(statsBuffer); + this.maxKey = new byte[maxKeyLen]; + statsBuffer.get(this.maxKey); + this.entryCount = statsBuffer.getLong(); + } + + public byte[] getMinKey() { + return minKey; + } + + public byte[] getMaxKey() { + return maxKey; + } + + public long getEntryCount() { + return entryCount; + } + + /** + * Scans for matching quads, filtering by flag (explicit/inferred) and pattern. Same contract as + * {@link MemTable#scan(long, long, long, long, boolean)}. + */ + public Iterator scan(long s, long p, long o, long c, boolean explicit) { + byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; + byte[] scanMinKey = quadIndex.getMinKeyBytes(s, p, o, c); + byte[] scanMaxKey = quadIndex.getMaxKeyBytes(s, p, o, c); + int startBlock = findStartBlock(scanMinKey); + + return new ScanIterator(startBlock, scanMinKey, scanMaxKey, expectedFlag, s, p, o, c); + } + + /** + * Returns a {@link RawEntrySource} over the given key range. Includes tombstones (no flag filtering). Used by + * {@link MergeIterator}. + */ + public RawEntrySource asRawSource(long s, long p, long o, long c) { + byte[] scanMinKey = quadIndex.getMinKeyBytes(s, p, o, c); + byte[] scanMaxKey = quadIndex.getMaxKeyBytes(s, p, o, c); + int startBlock = findStartBlock(scanMinKey); + + return new RawSourceImpl(startBlock, scanMinKey, scanMaxKey); + } + + /** + * Binary search to find the block that could contain the given key. + */ + private int findStartBlock(byte[] targetKey) { + int lo = 0, hi = blockCount - 1; + int result = 0; + while (lo <= hi) { + int mid = (lo + hi) >>> 1; + int cmp = Arrays.compareUnsigned(blockFirstKeys[mid], targetKey); + if (cmp <= 0) { + result = mid; + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return result; + } + + /** + * Reads the next entry from the data region at the given position. 
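+     * Each entry is encoded as {@code [key_length varint][key bytes][flag byte]}, exactly as written by
+     * {@link SSTableWriter}.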
+     *
+     * @return the position after the entry, or -1 once the data region is exhausted
+     */
+    private int readEntry(int pos, byte[][] keyOut, byte[] flagOut) {
+        if (pos >= blockIndexOffset) {
+            return -1;
+        }
+        ByteBuffer bb = ByteBuffer.wrap(raw, pos, (int) blockIndexOffset - pos);
+        int keyLen = (int) Varint.readUnsigned(bb);
+        byte[] key = new byte[keyLen];
+        bb.get(key);
+        byte flag = bb.get();
+
+        keyOut[0] = key;
+        flagOut[0] = flag;
+        return pos + Varint.calcLengthUnsigned(keyLen) + keyLen + 1;
+    }
+
+    private class ScanIterator implements Iterator<long[]> {
+        private int pos;
+        private final byte[] scanMaxKey;
+        private final byte expectedFlag;
+        private final long patternS, patternP, patternO, patternC;
+        private long[] next;
+        private final byte[][] keyBuf = new byte[1][];
+        private final byte[] flagBuf = new byte[1];
+
+        ScanIterator(int startBlock, byte[] scanMinKey, byte[] scanMaxKey, byte expectedFlag,
+                long s, long p, long o, long c) {
+            this.pos = (int) blockOffsets[startBlock];
+            this.scanMaxKey = scanMaxKey;
+            this.expectedFlag = expectedFlag;
+            this.patternS = s;
+            this.patternP = p;
+            this.patternO = o;
+            this.patternC = c;
+
+            // Skip entries before scanMinKey
+            skipToMinKey(scanMinKey);
+            advance();
+        }
+
+        private void skipToMinKey(byte[] scanMinKey) {
+            while (pos < blockIndexOffset) {
+                int savedPos = pos;
+                int nextPos = readEntry(pos, keyBuf, flagBuf);
+                if (nextPos < 0) {
+                    break;
+                }
+                if (Arrays.compareUnsigned(keyBuf[0], scanMinKey) >= 0) {
+                    pos = savedPos; // revert - this entry is in range
+                    return;
+                }
+                pos = nextPos;
+            }
+        }
+
+        private void advance() {
+            next = null;
+            while (pos < blockIndexOffset) {
+                int nextPos = readEntry(pos, keyBuf, flagBuf);
+                if (nextPos < 0) {
+                    break;
+                }
+                pos = nextPos;
+
+                byte[] key = keyBuf[0];
+                byte flag = flagBuf[0];
+
+                // Past max key?
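+                // Keys sort in unsigned lexicographic order, so once the cursor is past scanMaxKey no
+                // later entry can match; terminate the scan instead of filtering the rest of the region.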
+ if (Arrays.compareUnsigned(key, scanMaxKey) > 0) { + pos = (int) blockIndexOffset; // done + return; + } + + if (flag != expectedFlag) { + continue; + } + + long[] quad = new long[4]; + quadIndex.keyToQuad(key, quad); + + if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS) + || (patternP >= 0 && quad[QuadIndex.PRED_IDX] != patternP) + || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO) + || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) { + continue; + } + + next = quad; + return; + } + } + + @Override + public boolean hasNext() { + return next != null; + } + + @Override + public long[] next() { + if (next == null) { + throw new NoSuchElementException(); + } + long[] result = next; + advance(); + return result; + } + } + + private class RawSourceImpl implements RawEntrySource { + private int pos; + private final byte[] scanMaxKey; + private byte[] currentKey; + private byte currentFlag; + private boolean valid; + private final byte[][] keyBuf = new byte[1][]; + private final byte[] flagBuf = new byte[1]; + + RawSourceImpl(int startBlock, byte[] scanMinKey, byte[] scanMaxKey) { + this.pos = (int) blockOffsets[startBlock]; + this.scanMaxKey = scanMaxKey; + skipToMinKey(scanMinKey); + } + + private void skipToMinKey(byte[] scanMinKey) { + while (pos < blockIndexOffset) { + int savedPos = pos; + int nextPos = readEntry(pos, keyBuf, flagBuf); + if (nextPos < 0) { + valid = false; + return; + } + if (Arrays.compareUnsigned(keyBuf[0], scanMinKey) >= 0) { + currentKey = keyBuf[0]; + currentFlag = flagBuf[0]; + pos = nextPos; + valid = Arrays.compareUnsigned(currentKey, scanMaxKey) <= 0; + return; + } + pos = nextPos; + } + valid = false; + } + + @Override + public boolean hasNext() { + return valid; + } + + @Override + public byte[] peekKey() { + return currentKey; + } + + @Override + public byte peekFlag() { + return currentFlag; + } + + @Override + public void advance() { + if (pos >= blockIndexOffset) { + valid = false; + return; + } + int nextPos = readEntry(pos, keyBuf, flagBuf); + if (nextPos < 0) { + valid = false; + return; + } + pos = nextPos; + currentKey = keyBuf[0]; + currentFlag = flagBuf[0]; + if (Arrays.compareUnsigned(currentKey, scanMaxKey) > 0) { + valid = false; + } + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java new file mode 100644 index 00000000000..6bcc272b7cf --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java @@ -0,0 +1,176 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Serializes a frozen {@link MemTable} into SSTable binary format. + * + *

<h2>Format</h2>
+ *
+ * <pre>
+ * [DATA BLOCKS]
+ *   Per entry: [key_length varint][key bytes][flag 1 byte]
+ *   Block boundary when cumulative size exceeds blockSize
+ *
+ * [BLOCK INDEX]
+ *   [block_count: 4-byte int BE]
+ *   Per block: [first_key_length varint][first_key bytes][offset: 8-byte long BE][length: 4-byte int BE]
+ *
+ * [STATS]
+ *   [min_key_length varint][min_key bytes]
+ *   [max_key_length varint][max_key bytes]
+ *   [entry_count: 8-byte long BE]
+ *
+ * [FOOTER: 32 bytes]
+ *   [magic: 4 bytes = 0x53535431 "SST1"]
+ *   [version: 4 bytes = 1]
+ *   [block_index_offset: 8-byte long BE]
+ *   [block_index_length: 4-byte int BE]
+ *   [stats_offset: 8-byte long BE]
+ *   [stats_length: 4-byte int BE]
+ * </pre>
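+ *
+ * A minimal write-path sketch (assumes a frozen, non-empty MemTable; the key follows the layout documented in
+ * {@link Manifest}):
+ *
+ * <pre>{@code
+ * byte[] sst = SSTableWriter.write(memTable); // default 4 MiB block size
+ * objectStore.put("sstables/L0-" + epoch + "-spoc.sst", sst);
+ * SSTable table = new SSTable(sst, quadIndex); // readable immediately, no further I/O
+ * }</pre>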
+ */ +public class SSTableWriter { + + static final int MAGIC = 0x53535431; // "SST1" + static final int VERSION = 1; + static final int FOOTER_SIZE = 32; + static final int DEFAULT_BLOCK_SIZE = 4 * 1024 * 1024; // 4 MiB + + public static byte[] write(MemTable memTable) { + return write(memTable, DEFAULT_BLOCK_SIZE); + } + + public static byte[] write(MemTable memTable, int blockSize) { + try { + Map data = memTable.getData(); + if (data.isEmpty()) { + throw new IllegalArgumentException("Cannot write empty MemTable to SSTable"); + } + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + + // Track block boundaries + List blocks = new ArrayList<>(); + byte[] firstKeyInBlock = null; + long blockStartOffset = 0; + long currentBlockSize = 0; + + byte[] minKey = null; + byte[] maxKey = null; + long entryCount = 0; + + for (Map.Entry entry : data.entrySet()) { + byte[] key = entry.getKey(); + byte flag = entry.getValue()[0]; + + if (minKey == null) { + minKey = key; + } + maxKey = key; + entryCount++; + + // Start a new block if needed + if (firstKeyInBlock == null) { + firstKeyInBlock = key; + blockStartOffset = baos.size(); + currentBlockSize = 0; + } + + // Write entry: [key_length varint][key bytes][flag 1 byte] + writeVarint(out, key.length); + out.write(key); + out.write(flag); + currentBlockSize += varintLength(key.length) + key.length + 1; + + // Check block boundary + if (currentBlockSize >= blockSize) { + long blockEnd = baos.size(); + blocks.add(new BlockInfo(firstKeyInBlock, blockStartOffset, (int) (blockEnd - blockStartOffset))); + firstKeyInBlock = null; + currentBlockSize = 0; + } + } + + // Finalize last block + if (firstKeyInBlock != null) { + long blockEnd = baos.size(); + blocks.add(new BlockInfo(firstKeyInBlock, blockStartOffset, (int) (blockEnd - blockStartOffset))); + } + + // Write block index + long blockIndexOffset = baos.size(); + out.writeInt(blocks.size()); + for (BlockInfo block : blocks) { + writeVarint(out, block.firstKey.length); + out.write(block.firstKey); + out.writeLong(block.offset); + out.writeInt(block.length); + } + int blockIndexLength = (int) (baos.size() - blockIndexOffset); + + // Write stats + long statsOffset = baos.size(); + writeVarint(out, minKey.length); + out.write(minKey); + writeVarint(out, maxKey.length); + out.write(maxKey); + out.writeLong(entryCount); + int statsLength = (int) (baos.size() - statsOffset); + + // Write footer (32 bytes) + out.writeInt(MAGIC); + out.writeInt(VERSION); + out.writeLong(blockIndexOffset); + out.writeInt(blockIndexLength); + out.writeLong(statsOffset); + out.writeInt(statsLength); + + out.flush(); + return baos.toByteArray(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static void writeVarint(DataOutputStream out, int value) throws IOException { + // Simple varint for lengths (always non-negative int) + ByteBuffer bb = ByteBuffer.allocate(5); + Varint.writeUnsigned(bb, value); + out.write(bb.array(), 0, bb.position()); + } + + private static int varintLength(int value) { + return Varint.calcLengthUnsigned(value); + } + + private static class BlockInfo { + final byte[] firstKey; + final long offset; + final int length; + + BlockInfo(byte[] firstKey, long offset, int length) { + this.firstKey = firstKey; + this.offset = offset; + this.length = length; + } + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStrategyTest.java 
b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStrategyTest.java new file mode 100644 index 00000000000..ba4920ac6b8 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStrategyTest.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import org.eclipse.rdf4j.sail.base.config.BaseSailConfig; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.testsuite.sail.EvaluationStrategyTest; + +public class S3EvaluationStrategyTest extends EvaluationStrategyTest { + + @Override + protected BaseSailConfig getBaseSailConfig() { + return new S3StoreConfig(); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java new file mode 100644 index 00000000000..5b312053362 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java @@ -0,0 +1,112 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import static org.junit.jupiter.api.Assertions.*; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.sail.s3.storage.S3ObjectStore; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import io.minio.BucketExistsArgs; +import io.minio.MakeBucketArgs; +import io.minio.MinioClient; + +/** + * Integration test for S3 persistence using a real MinIO container via Testcontainers. Suffixed IT so it runs with + * {@code mvn verify} (Failsafe), not {@code mvn test}. 
+ */ +@Testcontainers +class S3PersistenceMinioIT { + + private static final String BUCKET = "test-bucket"; + private static final String ACCESS_KEY = "minioadmin"; + private static final String SECRET_KEY = "minioadmin"; + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + + @Container + static final GenericContainer MINIO = new GenericContainer<>("minio/minio:latest") + .withExposedPorts(9000) + .withEnv("MINIO_ROOT_USER", ACCESS_KEY) + .withEnv("MINIO_ROOT_PASSWORD", SECRET_KEY) + .withCommand("server", "/data"); + + private static String endpoint; + + @BeforeAll + static void createBucket() throws Exception { + endpoint = "http://" + MINIO.getHost() + ":" + MINIO.getMappedPort(9000); + MinioClient client = MinioClient.builder() + .endpoint(endpoint) + .credentials(ACCESS_KEY, SECRET_KEY) + .build(); + if (!client.bucketExists(BucketExistsArgs.builder().bucket(BUCKET).build())) { + client.makeBucket(MakeBucketArgs.builder().bucket(BUCKET).build()); + } + } + + private S3ObjectStore createStore(String prefix) { + return new S3ObjectStore(BUCKET, endpoint, "us-east-1", prefix, ACCESS_KEY, SECRET_KEY, true); + } + + @Test + void writeFlushShutdownRestart() throws Exception { + String prefix = "test-" + System.nanoTime() + "/"; + + IRI s = VF.createIRI("http://example.org/s1"); + IRI p = VF.createIRI("http://example.org/p1"); + IRI o = VF.createIRI("http://example.org/o1"); + + // Write and flush + { + S3ObjectStore objectStore = createStore(prefix); + S3StoreConfig config = new S3StoreConfig(); + S3SailStore sailStore = new S3SailStore(config, objectStore); + + var source = sailStore.getExplicitSailSource(); + var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + sink.approve(s, p, o, null); + sink.flush(); + sailStore.close(); + } + + // Restart and verify + { + S3ObjectStore objectStore = createStore(prefix); + S3StoreConfig config = new S3StoreConfig(); + S3SailStore sailStore = new S3SailStore(config, objectStore); + + var source = sailStore.getExplicitSailSource(); + var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + CloseableIteration iter = dataset.getStatements(null, null, null); + assertTrue(iter.hasNext()); + Statement stmt = iter.next(); + assertEquals(s.stringValue(), stmt.getSubject().stringValue()); + assertEquals(p.stringValue(), stmt.getPredicate().stringValue()); + assertEquals(o.stringValue(), stmt.getObject().stringValue()); + assertFalse(iter.hasNext()); + + iter.close(); + dataset.close(); + sailStore.close(); + } + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java new file mode 100644 index 00000000000..c89ae8327d1 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java @@ -0,0 +1,213 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.sail.s3;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.nio.file.Path;
+
+import org.eclipse.rdf4j.common.iteration.CloseableIteration;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Statement;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig;
+import org.eclipse.rdf4j.sail.s3.storage.FileSystemObjectStore;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+class S3PersistenceTest {
+
+    @TempDir
+    Path tempDir;
+
+    private static final ValueFactory VF = SimpleValueFactory.getInstance();
+
+    @Test
+    void writeFlushShutdownRestart_quadsReadable() throws Exception {
+        FileSystemObjectStore store = new FileSystemObjectStore(tempDir);
+        S3StoreConfig config = new S3StoreConfig();
+
+        IRI s = VF.createIRI("http://example.org/s1");
+        IRI p = VF.createIRI("http://example.org/p1");
+        IRI o = VF.createIRI("http://example.org/o1");
+        IRI ctx = VF.createIRI("http://example.org/g1");
+
+        // Phase 1: Write and flush
+        {
+            S3SailStore sailStore = new S3SailStore(config, store);
+
+            // Add the statement through the explicit sink; approve() resolves the values
+            // through the sail's value store, so no manual storeValue calls are needed.
+            var source = sailStore.getExplicitSailSource();
+            var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE);
+
+            sink.approve(s, p, o, ctx);
+            sink.flush();
+            sailStore.close();
+        }
+
+        // Phase 2: Restart and verify
+        {
+            S3SailStore sailStore = new S3SailStore(config, store);
+
+            var source = sailStore.getExplicitSailSource();
+            var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE);
+
+            CloseableIteration<? extends Statement> iter = dataset.getStatements(null, null, null);
+            assertTrue(iter.hasNext(), "Should have at least one statement after restart");
+
+            Statement stmt = iter.next();
+            assertEquals(s.stringValue(), stmt.getSubject().stringValue());
+            assertEquals(p.stringValue(), stmt.getPredicate().stringValue());
+            assertEquals(o.stringValue(), stmt.getObject().stringValue());
+            assertEquals(ctx.stringValue(), stmt.getContext().stringValue());
+
+            assertFalse(iter.hasNext(), "Should have exactly one statement");
+            iter.close();
+            dataset.close();
+            sailStore.close();
+        }
+    }
+
+    @Test
+    void multipleFlushes_allDataReadable() throws Exception {
+        FileSystemObjectStore store = new FileSystemObjectStore(tempDir);
+        S3StoreConfig config = new S3StoreConfig();
+
+        IRI s1 = VF.createIRI("http://example.org/s1");
+        IRI s2 = VF.createIRI("http://example.org/s2");
+        IRI p = VF.createIRI("http://example.org/p");
+        IRI o = VF.createIRI("http://example.org/o");
+
+        // Write, flush, write more, flush again
+        {
+            S3SailStore sailStore = new S3SailStore(config, store);
+
+            var source = sailStore.getExplicitSailSource();
+            var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE);
+
+            sink.approve(s1, p, o, null);
+            sink.flush();
+
+            sink.approve(s2, p, o, null);
+            sink.flush();
+
+            sailStore.close();
+        }
+
+        // Restart and verify both statements exist
+        {
+            S3SailStore sailStore = new
S3SailStore(config, store); + + var source = sailStore.getExplicitSailSource(); + var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + CloseableIteration iter = dataset.getStatements(null, null, null); + int count = 0; + while (iter.hasNext()) { + iter.next(); + count++; + } + assertEquals(2, count, "Should have 2 statements after restart"); + + iter.close(); + dataset.close(); + sailStore.close(); + } + } + + @Test + void deleteAndRestart_deletedQuadsGone() throws Exception { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3StoreConfig config = new S3StoreConfig(); + + IRI s1 = VF.createIRI("http://example.org/s1"); + IRI s2 = VF.createIRI("http://example.org/s2"); + IRI p = VF.createIRI("http://example.org/p"); + IRI o = VF.createIRI("http://example.org/o"); + + // Write two statements, delete one, flush + { + S3SailStore sailStore = new S3SailStore(config, store); + + var source = sailStore.getExplicitSailSource(); + var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + sink.approve(s1, p, o, null); + sink.approve(s2, p, o, null); + sink.flush(); + + // Now delete s1 + Statement toDeprecate = VF.createStatement(s1, p, o); + sink.deprecate(toDeprecate); + sink.flush(); + + sailStore.close(); + } + + // Restart and verify only s2 remains + { + S3SailStore sailStore = new S3SailStore(config, store); + + var source = sailStore.getExplicitSailSource(); + var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + CloseableIteration iter = dataset.getStatements(null, null, null); + assertTrue(iter.hasNext()); + Statement stmt = iter.next(); + assertEquals(s2.stringValue(), stmt.getSubject().stringValue()); + assertFalse(iter.hasNext()); + + iter.close(); + dataset.close(); + sailStore.close(); + } + } + + @Test + void namespacePersistence() throws Exception { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3StoreConfig config = new S3StoreConfig(); + + // Set a namespace + { + S3SailStore sailStore = new S3SailStore(config, store); + + var source = sailStore.getExplicitSailSource(); + var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + sink.setNamespace("ex", "http://example.org/"); + sink.flush(); + sailStore.close(); + } + + // Restart and verify namespace persists + { + S3SailStore sailStore = new S3SailStore(config, store); + + var source = sailStore.getExplicitSailSource(); + var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + assertEquals("http://example.org/", dataset.getNamespace("ex")); + + dataset.close(); + sailStore.close(); + } + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java new file mode 100644 index 00000000000..bcea8fb485a --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.testsuite.repository.SparqlOrderByTest; + +public class S3SparqlOrderByTest extends SparqlOrderByTest { + + @Override + protected Repository newRepository() { + return new SailRepository(new S3Store(new S3StoreConfig("spoc"))); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java new file mode 100644 index 00000000000..256131ec4f4 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import java.io.File; + +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.testsuite.repository.RepositoryConnectionTest; + +public class S3StoreConnectionTest extends RepositoryConnectionTest { + + @Override + protected Repository createRepository(File dataDir) { + return new SailRepository(new S3Store(new S3StoreConfig("spoc"))); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java new file mode 100644 index 00000000000..20715c3b7cf --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import org.eclipse.rdf4j.sail.NotifyingSail; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.testsuite.sail.SailIsolationLevelTest; + +public class S3StoreIsolationLevelTest extends SailIsolationLevelTest { + + @Override + protected NotifyingSail createSail() throws SailException { + return new S3Store(new S3StoreConfig("spoc,posc")); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java new file mode 100644 index 00000000000..716520425e6 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import org.eclipse.rdf4j.repository.Repository; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.testsuite.repository.RepositoryTest; + +public class S3StoreRepositoryTest extends RepositoryTest { + + @Override + protected Repository createRepository() { + return new SailRepository(new S3Store(new S3StoreConfig("spoc"))); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java new file mode 100644 index 00000000000..921fd67bb19 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java @@ -0,0 +1,26 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import org.eclipse.rdf4j.sail.NotifyingSail; +import org.eclipse.rdf4j.sail.SailException; +import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.testsuite.sail.RDFNotifyingStoreTest; + +public class S3StoreTest extends RDFNotifyingStoreTest { + + @Override + protected NotifyingSail createSail() throws SailException { + NotifyingSail sail = new S3Store(new S3StoreConfig("spoc,posc")); + sail.init(); + return sail; + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java new file mode 100644 index 00000000000..5e69022d14c --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java @@ -0,0 +1,134 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3; + +import static org.junit.jupiter.api.Assertions.*; + +import java.nio.file.Path; + +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.sail.s3.storage.FileSystemObjectStore; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class S3ValueStoreSerializationTest { + + @TempDir + Path tempDir; + + @Test + void roundTrip_iri() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3ValueStore vs = new S3ValueStore(); + + IRI iri = vs.createIRI("http://example.org/test"); + long id = vs.storeValue(iri); + + vs.serialize(store); + + S3ValueStore vs2 = new S3ValueStore(); + vs2.deserialize(store, vs.getNextId()); + + Value restored = vs2.getValue(id); + assertNotNull(restored); + assertTrue(restored instanceof IRI); + assertEquals("http://example.org/test", restored.stringValue()); + assertEquals(id, vs2.getId(iri)); + } + + @Test + void roundTrip_literal() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3ValueStore vs = new S3ValueStore(); + + Literal lit = vs.createLiteral("hello world"); + long id = vs.storeValue(lit); + + vs.serialize(store); + + S3ValueStore vs2 = new S3ValueStore(); + vs2.deserialize(store, vs.getNextId()); + + Value restored = vs2.getValue(id); + assertNotNull(restored); + assertTrue(restored instanceof Literal); + assertEquals("hello world", ((Literal) restored).getLabel()); + } + + @Test + void roundTrip_literalWithLanguage() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3ValueStore vs = new S3ValueStore(); + + Literal lit = vs.createLiteral("bonjour", "fr"); + long id = vs.storeValue(lit); + + vs.serialize(store); + + S3ValueStore vs2 = new S3ValueStore(); + vs2.deserialize(store, vs.getNextId()); + + Value restored = vs2.getValue(id); + assertNotNull(restored); + assertTrue(restored instanceof Literal); + 
assertEquals("bonjour", ((Literal) restored).getLabel()); + assertTrue(((Literal) restored).getLanguage().isPresent()); + assertEquals("fr", ((Literal) restored).getLanguage().get()); + } + + @Test + void roundTrip_bnode() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3ValueStore vs = new S3ValueStore(); + + BNode bnode = vs.createBNode("node123"); + long id = vs.storeValue(bnode); + + vs.serialize(store); + + S3ValueStore vs2 = new S3ValueStore(); + vs2.deserialize(store, vs.getNextId()); + + Value restored = vs2.getValue(id); + assertNotNull(restored); + assertTrue(restored instanceof BNode); + assertEquals("node123", ((BNode) restored).getID()); + } + + @Test + void roundTrip_multipleValues() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3ValueStore vs = new S3ValueStore(); + + IRI iri1 = vs.createIRI("http://example.org/s1"); + IRI iri2 = vs.createIRI("http://example.org/p1"); + Literal lit = vs.createLiteral("value"); + BNode bnode = vs.createBNode("b1"); + + long id1 = vs.storeValue(iri1); + long id2 = vs.storeValue(iri2); + long id3 = vs.storeValue(lit); + long id4 = vs.storeValue(bnode); + + vs.serialize(store); + + S3ValueStore vs2 = new S3ValueStore(); + vs2.deserialize(store, vs.getNextId()); + + assertEquals(iri1, vs2.getValue(id1)); + assertEquals(iri2, vs2.getValue(id2)); + assertEquals(lit.getLabel(), ((Literal) vs2.getValue(id3)).getLabel()); + assertEquals(bnode.getID(), ((BNode) vs2.getValue(id4)).getID()); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java new file mode 100644 index 00000000000..b434757cc2c --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java @@ -0,0 +1,108 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + +/** + * Test double for {@link ObjectStore} backed by the local filesystem. 
+ */ +public class FileSystemObjectStore implements ObjectStore { + + private final Path root; + + public FileSystemObjectStore(Path root) { + this.root = root; + } + + private Path resolve(String key) { + return root.resolve(key); + } + + @Override + public void put(String key, byte[] data) { + try { + Path target = resolve(key); + Files.createDirectories(target.getParent()); + Files.write(target, data); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public byte[] get(String key) { + try { + Path target = resolve(key); + if (!Files.exists(target)) { + return null; + } + return Files.readAllBytes(target); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public byte[] getRange(String key, long offset, long length) { + byte[] full = get(key); + if (full == null) { + return null; + } + int start = (int) offset; + int end = (int) Math.min(start + length, full.length); + return Arrays.copyOfRange(full, start, end); + } + + @Override + public void delete(String key) { + try { + Path target = resolve(key); + Files.deleteIfExists(target); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public List list(String subPrefix) { + List result = new ArrayList<>(); + Path prefixPath = resolve(subPrefix); + Path searchDir = Files.isDirectory(prefixPath) ? prefixPath : prefixPath.getParent(); + if (searchDir == null || !Files.exists(searchDir)) { + return result; + } + try (Stream walk = Files.walk(searchDir)) { + walk.filter(Files::isRegularFile) + .forEach(p -> { + String relative = root.relativize(p).toString(); + if (relative.startsWith(subPrefix)) { + result.add(relative); + } + }); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + return result; + } + + @Override + public void close() { + // no-op + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java new file mode 100644 index 00000000000..8a0e48a334a --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java @@ -0,0 +1,87 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.junit.jupiter.api.Assertions.*; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import com.fasterxml.jackson.databind.ObjectMapper; + +class ManifestTest { + + @TempDir + Path tempDir; + + @Test + void roundTrip() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + + Manifest manifest = new Manifest(); + manifest.setNextValueId(42); + List infos = new ArrayList<>(); + infos.add(new Manifest.SSTableInfo("sstables/L0-1-spoc.sst", 0, "spoc", "0102", "0304", 10, 1)); + infos.add(new Manifest.SSTableInfo("sstables/L0-1-posc.sst", 0, "posc", "0506", "0708", 10, 1)); + manifest.setSstables(infos); + + manifest.save(store, mapper, 1); + + Manifest loaded = Manifest.load(store, mapper); + assertEquals(1, loaded.getVersion()); + assertEquals(42, loaded.getNextValueId()); + assertEquals(2, loaded.getSstables().size()); + assertEquals("sstables/L0-1-spoc.sst", loaded.getSstables().get(0).getS3Key()); + assertEquals("spoc", loaded.getSstables().get(0).getIndexName()); + assertEquals(10, loaded.getSstables().get(0).getEntryCount()); + assertEquals(1, loaded.getSstables().get(0).getEpoch()); + } + + @Test + void loadReturnsEmptyManifestWhenNoneExists() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + ObjectMapper mapper = new ObjectMapper(); + + Manifest loaded = Manifest.load(store, mapper); + assertNotNull(loaded); + assertEquals(0, loaded.getSstables().size()); + assertEquals(0, loaded.getNextValueId()); + } + + @Test + void multipleVersions() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + + // Save version 1 + Manifest m1 = new Manifest(); + m1.setNextValueId(10); + m1.save(store, mapper, 1); + + // Save version 2 + Manifest m2 = new Manifest(); + m2.setNextValueId(20); + List infos = new ArrayList<>(); + infos.add(new Manifest.SSTableInfo("sstables/L0-2-spoc.sst", 0, "spoc", "01", "02", 5, 2)); + m2.setSstables(infos); + m2.save(store, mapper, 2); + + // Load should return the latest (version 2) + Manifest loaded = Manifest.load(store, mapper); + assertEquals(20, loaded.getNextValueId()); + assertEquals(1, loaded.getSstables().size()); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java new file mode 100644 index 00000000000..77ff3896ecb --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java @@ -0,0 +1,178 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.junit.jupiter.api.Test; + +class MergeIteratorTest { + + private final QuadIndex spoc = new QuadIndex("spoc"); + + @Test + void newerSourceWins() { + // Newer MemTable overrides older SSTable + MemTable newer = new MemTable(spoc); + newer.put(1, 2, 3, 0, true); // explicit in newer + + MemTable older = new MemTable(spoc); + older.put(1, 2, 3, 0, false); // inferred in older + + List sources = Arrays.asList( + newer.asRawSource(-1, -1, -1, -1), + older.asRawSource(-1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + List results = toList(iter); + assertEquals(1, results.size()); + assertArrayEquals(new long[] { 1, 2, 3, 0 }, results.get(0)); + } + + @Test + void tombstoneSuppression() { + MemTable newer = new MemTable(spoc); + newer.remove(1, 2, 3, 0, true); // tombstone + + MemTable older = new MemTable(spoc); + older.put(1, 2, 3, 0, true); // explicit + + List sources = Arrays.asList( + newer.asRawSource(-1, -1, -1, -1), + older.asRawSource(-1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + List results = toList(iter); + assertEquals(0, results.size()); + } + + @Test + void multiSourceMerge() { + MemTable m1 = new MemTable(spoc); + m1.put(1, 2, 3, 0, true); + m1.put(3, 4, 5, 0, true); + + MemTable m2 = new MemTable(spoc); + m2.put(2, 3, 4, 0, true); + m2.put(4, 5, 6, 0, true); + + List sources = Arrays.asList( + m1.asRawSource(-1, -1, -1, -1), + m2.asRawSource(-1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + List results = toList(iter); + assertEquals(4, results.size()); + // Should be sorted by key (SPOC order) + assertEquals(1, results.get(0)[0]); + assertEquals(2, results.get(1)[0]); + assertEquals(3, results.get(2)[0]); + assertEquals(4, results.get(3)[0]); + } + + @Test + void emptySource() { + MemTable empty = new MemTable(spoc); + MemTable withData = new MemTable(spoc); + withData.put(1, 2, 3, 0, true); + + List sources = Arrays.asList( + empty.asRawSource(-1, -1, -1, -1), + withData.asRawSource(-1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + List results = toList(iter); + assertEquals(1, results.size()); + } + + @Test + void allEmptySources() { + MemTable empty1 = new MemTable(spoc); + MemTable empty2 = new MemTable(spoc); + + List sources = Arrays.asList( + empty1.asRawSource(-1, -1, -1, -1), + empty2.asRawSource(-1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + assertFalse(iter.hasNext()); + } + + @Test + void patternFilter() { + MemTable m1 = new MemTable(spoc); + m1.put(1, 2, 3, 0, true); + m1.put(1, 2, 4, 0, true); + m1.put(2, 3, 4, 0, true); + + List sources = List.of(m1.asRawSource(1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, 1, -1, -1, -1); + List results = toList(iter); + assertEquals(2, results.size()); + } + + @Test + void mergeMemTableWithSSTable() { + // MemTable (newer) + SSTable (older) + MemTable memTable = new MemTable(spoc); + 
memTable.put(1, 2, 3, 0, true); + + MemTable olderData = new MemTable(spoc); + olderData.put(2, 3, 4, 0, true); + olderData.put(4, 5, 6, 0, true); + byte[] sstData = SSTableWriter.write(olderData); + SSTable sst = new SSTable(sstData, spoc); + + List sources = Arrays.asList( + memTable.asRawSource(-1, -1, -1, -1), + sst.asRawSource(-1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + List results = toList(iter); + assertEquals(3, results.size()); + } + + @Test + void tombstoneInMemTableShadowsSSTable() { + // SSTable has a value, MemTable deletes it + MemTable olderData = new MemTable(spoc); + olderData.put(1, 2, 3, 0, true); + olderData.put(4, 5, 6, 0, true); + byte[] sstData = SSTableWriter.write(olderData); + SSTable sst = new SSTable(sstData, spoc); + + MemTable memTable = new MemTable(spoc); + memTable.remove(1, 2, 3, 0, true); // tombstone shadows SSTable entry + + List sources = Arrays.asList( + memTable.asRawSource(-1, -1, -1, -1), + sst.asRawSource(-1, -1, -1, -1)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + List results = toList(iter); + assertEquals(1, results.size()); + assertArrayEquals(new long[] { 4, 5, 6, 0 }, results.get(0)); + } + + private List toList(Iterator iter) { + List list = new ArrayList<>(); + while (iter.hasNext()) { + list.add(iter.next()); + } + return list; + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java new file mode 100644 index 00000000000..be49b3f82b9 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java @@ -0,0 +1,183 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.junit.jupiter.api.Test; + +class SSTableWriterReaderTest { + + private final QuadIndex spoc = new QuadIndex("spoc"); + + @Test + void roundTrip_singleEntry() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 4, true); + + byte[] sstData = SSTableWriter.write(mt); + SSTable sst = new SSTable(sstData, spoc); + + assertEquals(1, sst.getEntryCount()); + + Iterator iter = sst.scan(1, 2, 3, 4, true); + assertTrue(iter.hasNext()); + long[] quad = iter.next(); + assertArrayEquals(new long[] { 1, 2, 3, 4 }, quad); + assertFalse(iter.hasNext()); + } + + @Test + void roundTrip_multipleEntries() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 0, true); + mt.put(1, 2, 4, 0, true); + mt.put(2, 3, 4, 0, true); + mt.put(10, 20, 30, 40, true); + + byte[] sstData = SSTableWriter.write(mt); + SSTable sst = new SSTable(sstData, spoc); + + assertEquals(4, sst.getEntryCount()); + + // Wildcard scan + List results = toList(sst.scan(-1, -1, -1, -1, true)); + assertEquals(4, results.size()); + } + + @Test + void roundTrip_patternFilter() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 0, true); + mt.put(1, 2, 4, 0, true); + mt.put(2, 3, 4, 0, true); + + byte[] sstData = SSTableWriter.write(mt); + SSTable sst = new SSTable(sstData, spoc); + + // Filter by subject=1 + List results = toList(sst.scan(1, -1, -1, -1, true)); + assertEquals(2, results.size()); + assertEquals(1, results.get(0)[0]); + assertEquals(1, results.get(1)[0]); + + // Filter by subject=2 + results = toList(sst.scan(2, -1, -1, -1, true)); + assertEquals(1, results.size()); + assertEquals(2, results.get(0)[0]); + } + + @Test + void roundTrip_tombstonesFilteredInScan() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 0, true); + mt.put(1, 2, 4, 0, true); + mt.remove(1, 2, 3, 0, true); // tombstone + + byte[] sstData = SSTableWriter.write(mt); + SSTable sst = new SSTable(sstData, spoc); + + // Tombstone entry is still in the SSTable (entryCount includes it) + assertEquals(2, sst.getEntryCount()); + + // But scan filters it out + List results = toList(sst.scan(-1, -1, -1, -1, true)); + assertEquals(1, results.size()); + assertArrayEquals(new long[] { 1, 2, 4, 0 }, results.get(0)); + } + + @Test + void roundTrip_tombstonesVisibleInRawSource() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 0, true); + mt.remove(1, 2, 3, 0, true); + + byte[] sstData = SSTableWriter.write(mt); + SSTable sst = new SSTable(sstData, spoc); + + RawEntrySource source = sst.asRawSource(-1, -1, -1, -1); + assertTrue(source.hasNext()); + // The tombstone should be visible + assertEquals(MemTable.FLAG_TOMBSTONE, source.peekFlag()); + } + + @Test + void roundTrip_explicitVsInferred() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 0, true); // explicit + mt.put(4, 5, 6, 0, false); // inferred + + byte[] sstData = SSTableWriter.write(mt); + SSTable sst = new SSTable(sstData, spoc); + + List explicitResults = toList(sst.scan(-1, -1, -1, -1, true)); + assertEquals(1, explicitResults.size()); + assertArrayEquals(new long[] { 1, 2, 3, 0 }, explicitResults.get(0)); + + List inferredResults = toList(sst.scan(-1, -1, -1, -1, false)); + assertEquals(1, inferredResults.size()); + assertArrayEquals(new long[] { 4, 5, 6, 0 }, 
inferredResults.get(0)); + } + + @Test + void roundTrip_smallBlockSize() { + // Use a very small block size to test multi-block SSTables + MemTable mt = new MemTable(spoc); + for (long i = 1; i <= 100; i++) { + mt.put(i, i + 1, i + 2, 0, true); + } + + byte[] sstData = SSTableWriter.write(mt, 64); // tiny blocks + SSTable sst = new SSTable(sstData, spoc); + + assertEquals(100, sst.getEntryCount()); + + // Verify all entries are retrievable + List results = toList(sst.scan(-1, -1, -1, -1, true)); + assertEquals(100, results.size()); + + // Verify range scan with block index seeking + results = toList(sst.scan(50, -1, -1, -1, true)); + assertEquals(1, results.size()); + assertEquals(50, results.get(0)[0]); + } + + @Test + void roundTrip_largeIds() { + MemTable mt = new MemTable(spoc); + mt.put(100000, 200000, 300000, 400000, true); + + byte[] sstData = SSTableWriter.write(mt); + SSTable sst = new SSTable(sstData, spoc); + + List results = toList(sst.scan(-1, -1, -1, -1, true)); + assertEquals(1, results.size()); + assertArrayEquals(new long[] { 100000, 200000, 300000, 400000 }, results.get(0)); + } + + @Test + void emptyMemTable_throwsException() { + MemTable mt = new MemTable(spoc); + assertThrows(IllegalArgumentException.class, () -> SSTableWriter.write(mt)); + } + + private List toList(Iterator iter) { + List list = new ArrayList<>(); + while (iter.hasNext()) { + list.add(iter.next()); + } + return list; + } +} From 306c5330c37df72842016fae1a073ee76e479965 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Tue, 24 Feb 2026 00:48:42 -0500 Subject: [PATCH 03/10] feat: replace SSTables with Parquet, add predicate partitioning and tiered cache (Phase 2) Replace custom SSTable binary format with Apache Parquet columnar storage, introduce vertical partitioning by predicate, and add a three-tier cache (Caffeine heap -> local disk LRU -> S3). Storage redesign: - Parquet files on S3 with ZSTD compression and dictionary encoding - Predicate-based partitioning (data/predicates/{id}/) eliminates predicate column from files, tightening column statistics - Three sort orders per partition (SOC, OSC, CSO) for optimal query performance regardless of access pattern - Single MemTable in SPOC order, partitioned on flush - JSON catalog with per-file column statistics for catalog-level pruning Cache system: - L1: Caffeine heap cache (configurable, default 256 MB) - L2: Local disk LRU cache (configurable, default 10 GB) - L3: S3 source of truth - Write-through on flush avoids cold reads Compaction: - L0->L1 merge when epoch count >= 8 per predicate - L1->L2 merge when epoch count >= 4 per predicate - Tombstone suppression at highest level Hadoop dependency elimination: - Zero Hadoop JARs in dependency tree - PlainParquetConfiguration + custom SimpleCodecFactory bypass all Hadoop runtime paths - 14 minimal stub classes in org.apache.hadoop.* satisfy parquet-hadoop JVM class loading requirements Deleted: SSTable, SSTableWriter, Manifest (replaced by Parquet + Catalog) All 529 tests pass. 
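Illustration (not part of the diff): the read-through/backfill behavior the
three-tier cache provides. The type names (L1HeapCache, L2DiskCache,
ObjectStore) and their get/put signatures are the ones added in this patch,
but the class below is a simplified rendering of the behavior described
above, not the committed TieredCache code:

    import org.eclipse.rdf4j.sail.s3.cache.L1HeapCache;
    import org.eclipse.rdf4j.sail.s3.cache.L2DiskCache;
    import org.eclipse.rdf4j.sail.s3.storage.ObjectStore;

    class ReadThroughSketch {
        private final L1HeapCache l1;  // heap cache, default 256 MB
        private final L2DiskCache l2;  // local disk LRU, default 10 GB
        private final ObjectStore l3;  // S3, the source of truth

        ReadThroughSketch(L1HeapCache l1, L2DiskCache l2, ObjectStore l3) {
            this.l1 = l1;
            this.l2 = l2;
            this.l3 = l3;
        }

        byte[] get(String s3Key) {
            byte[] data = l1.get(s3Key);   // 1. heap hit: cheapest path
            if (data != null) {
                return data;
            }
            data = l2.get(s3Key);          // 2. disk hit: promote to heap
            if (data != null) {
                l1.put(s3Key, data);
                return data;
            }
            data = l3.get(s3Key);          // 3. fall through to S3
            if (data != null) {            // null when the key is unknown
                l2.put(s3Key, data);       // backfill both cache tiers
                l1.put(s3Key, data);
            }
            return data;
        }
    }

Flushes invoke the cache's write-through path with the bytes just uploaded,
so the first query after a flush is served from the local tiers instead of a
cold S3 read.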
--- core/sail/s3/pom.xml | 20 + .../org/apache/hadoop/conf/Configuration.java | 15 + .../java/org/apache/hadoop/fs/FileStatus.java | 8 + .../java/org/apache/hadoop/fs/FileSystem.java | 8 + .../main/java/org/apache/hadoop/fs/Path.java | 8 + .../java/org/apache/hadoop/fs/PathFilter.java | 8 + .../org/apache/hadoop/mapred/JobConf.java | 10 + .../apache/hadoop/mapreduce/InputFormat.java | 8 + .../apache/hadoop/mapreduce/InputSplit.java | 8 + .../java/org/apache/hadoop/mapreduce/Job.java | 8 + .../apache/hadoop/mapreduce/JobContext.java | 8 + .../apache/hadoop/mapreduce/RecordReader.java | 8 + .../hadoop/mapreduce/TaskAttemptContext.java | 8 + .../mapreduce/lib/input/FileInputFormat.java | 11 + .../hadoop/mapreduce/lib/input/FileSplit.java | 10 + .../eclipse/rdf4j/sail/s3/S3SailStore.java | 516 +++++++++++------- .../rdf4j/sail/s3/cache/L1HeapCache.java | 54 ++ .../rdf4j/sail/s3/cache/L2DiskCache.java | 215 ++++++++ .../rdf4j/sail/s3/cache/TieredCache.java | 98 ++++ .../sail/s3/storage/ByteArrayInputFile.java | 142 +++++ .../sail/s3/storage/ByteArrayOutputFile.java | 110 ++++ .../rdf4j/sail/s3/storage/Catalog.java | 387 +++++++++++++ .../sail/s3/storage/CompactionPolicy.java | 81 +++ .../rdf4j/sail/s3/storage/Compactor.java | 256 +++++++++ .../rdf4j/sail/s3/storage/Manifest.java | 197 ------- .../rdf4j/sail/s3/storage/MemTable.java | 129 +++++ .../sail/s3/storage/ParquetFileBuilder.java | 249 +++++++++ .../sail/s3/storage/ParquetFilterBuilder.java | 82 +++ .../sail/s3/storage/ParquetQuadSource.java | 185 +++++++ .../rdf4j/sail/s3/storage/ParquetSchemas.java | 126 +++++ .../s3/storage/PartitionIndexSelector.java | 122 +++++ .../s3/storage/PartitionMergeIterator.java | 183 +++++++ .../rdf4j/sail/s3/storage/RawEntrySource.java | 2 +- .../rdf4j/sail/s3/storage/SSTable.java | 332 ----------- .../rdf4j/sail/s3/storage/SSTableWriter.java | 176 ------ .../sail/s3/storage/SimpleCodecFactory.java | 138 +++++ .../rdf4j/sail/s3/storage/ManifestTest.java | 87 --- .../sail/s3/storage/MergeIteratorTest.java | 18 +- .../s3/storage/SSTableWriterReaderTest.java | 183 ------- 39 files changed, 3036 insertions(+), 1178 deletions(-) create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java create mode 100644 core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L1HeapCache.java create mode 100644 
core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayInputFile.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java delete mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java delete mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java delete mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SimpleCodecFactory.java delete mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java delete mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java diff --git a/core/sail/s3/pom.xml b/core/sail/s3/pom.xml index 601b0c3eb5e..a7d25931146 100644 --- a/core/sail/s3/pom.xml +++ b/core/sail/s3/pom.xml @@ -52,6 +52,26 @@ com.fasterxml.jackson.core jackson-databind + + org.apache.parquet + parquet-hadoop + 1.15.2 + + + javax.annotation + javax.annotation-api + + + + + + com.github.ben-manes.caffeine + caffeine + 3.1.8 + ${project.groupId} rdf4j-sail-testsuite diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java b/core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java new file mode 100644 index 00000000000..e0904545c5a --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -0,0 +1,15 @@ +/* + * Minimal stub for org.apache.hadoop.conf.Configuration. + * + * Parquet-hadoop references this class in abstract method signatures + * (WriteSupport.init, ParquetWriter.Builder.getWriteSupport). Our code + * overrides the ParquetConfiguration variants instead, so this class is + * never instantiated or used at runtime. It exists only to satisfy the + * JVM class loader. + */ +package org.apache.hadoop.conf; + +public class Configuration { + public Configuration() { + } +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java new file mode 100644 index 00000000000..acb583d94a2 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. 
+ */ +package org.apache.hadoop.fs; + +public class FileStatus { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java new file mode 100644 index 00000000000..fd73ea3e2ef --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.fs; + +public abstract class FileSystem { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java new file mode 100644 index 00000000000..f0ceeb9ab79 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.fs; + +public class Path { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java new file mode 100644 index 00000000000..25be1aaf955 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.fs; + +public interface PathFilter { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java new file mode 100644 index 00000000000..71836d9a557 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -0,0 +1,10 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.mapred; + +import org.apache.hadoop.conf.Configuration; + +public class JobConf extends Configuration { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java new file mode 100644 index 00000000000..872f9916abf --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.mapreduce; + +public abstract class InputFormat { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java new file mode 100644 index 00000000000..3f106aeab5b --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.mapreduce; + +public abstract class InputSplit { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java new file mode 100644 index 00000000000..f6ced8fc859 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. 
+ */ +package org.apache.hadoop.mapreduce; + +public class Job { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java new file mode 100644 index 00000000000..e06f131fc38 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.mapreduce; + +public interface JobContext { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java new file mode 100644 index 00000000000..e9d4741a711 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.mapreduce; + +public abstract class RecordReader { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java new file mode 100644 index 00000000000..abb3c7cc7e9 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java @@ -0,0 +1,8 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. + */ +package org.apache.hadoop.mapreduce; + +public interface TaskAttemptContext extends JobContext { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java new file mode 100644 index 00000000000..5386226e74b --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java @@ -0,0 +1,11 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * ParquetInputFormat extends this class; loaded when ParquetReadOptions.Builder + * calls ParquetInputFormat.getFilter(). Never used at runtime. + */ +package org.apache.hadoop.mapreduce.lib.input; + +import org.apache.hadoop.mapreduce.InputFormat; + +public abstract class FileInputFormat extends InputFormat { +} diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java new file mode 100644 index 00000000000..6ae226d8383 --- /dev/null +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java @@ -0,0 +1,10 @@ +/* + * Minimal stub — satisfies JVM class loading for parquet-hadoop. + * Never instantiated at runtime. 
+ */ +package org.apache.hadoop.mapreduce.lib.input; + +import org.apache.hadoop.mapreduce.InputSplit; + +public class FileSplit extends InputSplit { +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index 0c92e9359cb..488fc60adc2 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -11,10 +11,12 @@ package org.eclipse.rdf4j.sail.s3; import java.io.IOException; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Comparator; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; @@ -41,42 +43,66 @@ import org.eclipse.rdf4j.sail.base.SailSink; import org.eclipse.rdf4j.sail.base.SailSource; import org.eclipse.rdf4j.sail.base.SailStore; +import org.eclipse.rdf4j.sail.s3.cache.TieredCache; import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; -import org.eclipse.rdf4j.sail.s3.storage.Manifest; +import org.eclipse.rdf4j.sail.s3.storage.Catalog; +import org.eclipse.rdf4j.sail.s3.storage.CompactionPolicy; +import org.eclipse.rdf4j.sail.s3.storage.Compactor; import org.eclipse.rdf4j.sail.s3.storage.MemTable; -import org.eclipse.rdf4j.sail.s3.storage.MergeIterator; import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; +import org.eclipse.rdf4j.sail.s3.storage.ParquetFileBuilder; +import org.eclipse.rdf4j.sail.s3.storage.ParquetQuadSource; +import org.eclipse.rdf4j.sail.s3.storage.ParquetSchemas; +import org.eclipse.rdf4j.sail.s3.storage.PartitionIndexSelector; +import org.eclipse.rdf4j.sail.s3.storage.PartitionMergeIterator; import org.eclipse.rdf4j.sail.s3.storage.QuadIndex; import org.eclipse.rdf4j.sail.s3.storage.RawEntrySource; import org.eclipse.rdf4j.sail.s3.storage.S3ObjectStore; -import org.eclipse.rdf4j.sail.s3.storage.SSTable; -import org.eclipse.rdf4j.sail.s3.storage.SSTableWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; /** - * {@link SailStore} implementation that stores RDF quads in {@link MemTable}s with optional persistence to - * S3-compatible object storage via SSTables. When S3 is not configured, operates in pure in-memory mode. + * {@link SailStore} implementation that stores RDF quads using Parquet files on S3-compatible object storage with + * predicate-based vertical partitioning. + * + *
+ * Architecture: single in-memory {@link MemTable} in SPOC order → on flush, partition by predicate and write 3 Parquet
+ * files per partition (SOC, OSC, CSO sort orders) → multi-tier cache (Caffeine heap + disk) → compaction.
+ *
+ * When S3 is not configured, operates in pure in-memory mode.
*/ class S3SailStore implements SailStore { final Logger logger = LoggerFactory.getLogger(S3SailStore.class); + private static final String[] SORT_ORDERS = { "soc", "osc", "cso" }; + private static final int DEFAULT_ROW_GROUP_SIZE = 8 * 1024 * 1024; // 8 MiB + private static final int DEFAULT_PAGE_SIZE = 64 * 1024; // 64 KiB + private final S3ValueStore valueStore; private final S3NamespaceStore namespaceStore; - private final List indexes; - private List memTables; + + // Single MemTable in SPOC order (new design: 1x memory, partition on flush) + private final QuadIndex spocIndex; + private volatile MemTable memTable; private volatile boolean mayHaveInferred; // Persistence fields (null when S3 is not configured) private final ObjectStore objectStore; private final ObjectMapper jsonMapper; - private Manifest manifest; - private final List> sstablesByIndex; // per-index list, newest first + private Catalog catalog; private final AtomicLong epochCounter; private final long memTableFlushSize; + private final TieredCache cache; + private final CompactionPolicy compactionPolicy; + private final Compactor compactor; + private final int rowGroupSize; + private final int pageSize; /** * A lock to control concurrent access by {@link S3SailSink} to the stores. @@ -99,78 +125,41 @@ class S3SailStore implements SailStore { this.namespaceStore = new S3NamespaceStore(); this.objectStore = objectStore; this.memTableFlushSize = config.getMemTableSize(); + this.rowGroupSize = DEFAULT_ROW_GROUP_SIZE; + this.pageSize = DEFAULT_PAGE_SIZE; - // Parse index specifications from config - String indexSpec = config.getQuadIndexes(); - Set indexSpecs = QuadIndex.parseIndexSpecList(indexSpec); - this.indexes = new ArrayList<>(indexSpecs.size()); - this.memTables = new ArrayList<>(indexSpecs.size()); - for (String spec : indexSpecs) { - QuadIndex qi = new QuadIndex(spec); - indexes.add(qi); - memTables.add(new MemTable(qi)); - } - - if (indexes.isEmpty()) { - QuadIndex defaultIndex = new QuadIndex("spoc"); - indexes.add(defaultIndex); - memTables.add(new MemTable(defaultIndex)); - } + // Single SPOC index for the MemTable + this.spocIndex = new QuadIndex("spoc"); + this.memTable = new MemTable(spocIndex); // Initialize persistence if (objectStore != null) { this.jsonMapper = new ObjectMapper(); - this.manifest = Manifest.load(objectStore, jsonMapper); - this.epochCounter = new AtomicLong(computeMaxEpoch(manifest) + 1); - this.sstablesByIndex = new ArrayList<>(indexes.size()); - for (int i = 0; i < indexes.size(); i++) { - sstablesByIndex.add(new ArrayList<>()); - } + this.catalog = Catalog.load(objectStore, jsonMapper); + this.epochCounter = new AtomicLong(catalog.getEpoch() + 1); + + // Initialize cache + Path diskCachePath = config.getDiskCachePath() != null ? 
Path.of(config.getDiskCachePath()) : null; + this.cache = new TieredCache(config.getMemoryCacheSize(), diskCachePath, + config.getDiskCacheSize(), objectStore); + + this.compactionPolicy = new CompactionPolicy(); + this.compactor = new Compactor(objectStore, cache, rowGroupSize, pageSize); // Deserialize value store and namespaces - if (manifest.getNextValueId() > 0) { - valueStore.deserialize(objectStore, manifest.getNextValueId()); + if (catalog.getNextValueId() > 0) { + valueStore.deserialize(objectStore, catalog.getNextValueId()); } namespaceStore.deserialize(objectStore, jsonMapper); - - // Load existing SSTables from manifest - for (Manifest.SSTableInfo info : manifest.getSstables()) { - int idxPos = findIndexByName(info.getIndexName()); - if (idxPos >= 0) { - byte[] sstData = objectStore.get(info.getS3Key()); - if (sstData != null) { - SSTable sst = new SSTable(sstData, indexes.get(idxPos)); - sstablesByIndex.get(idxPos).add(sst); - } - } - } } else { this.jsonMapper = null; - this.manifest = null; this.epochCounter = null; - this.sstablesByIndex = null; + this.cache = null; + this.compactionPolicy = null; + this.compactor = null; } } - private static long computeMaxEpoch(Manifest manifest) { - long max = 0; - for (Manifest.SSTableInfo info : manifest.getSstables()) { - if (info.getEpoch() > max) { - max = info.getEpoch(); - } - } - return max; - } - - private int findIndexByName(String indexName) { - for (int i = 0; i < indexes.size(); i++) { - if (indexes.get(i).getFieldSeqString().equals(indexName)) { - return i; - } - } - return -1; - } - @Override public ValueFactory getValueFactory() { return valueStore; @@ -196,108 +185,178 @@ public void close() throws SailException { try { if (objectStore != null) { flushToObjectStore(); + if (cache != null) { + cache.close(); + } objectStore.close(); } } catch (IOException e) { throw new SailException(e); } valueStore.close(); - for (MemTable mt : memTables) { - mt.clear(); - } - } - - /** - * Selects the best MemTable for the given query pattern. - */ - private int getBestIndex(long subj, long pred, long obj, long context) { - int bestScore = -1; - int bestIdx = 0; - for (int i = 0; i < indexes.size(); i++) { - int score = indexes.get(i).getPatternScore(subj, pred, obj, context); - if (score > bestScore) { - bestScore = score; - bestIdx = i; - } - } - return bestIdx; + memTable.clear(); } /** - * Flushes active MemTables to SSTables on the object store. + * Flushes active MemTable to Parquet files on the object store, partitioned by predicate. 
*/ private void flushToObjectStore() { if (objectStore == null) { return; } - // Check if any MemTable has data - boolean hasMemTableData = false; - for (MemTable mt : memTables) { - if (mt.size() > 0) { - hasMemTableData = true; - break; - } + if (memTable.size() == 0) { + // Still persist value store and namespaces + long epoch = epochCounter.getAndIncrement(); + valueStore.serialize(objectStore); + namespaceStore.serialize(objectStore, jsonMapper); + catalog.setNextValueId(valueStore.getNextId()); + catalog.setEpoch(epoch); + catalog.save(objectStore, jsonMapper, epoch); + return; } long epoch = epochCounter.getAndIncrement(); - List newInfos = new ArrayList<>(); + // Freeze active MemTable and swap in fresh one + MemTable frozen = memTable; + frozen.freeze(); + memTable = new MemTable(spocIndex); + + // Partition by predicate + Map> partitions = frozen.partitionByPredicate(); - if (hasMemTableData) { - // Freeze active MemTables and swap in fresh ones - List frozenTables = memTables; - List newTables = new ArrayList<>(indexes.size()); - for (int i = 0; i < indexes.size(); i++) { - frozenTables.get(i).freeze(); - newTables.add(new MemTable(indexes.get(i))); + // For each predicate partition, write 3 Parquet files (all sort orders) + for (Map.Entry> partEntry : partitions.entrySet()) { + long predId = partEntry.getKey(); + List entries = partEntry.getValue(); + + // Set predicate label for debugging + Value predValue = valueStore.getValue(predId); + if (predValue != null) { + catalog.getPredicateLabels().put(String.valueOf(predId), predValue.stringValue()); } - memTables = newTables; - // Write each frozen MemTable as an SSTable - for (int i = 0; i < indexes.size(); i++) { - MemTable frozen = frozenTables.get(i); - if (frozen.size() == 0) { - continue; + for (String sortOrder : SORT_ORDERS) { + // Sort entries according to sort order + List sorted = sortEntries(entries, sortOrder); + + // Build Parquet file + List pqEntries = new ArrayList<>(sorted.size()); + for (MemTable.QuadEntry e : sorted) { + pqEntries.add(new ParquetFileBuilder.QuadEntry(e.subject, e.object, e.context, e.flag)); } - String indexName = indexes.get(i).getFieldSeqString(); - String s3Key = "sstables/L0-" + epoch + "-" + indexName + ".sst"; - byte[] sstData = SSTableWriter.write(frozen); - objectStore.put(s3Key, sstData); + byte[] parquetData = ParquetFileBuilder.build(pqEntries, ParquetSchemas.PARTITIONED_SCHEMA, + ParquetSchemas.SortOrder.fromSuffix(sortOrder), predId, rowGroupSize, pageSize); - SSTable sst = new SSTable(sstData, indexes.get(i)); - sstablesByIndex.get(i).add(0, sst); // prepend (newest first) + String s3Key = "data/predicates/" + predId + "/L0-" + + String.format("%05d", epoch) + "-" + sortOrder + ".parquet"; - newInfos.add(new Manifest.SSTableInfo( - s3Key, 0, indexName, - bytesToHex(sst.getMinKey()), bytesToHex(sst.getMaxKey()), - sst.getEntryCount(), epoch)); + objectStore.put(s3Key, parquetData); + + // Write-through to cache + if (cache != null) { + cache.writeThrough(s3Key, parquetData); + } + + // Compute stats + long minSubject = Long.MAX_VALUE, maxSubject = Long.MIN_VALUE; + long minObject = Long.MAX_VALUE, maxObject = Long.MIN_VALUE; + long minContext = Long.MAX_VALUE, maxContext = Long.MIN_VALUE; + for (MemTable.QuadEntry e : sorted) { + minSubject = Math.min(minSubject, e.subject); + maxSubject = Math.max(maxSubject, e.subject); + minObject = Math.min(minObject, e.object); + maxObject = Math.max(maxObject, e.object); + minContext = Math.min(minContext, e.context); + maxContext = 
Math.max(maxContext, e.context); + } + + catalog.addFile(predId, new Catalog.ParquetFileInfo( + s3Key, 0, sortOrder, sorted.size(), epoch, parquetData.length, + minSubject, maxSubject, minObject, maxObject, minContext, maxContext)); } } - // Always persist value store and namespaces + // Persist value store and namespaces valueStore.serialize(objectStore); namespaceStore.serialize(objectStore, jsonMapper); - // Update and save manifest - List allInfos = new ArrayList<>(newInfos); - allInfos.addAll(manifest.getSstables()); - manifest.setSstables(allInfos); - manifest.setNextValueId(valueStore.getNextId()); - manifest.save(objectStore, jsonMapper, epoch); + // Atomic catalog update + catalog.setNextValueId(valueStore.getNextId()); + catalog.setEpoch(epoch); + catalog.save(objectStore, jsonMapper, epoch); + + // Check compaction triggers + runCompactionIfNeeded(); } - private static String bytesToHex(byte[] bytes) { - StringBuilder sb = new StringBuilder(bytes.length * 2); - for (byte b : bytes) { - sb.append(String.format("%02x", b & 0xFF)); + /** + * Sorts entries according to the given sort order. + */ + private static List sortEntries(List entries, String sortOrder) { + List sorted = new ArrayList<>(entries); + Comparator cmp; + switch (sortOrder) { + case "osc": + cmp = Comparator.comparingLong((MemTable.QuadEntry e) -> e.object) + .thenComparingLong(e -> e.subject) + .thenComparingLong(e -> e.context); + break; + case "cso": + cmp = Comparator.comparingLong((MemTable.QuadEntry e) -> e.context) + .thenComparingLong(e -> e.subject) + .thenComparingLong(e -> e.object); + break; + case "soc": + default: + cmp = Comparator.comparingLong((MemTable.QuadEntry e) -> e.subject) + .thenComparingLong(e -> e.object) + .thenComparingLong(e -> e.context); + break; + } + sorted.sort(cmp); + return sorted; + } + + /** + * Checks compaction triggers and runs compaction if needed. + */ + private void runCompactionIfNeeded() { + if (compactionPolicy == null || compactor == null) { + return; + } + + for (long predId : catalog.getPredicateIds()) { + List files = catalog.getFilesForPredicate(predId); + + // L0→L1 compaction + if (compactionPolicy.shouldCompactL0(files)) { + List l0Files = CompactionPolicy.filesAtLevel(files, 0); + long compactEpoch = epochCounter.getAndIncrement(); + compactor.compact(predId, l0Files, 0, 1, compactEpoch, catalog); + + // Re-fetch files after compaction and check L1→L2 + files = catalog.getFilesForPredicate(predId); + } + + // L1→L2 compaction + if (compactionPolicy.shouldCompactL1(files)) { + List l1Files = CompactionPolicy.filesAtLevel(files, 1); + long compactEpoch = epochCounter.getAndIncrement(); + compactor.compact(predId, l1Files, 1, 2, compactEpoch, catalog); + } } - return sb.toString(); + + // Save catalog after compaction + long epoch = epochCounter.getAndIncrement(); + catalog.setEpoch(epoch); + catalog.save(objectStore, jsonMapper, epoch); } /** - * Creates a statement iterator for the given pattern. + * Creates a statement iterator for the given pattern using predicate partitioning. */ CloseableIteration createStatementIterator( Resource subj, IRI pred, Value obj, boolean explicit, Resource... contexts) { @@ -350,27 +409,16 @@ CloseableIteration createStatementIterator( return new EmptyIteration<>(); } - int bestIdx = getBestIndex(subjID, predID, objID, - contextIDList.size() == 1 ? 
contextIDList.get(0) : S3ValueStore.UNKNOWN_ID); - - boolean hasSSTables = sstablesByIndex != null && !sstablesByIndex.get(bestIdx).isEmpty(); + boolean hasPersistence = objectStore != null && catalog != null; ArrayList> perContextIterList = new ArrayList<>(contextIDList.size()); for (long contextID : contextIDList) { Iterator quads; - if (hasSSTables) { - // Build merged source: MemTable (newest) + SSTables (newest first) - List sources = new ArrayList<>(); - sources.add(memTables.get(bestIdx).asRawSource(subjID, predID, objID, contextID)); - for (SSTable sst : sstablesByIndex.get(bestIdx)) { - sources.add(sst.asRawSource(subjID, predID, objID, contextID)); - } - byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; - quads = new MergeIterator(sources, indexes.get(bestIdx), expectedFlag, - subjID, predID, objID, contextID); + if (hasPersistence) { + quads = createMergedIterator(subjID, predID, objID, contextID, explicit); } else { - quads = memTables.get(bestIdx).scan(subjID, predID, objID, contextID, explicit); + quads = memTable.scan(subjID, predID, objID, contextID, explicit); } perContextIterList.add(new QuadToStatementIteration(quads, valueStore)); } @@ -382,6 +430,88 @@ CloseableIteration createStatementIterator( } } + /** + * Creates a merged iterator across MemTable and Parquet files for a given pattern. + */ + private Iterator createMergedIterator(long subjID, long predID, long objID, long contextID, + boolean explicit) { + + boolean subjectBound = subjID >= 0; + boolean objectBound = objID >= 0; + boolean contextBound = contextID >= 0; + + // Select best sort order for within-partition queries + String bestSortOrder = PartitionIndexSelector.selectSortOrder(subjectBound, objectBound, contextBound); + + if (predID >= 0) { + // Predicate bound → single partition + return createPartitionIterator(predID, subjID, objID, contextID, bestSortOrder, explicit); + } else { + // Predicate unbound → fan out to all partitions + Set predIds = catalog.getPredicateIds(); + if (predIds.isEmpty()) { + // Only MemTable data + return memTable.scan(subjID, predID, objID, contextID, explicit); + } + + List> partitionIters = new ArrayList<>(); + for (long pid : predIds) { + partitionIters.add(createPartitionIterator(pid, subjID, objID, contextID, bestSortOrder, explicit)); + } + + // Union all partitions (each partition's iterator handles dedup internally) + return new UnionIterator(partitionIters); + } + } + + /** + * Creates a merged iterator for a single predicate partition. All sources produce 3-varint keys in the partition + * sort order (predicate is implicit in the partition). + */ + private Iterator createPartitionIterator(long predId, long subjID, long objID, long contextID, + String sortOrder, boolean explicit) { + + byte expectedFlag = explicit ? 
MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; + + // Build sources: MemTable (newest) + Parquet files (newest epoch first) + // All sources produce 3-varint keys in the same partition sort order + List sources = new ArrayList<>(); + + // MemTable source (always newest) — re-encoded as 3-varint partition keys + sources.add(memTable.asPartitionRawSource(predId, subjID, objID, contextID, sortOrder)); + + // Parquet files for this predicate partition and sort order + List files = catalog.getFilesForPredicate(predId); + List sortOrderFiles = files.stream() + .filter(f -> sortOrder.equals(f.getSortOrder())) + .sorted(Comparator.comparingLong(Catalog.ParquetFileInfo::getEpoch).reversed()) + .toList(); + + for (Catalog.ParquetFileInfo fileInfo : sortOrderFiles) { + // Catalog-level pruning using per-file stats + if (subjID >= 0 && (subjID < fileInfo.getMinSubject() || subjID > fileInfo.getMaxSubject())) { + continue; + } + if (objID >= 0 && (objID < fileInfo.getMinObject() || objID > fileInfo.getMaxObject())) { + continue; + } + if (contextID >= 0 && (contextID < fileInfo.getMinContext() || contextID > fileInfo.getMaxContext())) { + continue; + } + + byte[] fileData = cache != null ? cache.get(fileInfo.getS3Key()) : objectStore.get(fileInfo.getS3Key()); + if (fileData == null) { + logger.warn("Missing Parquet file: {}", fileInfo.getS3Key()); + continue; + } + + sources.add(new ParquetQuadSource(fileData, sortOrder, subjID, objID, contextID)); + } + + // Use PartitionMergeIterator: all sources produce 3-varint keys, predicate injected on decode + return new PartitionMergeIterator(sources, predId, sortOrder, expectedFlag, subjID, objID, contextID); + } + // ========================================================================= // Inner classes // ========================================================================= @@ -502,20 +632,12 @@ public void approveAll(Set approved, Set approvedContexts) mayHaveInferred = true; } - for (MemTable mt : memTables) { - mt.put(s, p, o, c, explicit); - } + memTable.put(s, p, o, c, explicit); } // Size-triggered flush - if (objectStore != null) { - long totalSize = 0; - for (MemTable mt : memTables) { - totalSize += mt.approximateSizeInBytes(); - } - if (totalSize >= memTableFlushSize) { - flushToObjectStore(); - } + if (objectStore != null && memTable.approximateSizeInBytes() >= memTableFlushSize) { + flushToObjectStore(); } } finally { sinkStoreAccessLock.unlock(); @@ -550,9 +672,7 @@ private void addStatement(Resource subj, IRI pred, Value obj, boolean explicit, mayHaveInferred = true; } - for (MemTable mt : memTables) { - mt.put(s, p, o, c, explicit); - } + memTable.put(s, p, o, c, explicit); } finally { sinkStoreAccessLock.unlock(); } @@ -610,26 +730,15 @@ private long removeStatements(Resource subj, IRI pred, Value obj, boolean explic } } - int bestIdx = getBestIndex(subjID, predID, objID, - contextIds.length == 1 ? 
contextIds[0] : S3ValueStore.UNKNOWN_ID); - MemTable scanTable = memTables.get(bestIdx); - long removeCount = 0; for (long contextId : contextIds) { - // When SSTables exist, use merged iterator for remove scan + boolean hasPersistence = objectStore != null && catalog != null; + Iterator iter; - boolean hasSSTables = sstablesByIndex != null && !sstablesByIndex.get(bestIdx).isEmpty(); - if (hasSSTables) { - List sources = new ArrayList<>(); - sources.add(scanTable.asRawSource(subjID, predID, objID, contextId)); - for (SSTable sst : sstablesByIndex.get(bestIdx)) { - sources.add(sst.asRawSource(subjID, predID, objID, contextId)); - } - byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; - iter = new MergeIterator(sources, indexes.get(bestIdx), expectedFlag, - subjID, predID, objID, contextId); + if (hasPersistence) { + iter = createMergedIterator(subjID, predID, objID, contextId, explicit); } else { - iter = scanTable.scan(subjID, predID, objID, contextId, explicit); + iter = memTable.scan(subjID, predID, objID, contextId, explicit); } List toRemove = new ArrayList<>(); @@ -637,9 +746,7 @@ private long removeStatements(Resource subj, IRI pred, Value obj, boolean explic toRemove.add(iter.next()); } for (long[] quad : toRemove) { - for (MemTable mt : memTables) { - mt.remove(quad[0], quad[1], quad[2], quad[3], explicit); - } + memTable.remove(quad[0], quad[1], quad[2], quad[3], explicit); removeCount++; } } @@ -677,21 +784,13 @@ public CloseableIteration getNamespaces() { @Override public CloseableIteration getContextIDs() throws SailException { // Scan all quads and collect distinct non-null contexts - // Use the merged read path (createStatementIterator covers this) - int bestIdx = 0; // use first index for full scan - boolean hasSSTables = sstablesByIndex != null && !sstablesByIndex.get(bestIdx).isEmpty(); + boolean hasPersistence = objectStore != null && catalog != null; Iterator allQuads; - if (hasSSTables) { - List sources = new ArrayList<>(); - sources.add(memTables.get(bestIdx).asRawSource(-1, -1, -1, -1)); - for (SSTable sst : sstablesByIndex.get(bestIdx)) { - sources.add(sst.asRawSource(-1, -1, -1, -1)); - } - byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; - allQuads = new MergeIterator(sources, indexes.get(bestIdx), expectedFlag, -1, -1, -1, -1); + if (hasPersistence) { + allQuads = createMergedIterator(-1, -1, -1, -1, explicit); } else { - allQuads = memTables.get(bestIdx).scan(-1, -1, -1, -1, explicit); + allQuads = memTable.scan(-1, -1, -1, -1, explicit); } return new FilterIteration( @@ -776,4 +875,39 @@ public void close() { // no-op } } + + /** + * Simple union iterator that concatenates multiple iterators. Used for fan-out across predicate partitions. 
+ */ + private static class UnionIterator implements Iterator { + private final List> iterators; + private int currentIdx; + + UnionIterator(List> iterators) { + this.iterators = iterators; + this.currentIdx = 0; + advanceToNonEmpty(); + } + + private void advanceToNonEmpty() { + while (currentIdx < iterators.size() && !iterators.get(currentIdx).hasNext()) { + currentIdx++; + } + } + + @Override + public boolean hasNext() { + return currentIdx < iterators.size(); + } + + @Override + public long[] next() { + long[] result = iterators.get(currentIdx).next(); + if (!iterators.get(currentIdx).hasNext()) { + currentIdx++; + advanceToNonEmpty(); + } + return result; + } + } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L1HeapCache.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L1HeapCache.java new file mode 100644 index 00000000000..fb16361202d --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L1HeapCache.java @@ -0,0 +1,54 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.cache; + +import java.io.Closeable; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; + +/** + * L1 in-memory cache backed by Caffeine. Caches full file bytes keyed by S3 object key, weighted by byte array length. + */ +public class L1HeapCache implements Closeable { + + private final Cache fileCache; + + public L1HeapCache(long maxWeightBytes) { + this.fileCache = Caffeine.newBuilder() + .maximumWeight(maxWeightBytes) + .weigher((String key, byte[] value) -> value.length) + .recordStats() + .build(); + } + + public byte[] get(String s3Key) { + return fileCache.getIfPresent(s3Key); + } + + public void put(String s3Key, byte[] data) { + fileCache.put(s3Key, data); + } + + public void invalidate(String s3Key) { + fileCache.invalidate(s3Key); + } + + public void invalidateAll() { + fileCache.invalidateAll(); + } + + @Override + public void close() { + fileCache.invalidateAll(); + fileCache.cleanUp(); + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java new file mode 100644 index 00000000000..52d138de8a0 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java @@ -0,0 +1,215 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.cache; + +import java.io.Closeable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * L2 disk-based LRU cache that mirrors S3 path structure on local filesystem. Entries are evicted in LRU order when the + * total cache size exceeds the configured maximum. A JSON index file is persisted on close so that cache state survives + * restarts. + */ +public class L2DiskCache implements Closeable { + + private static final Logger logger = LoggerFactory.getLogger(L2DiskCache.class); + private static final String INDEX_FILE = "_cache_index.json"; + + private final Path cacheDir; + private final long maxSizeBytes; + private final AtomicLong currentSizeBytes = new AtomicLong(0); + private final ConcurrentHashMap index = new ConcurrentHashMap<>(); + private final ObjectMapper mapper = new ObjectMapper(); + + public L2DiskCache(Path cacheDir, long maxSizeBytes) { + this.cacheDir = cacheDir; + this.maxSizeBytes = maxSizeBytes; + try { + Files.createDirectories(cacheDir); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + loadIndex(); + } + + public byte[] get(String s3Key) { + CacheEntry entry = index.get(s3Key); + if (entry == null) { + return null; + } + Path filePath = cacheDir.resolve(s3Key); + if (!Files.exists(filePath)) { + index.remove(s3Key); + currentSizeBytes.addAndGet(-entry.sizeBytes); + return null; + } + entry.lastAccessNanos = System.nanoTime(); + try { + return Files.readAllBytes(filePath); + } catch (IOException e) { + logger.warn("Failed to read cache file: {}", filePath, e); + return null; + } + } + + public void put(String s3Key, byte[] data) { + evictIfNeeded(data.length); + Path filePath = cacheDir.resolve(s3Key); + try { + Files.createDirectories(filePath.getParent()); + // Atomic write via temp file + rename + Path tmpFile = filePath.resolveSibling(filePath.getFileName() + ".tmp"); + Files.write(tmpFile, data, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); + Files.move(tmpFile, filePath, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + logger.warn("Failed to write cache file: {}", filePath, e); + return; + } + + CacheEntry prev = index.put(s3Key, new CacheEntry(data.length, System.nanoTime())); + if (prev != null) { + currentSizeBytes.addAndGet(data.length - prev.sizeBytes); + } else { + currentSizeBytes.addAndGet(data.length); + } + } + + public void remove(String s3Key) { + CacheEntry entry = index.remove(s3Key); + if (entry != null) { + currentSizeBytes.addAndGet(-entry.sizeBytes); + try { + Files.deleteIfExists(cacheDir.resolve(s3Key)); + } catch (IOException e) { + logger.warn("Failed to delete cache file: {}", s3Key, e); + } + } + } + + private void evictIfNeeded(long incomingSize) { + while (currentSizeBytes.get() + incomingSize > maxSizeBytes && !index.isEmpty()) { + // Find LRU entry + String lruKey = null; + long oldestAccess = Long.MAX_VALUE; + for (var e : 
index.entrySet()) { + if (e.getValue().lastAccessNanos < oldestAccess) { + oldestAccess = e.getValue().lastAccessNanos; + lruKey = e.getKey(); + } + } + if (lruKey != null) { + remove(lruKey); + } else { + break; + } + } + } + + private void loadIndex() { + Path indexPath = cacheDir.resolve(INDEX_FILE); + if (Files.exists(indexPath)) { + try { + CacheIndex saved = mapper.readValue(indexPath.toFile(), CacheIndex.class); + if (saved.entries != null) { + long totalSize = 0; + for (var e : saved.entries.entrySet()) { + if (Files.exists(cacheDir.resolve(e.getKey()))) { + index.put(e.getKey(), e.getValue()); + totalSize += e.getValue().sizeBytes; + } + } + currentSizeBytes.set(totalSize); + } + return; + } catch (IOException e) { + logger.warn("Failed to load cache index, rebuilding", e); + } + } + rebuildIndex(); + } + + private void rebuildIndex() { + index.clear(); + long totalSize = 0; + try (Stream walk = Files.walk(cacheDir)) { + var iter = walk.filter(Files::isRegularFile) + .filter(p -> !p.getFileName().toString().equals(INDEX_FILE)) + .filter(p -> !p.getFileName().toString().endsWith(".tmp")) + .iterator(); + while (iter.hasNext()) { + Path p = iter.next(); + try { + long size = Files.size(p); + String key = cacheDir.relativize(p).toString(); + index.put(key, new CacheEntry(size, System.nanoTime())); + totalSize += size; + } catch (IOException e) { + // skip unreadable files + } + } + } catch (IOException e) { + logger.warn("Failed to walk cache directory", e); + } + currentSizeBytes.set(totalSize); + } + + public void persistIndex() { + try { + CacheIndex ci = new CacheIndex(); + ci.entries = new ConcurrentHashMap<>(index); + Path indexPath = cacheDir.resolve(INDEX_FILE); + mapper.writeValue(indexPath.toFile(), ci); + } catch (IOException e) { + logger.warn("Failed to persist cache index", e); + } + } + + @Override + public void close() { + persistIndex(); + } + + static class CacheEntry { + @JsonProperty("sizeBytes") + public long sizeBytes; + + @JsonProperty("lastAccessNanos") + public long lastAccessNanos; + + public CacheEntry() { + // for Jackson deserialization + } + + CacheEntry(long sizeBytes, long lastAccessNanos) { + this.sizeBytes = sizeBytes; + this.lastAccessNanos = lastAccessNanos; + } + } + + static class CacheIndex { + @JsonProperty("entries") + public ConcurrentHashMap entries; + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java new file mode 100644 index 00000000000..e78424b8c1e --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java @@ -0,0 +1,98 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.cache; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.file.Path; + +import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unified three-tier cache facade: L1 (heap) -> L2 (disk) -> L3 (S3 / ObjectStore). 
On a cache miss at a given tier, + * data is fetched from the next tier down and promoted into all higher tiers. + */ +public class TieredCache implements Closeable { + + private static final Logger logger = LoggerFactory.getLogger(TieredCache.class); + + private final L1HeapCache l1; + private final L2DiskCache l2; // nullable when no disk cache path is configured + private final ObjectStore objectStore; + + public TieredCache(long heapCacheSize, Path diskCachePath, long diskCacheSize, ObjectStore objectStore) { + this.l1 = new L1HeapCache(heapCacheSize); + this.l2 = diskCachePath != null ? new L2DiskCache(diskCachePath, diskCacheSize) : null; + this.objectStore = objectStore; + } + + /** + * Get file bytes for the given S3 key. Checks L1 (heap) first, then L2 (disk), then L3 (S3), promoting data into + * higher tiers on a miss. + */ + public byte[] get(String s3Key) { + // L1 + byte[] data = l1.get(s3Key); + if (data != null) { + return data; + } + + // L2 + if (l2 != null) { + data = l2.get(s3Key); + if (data != null) { + l1.put(s3Key, data); // promote to L1 + return data; + } + } + + // L3 (S3) + data = objectStore.get(s3Key); + if (data != null) { + l1.put(s3Key, data); // populate L1 + if (l2 != null) { + l2.put(s3Key, data); // populate L2 + } + } + return data; + } + + /** + * Write-through: populate L1 and L2 immediately (e.g., on flush). Does NOT write to S3; the caller handles that + * separately. + */ + public void writeThrough(String s3Key, byte[] data) { + l1.put(s3Key, data); + if (l2 != null) { + l2.put(s3Key, data); + } + } + + /** + * Invalidate a key from all cache tiers (e.g., after compaction deletes a file). + */ + public void invalidate(String s3Key) { + l1.invalidate(s3Key); + if (l2 != null) { + l2.remove(s3Key); + } + } + + @Override + public void close() throws IOException { + l1.close(); + if (l2 != null) { + l2.close(); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayInputFile.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayInputFile.java new file mode 100644 index 00000000000..355dbb544df --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayInputFile.java @@ -0,0 +1,142 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.parquet.io.InputFile; +import org.apache.parquet.io.SeekableInputStream; + +/** + * An {@link InputFile} implementation that reads Parquet data from an in-memory byte array. This avoids any dependency + * on Hadoop's file system abstraction. + */ +public class ByteArrayInputFile implements InputFile { + + private final byte[] data; + + /** + * Creates a new input file backed by the given byte array. 
+ * + * @param data the Parquet file content + */ + public ByteArrayInputFile(byte[] data) { + this.data = data; + } + + @Override + public long getLength() { + return data.length; + } + + @Override + public SeekableInputStream newStream() { + return new ByteArraySeekableInputStream(data); + } + + /** + * A {@link SeekableInputStream} backed by a byte array. + */ + private static class ByteArraySeekableInputStream extends SeekableInputStream { + + private final byte[] data; + private int pos; + + ByteArraySeekableInputStream(byte[] data) { + this.data = data; + this.pos = 0; + } + + @Override + public int read() throws IOException { + if (pos >= data.length) { + return -1; + } + return data[pos++] & 0xFF; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (pos >= data.length) { + return -1; + } + int available = data.length - pos; + int toRead = Math.min(len, available); + System.arraycopy(data, pos, b, off, toRead); + pos += toRead; + return toRead; + } + + @Override + public long getPos() throws IOException { + return pos; + } + + @Override + public void seek(long newPos) throws IOException { + if (newPos < 0 || newPos > data.length) { + throw new IOException("Seek position " + newPos + " is out of range [0, " + data.length + "]"); + } + this.pos = (int) newPos; + } + + @Override + public void readFully(byte[] bytes) throws IOException { + readFully(bytes, 0, bytes.length); + } + + @Override + public void readFully(byte[] bytes, int start, int len) throws IOException { + int available = data.length - pos; + if (available < len) { + throw new EOFException( + "Reached end of stream: needed " + len + " bytes but only " + available + " available"); + } + System.arraycopy(data, pos, bytes, start, len); + pos += len; + } + + @Override + public int read(ByteBuffer buf) throws IOException { + int len = buf.remaining(); + if (len == 0) { + return 0; + } + int available = data.length - pos; + if (available <= 0) { + return -1; + } + int toRead = Math.min(len, available); + buf.put(data, pos, toRead); + pos += toRead; + return toRead; + } + + @Override + public void readFully(ByteBuffer buf) throws IOException { + int len = buf.remaining(); + int available = data.length - pos; + if (available < len) { + throw new EOFException( + "Reached end of stream: needed " + len + " bytes but only " + available + " available"); + } + buf.put(data, pos, len); + pos += len; + } + + @Override + public int available() { + return data.length - pos; + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java new file mode 100644 index 00000000000..2b84ed7f230 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java @@ -0,0 +1,110 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.parquet.io.OutputFile; +import org.apache.parquet.io.PositionOutputStream; + +/** + * An {@link OutputFile} implementation that writes Parquet data to an in-memory byte array. This avoids any dependency + * on Hadoop's file system abstraction. + * + *
+ * <p>
+ * After writing is complete, call {@link #toByteArray()} to retrieve the serialized Parquet bytes. + */ +public class ByteArrayOutputFile implements OutputFile { + + private ByteArrayOutputStream baos; + + @Override + public PositionOutputStream create(long blockSizeHint) throws IOException { + baos = new ByteArrayOutputStream(); + return new ByteArrayPositionOutputStream(baos); + } + + @Override + public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException { + return create(blockSizeHint); + } + + @Override + public boolean supportsBlockSize() { + return false; + } + + @Override + public long defaultBlockSize() { + return 0; + } + + /** + * Returns the bytes written to this output file. + * + * @return the Parquet file content as a byte array + * @throws IllegalStateException if no data has been written yet + */ + public byte[] toByteArray() { + if (baos == null) { + throw new IllegalStateException("No data has been written"); + } + return baos.toByteArray(); + } + + /** + * A {@link PositionOutputStream} backed by a {@link ByteArrayOutputStream}. + */ + private static class ByteArrayPositionOutputStream extends PositionOutputStream { + + private final ByteArrayOutputStream baos; + private long pos; + + ByteArrayPositionOutputStream(ByteArrayOutputStream baos) { + this.baos = baos; + this.pos = 0; + } + + @Override + public long getPos() { + return pos; + } + + @Override + public void write(int b) throws IOException { + baos.write(b); + pos++; + } + + @Override + public void write(byte[] b) throws IOException { + baos.write(b); + pos += b.length; + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + baos.write(b, off, len); + pos += len; + } + + @Override + public void flush() throws IOException { + baos.flush(); + } + + @Override + public void close() throws IOException { + baos.close(); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java new file mode 100644 index 00000000000..9c6e2bd806e --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java @@ -0,0 +1,387 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * JSON-serialized catalog tracking Parquet files with per-file statistics and predicate partitioning. + * + *
+ * <p>
+ * Evolved from {@link Manifest} to support the Parquet-based storage format with predicate partitioning. Each predicate
+ * ID maps to a list of {@link ParquetFileInfo} entries describing the Parquet files for that partition.
+ *
+ * <p>
+ * <b>S3 Layout</b>
+ *
+ * <pre>
+ * catalog/current             -> plain text "v{epoch}.json"
+ * catalog/v{epoch}.json       -> JSON catalog
+ * </pre>
+ *
+ * <p>
+ * <b>JSON Structure</b>
+ *
+ * <pre>
+ * {
+ *   "version": 2,
+ *   "epoch": 42,
+ *   "nextValueId": 12345,
+ *   "predicatePartitions": {
+ *     "7": [ { file info... } ],
+ *     "42": [ { file info... } ]
+ *   },
+ *   "predicateLabels": { "7": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" },
+ *   "unpartitionedFiles": []
+ * }
+ * </pre>
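+ *
+ * <p>
+ * A minimal usage sketch; the {@code store}, {@code mapper}, and {@code fileInfo} instances are assumed to be
+ * supplied by the caller:
+ *
+ * <pre>
+ * Catalog catalog = Catalog.load(store, mapper);
+ * catalog.addFile(7L, fileInfo);
+ * catalog.save(store, mapper, catalog.getEpoch() + 1);
+ * </pre>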
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class Catalog { + + @JsonProperty("version") + private int version = 2; + + @JsonProperty("epoch") + private long epoch; + + @JsonProperty("nextValueId") + private long nextValueId; + + @JsonProperty("predicatePartitions") + private Map> predicatePartitions = new LinkedHashMap<>(); + + @JsonProperty("predicateLabels") + private Map predicateLabels = new LinkedHashMap<>(); + + @JsonProperty("unpartitionedFiles") + private List unpartitionedFiles = new ArrayList<>(); + + public Catalog() { + } + + public int getVersion() { + return version; + } + + public void setVersion(int version) { + this.version = version; + } + + public long getEpoch() { + return epoch; + } + + public void setEpoch(long epoch) { + this.epoch = epoch; + } + + public long getNextValueId() { + return nextValueId; + } + + public void setNextValueId(long nextValueId) { + this.nextValueId = nextValueId; + } + + public Map> getPredicatePartitions() { + return predicatePartitions; + } + + public void setPredicatePartitions(Map> predicatePartitions) { + this.predicatePartitions = predicatePartitions; + } + + public Map getPredicateLabels() { + return predicateLabels; + } + + public void setPredicateLabels(Map predicateLabels) { + this.predicateLabels = predicateLabels; + } + + public List getUnpartitionedFiles() { + return unpartitionedFiles; + } + + public void setUnpartitionedFiles(List unpartitionedFiles) { + this.unpartitionedFiles = unpartitionedFiles; + } + + /** + * Loads the catalog from the object store. + * + *
+ * <p>
+ * Reads the {@code catalog/current} pointer to find the active catalog version, then parses the corresponding JSON + * file. Returns an empty catalog if no pointer or catalog file exists. + * + * @param store the object store to read from + * @param mapper the Jackson ObjectMapper for JSON parsing + * @return the loaded catalog, or an empty catalog if none exists + */ + public static Catalog load(ObjectStore store, ObjectMapper mapper) { + byte[] pointer = store.get("catalog/current"); + if (pointer == null) { + return new Catalog(); + } + String catalogKey = "catalog/" + new String(pointer, StandardCharsets.UTF_8).trim(); + byte[] json = store.get(catalogKey); + if (json == null) { + return new Catalog(); + } + try { + return mapper.readValue(json, Catalog.class); + } catch (IOException e) { + throw new UncheckedIOException("Failed to parse catalog", e); + } + } + + /** + * Saves this catalog to the object store. + * + *
+ * <p>
+ * Writes the catalog JSON to {@code catalog/v{epoch}.json} and updates the {@code catalog/current} pointer. The + * epoch field is set to the given value before saving. + * + * @param store the object store to write to + * @param mapper the Jackson ObjectMapper for JSON serialization + * @param epoch the epoch number for this catalog version + */ + public void save(ObjectStore store, ObjectMapper mapper, long epoch) { + this.epoch = epoch; + try { + String versionedKey = "v" + epoch + ".json"; + byte[] json = mapper.writerWithDefaultPrettyPrinter().writeValueAsBytes(this); + store.put("catalog/" + versionedKey, json); + store.put("catalog/current", versionedKey.getBytes(StandardCharsets.UTF_8)); + } catch (IOException e) { + throw new UncheckedIOException("Failed to save catalog", e); + } + } + + /** + * Returns the set of predicate IDs that have partitioned files. + * + * @return set of predicate IDs parsed from the partition keys + */ + public Set getPredicateIds() { + return predicatePartitions.keySet() + .stream() + .map(Long::parseLong) + .collect(Collectors.toSet()); + } + + /** + * Returns the list of Parquet files for the given predicate ID. + * + * @param predicateId the predicate value ID + * @return the list of file info entries, or an empty list if no files exist for this predicate + */ + public List getFilesForPredicate(long predicateId) { + return predicatePartitions.getOrDefault(String.valueOf(predicateId), Collections.emptyList()); + } + + /** + * Adds a Parquet file to the partition for the given predicate. + * + * @param predicateId the predicate value ID + * @param info the file info to add + */ + public void addFile(long predicateId, ParquetFileInfo info) { + predicatePartitions.computeIfAbsent(String.valueOf(predicateId), k -> new ArrayList<>()).add(info); + } + + /** + * Removes Parquet files from the partition for the given predicate by their S3 keys. + * + * @param predicateId the predicate value ID + * @param s3Keys the set of S3 keys to remove + */ + public void removeFiles(long predicateId, Set s3Keys) { + List files = predicatePartitions.get(String.valueOf(predicateId)); + if (files != null) { + files.removeIf(f -> s3Keys.contains(f.getS3Key())); + } + } + + /** + * Metadata about a single Parquet file in the catalog, including its location, sort order, size, and min/max + * statistics for subject, object, and context columns. 
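+ * <p>
+ * The min/max values are per-file statistics; the intent is to let a reader skip a file whenever a bound query
+ * component falls outside the corresponding [min, max] range.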
+ */ + @JsonIgnoreProperties(ignoreUnknown = true) + public static class ParquetFileInfo { + + @JsonProperty("s3Key") + private String s3Key; + + @JsonProperty("level") + private int level; + + @JsonProperty("sortOrder") + private String sortOrder; + + @JsonProperty("rowCount") + private long rowCount; + + @JsonProperty("epoch") + private long epoch; + + @JsonProperty("sizeBytes") + private long sizeBytes; + + @JsonProperty("minSubject") + private long minSubject; + + @JsonProperty("maxSubject") + private long maxSubject; + + @JsonProperty("minObject") + private long minObject; + + @JsonProperty("maxObject") + private long maxObject; + + @JsonProperty("minContext") + private long minContext; + + @JsonProperty("maxContext") + private long maxContext; + + public ParquetFileInfo() { + } + + public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, + long epoch, long sizeBytes, + long minSubject, long maxSubject, + long minObject, long maxObject, + long minContext, long maxContext) { + this.s3Key = s3Key; + this.level = level; + this.sortOrder = sortOrder; + this.rowCount = rowCount; + this.epoch = epoch; + this.sizeBytes = sizeBytes; + this.minSubject = minSubject; + this.maxSubject = maxSubject; + this.minObject = minObject; + this.maxObject = maxObject; + this.minContext = minContext; + this.maxContext = maxContext; + } + + public String getS3Key() { + return s3Key; + } + + public void setS3Key(String s3Key) { + this.s3Key = s3Key; + } + + public int getLevel() { + return level; + } + + public void setLevel(int level) { + this.level = level; + } + + public String getSortOrder() { + return sortOrder; + } + + public void setSortOrder(String sortOrder) { + this.sortOrder = sortOrder; + } + + public long getRowCount() { + return rowCount; + } + + public void setRowCount(long rowCount) { + this.rowCount = rowCount; + } + + public long getEpoch() { + return epoch; + } + + public void setEpoch(long epoch) { + this.epoch = epoch; + } + + public long getSizeBytes() { + return sizeBytes; + } + + public void setSizeBytes(long sizeBytes) { + this.sizeBytes = sizeBytes; + } + + public long getMinSubject() { + return minSubject; + } + + public void setMinSubject(long minSubject) { + this.minSubject = minSubject; + } + + public long getMaxSubject() { + return maxSubject; + } + + public void setMaxSubject(long maxSubject) { + this.maxSubject = maxSubject; + } + + public long getMinObject() { + return minObject; + } + + public void setMinObject(long minObject) { + this.minObject = minObject; + } + + public long getMaxObject() { + return maxObject; + } + + public void setMaxObject(long maxObject) { + this.maxObject = maxObject; + } + + public long getMinContext() { + return minContext; + } + + public void setMinContext(long minContext) { + this.minContext = minContext; + } + + public long getMaxContext() { + return maxContext; + } + + public void setMaxContext(long maxContext) { + this.maxContext = maxContext; + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java new file mode 100644 index 00000000000..bd749b3904b --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java @@ -0,0 +1,81 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Determines when compaction should be triggered for a predicate partition. Counts distinct epochs at each level and + * compares against configurable thresholds. + */ +public class CompactionPolicy { + + /** Default number of L0 epochs before triggering L0→L1 compaction. */ + public static final int DEFAULT_L0_THRESHOLD = 8; + + /** Default number of L1 epochs before triggering L1→L2 compaction. */ + public static final int DEFAULT_L1_THRESHOLD = 4; + + private final int l0Threshold; + private final int l1Threshold; + + public CompactionPolicy() { + this(DEFAULT_L0_THRESHOLD, DEFAULT_L1_THRESHOLD); + } + + public CompactionPolicy(int l0Threshold, int l1Threshold) { + this.l0Threshold = l0Threshold; + this.l1Threshold = l1Threshold; + } + + /** + * Checks if L0→L1 compaction should run for the given predicate partition files. + * + * @param files all files in the predicate partition + * @return true if the number of distinct L0 epochs >= l0Threshold + */ + public boolean shouldCompactL0(List files) { + return countEpochsAtLevel(files, 0) >= l0Threshold; + } + + /** + * Checks if L1→L2 compaction should run for the given predicate partition files. + * + * @param files all files in the predicate partition + * @return true if the number of distinct L1 epochs >= l1Threshold + */ + public boolean shouldCompactL1(List files) { + return countEpochsAtLevel(files, 1) >= l1Threshold; + } + + private static int countEpochsAtLevel(List files, int level) { + Set epochs = new HashSet<>(); + for (Catalog.ParquetFileInfo f : files) { + if (f.getLevel() == level) { + epochs.add(f.getEpoch()); + } + } + return epochs.size(); + } + + /** + * Returns the files at the given level for a predicate partition. + * + * @param files all files in the partition + * @param level the target level (0, 1, or 2) + * @return files at that level + */ + public static List filesAtLevel(List files, int level) { + return files.stream().filter(f -> f.getLevel() == level).toList(); + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java new file mode 100644 index 00000000000..0e83f4fbb29 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java @@ -0,0 +1,256 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.eclipse.rdf4j.sail.s3.cache.TieredCache; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Performs merge compaction on Parquet files within a predicate partition. Merges files at a source level into one set + * of files at the target level, per sort order. + * + *
+ * <ul>
+ * <li>L0→L1: merge all L0 files per sort order, tombstones preserved</li>
+ * <li>L1→L2: merge all L1 files per sort order, tombstones suppressed (L2 = highest level)</li>
+ * </ul>
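+ *
+ * <p>
+ * A minimal invocation sketch; the {@code policy}, {@code compactor}, {@code files}, {@code predicateId},
+ * {@code epoch}, and {@code catalog} values are assumed to be managed by the caller:
+ *
+ * <pre>
+ * if (policy.shouldCompactL0(files)) {
+ *     List<Catalog.ParquetFileInfo> l0 = CompactionPolicy.filesAtLevel(files, 0);
+ *     compactor.compact(predicateId, l0, 0, 1, epoch, catalog);
+ * }
+ * </pre>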
+ */ +public class Compactor { + + private static final Logger logger = LoggerFactory.getLogger(Compactor.class); + private static final String[] SORT_ORDERS = { "soc", "osc", "cso" }; + + private final ObjectStore objectStore; + private final TieredCache cache; + private final int rowGroupSize; + private final int pageSize; + + public Compactor(ObjectStore objectStore, TieredCache cache, int rowGroupSize, int pageSize) { + this.objectStore = objectStore; + this.cache = cache; + this.rowGroupSize = rowGroupSize; + this.pageSize = pageSize; + } + + /** + * Compacts files at the source level into a single set of files at the target level. + * + * @param predicateId the predicate partition being compacted + * @param sourceFiles all files at the source level in this partition + * @param sourceLevel the source level (0 or 1) + * @param targetLevel the target level (1 or 2) + * @param epoch the epoch for the new compacted files + * @param catalog the catalog to update + * @return result containing new files created and old files removed + */ + public CompactionResult compact(long predicateId, List sourceFiles, + int sourceLevel, int targetLevel, long epoch, Catalog catalog) { + + boolean suppressTombstones = (targetLevel == 2); + List newFiles = new ArrayList<>(); + Set oldKeys = new HashSet<>(); + + for (String sortOrder : SORT_ORDERS) { + // Collect source files for this sort order, ordered newest-first (highest epoch first) + List sortOrderFiles = sourceFiles.stream() + .filter(f -> sortOrder.equals(f.getSortOrder())) + .sorted(Comparator.comparingLong(Catalog.ParquetFileInfo::getEpoch).reversed()) + .toList(); + + if (sortOrderFiles.isEmpty()) { + continue; + } + + // Collect old keys for cleanup + for (Catalog.ParquetFileInfo f : sortOrderFiles) { + oldKeys.add(f.getS3Key()); + } + + // Build merge sources from Parquet files (newest first) + List sources = new ArrayList<>(); + for (Catalog.ParquetFileInfo fileInfo : sortOrderFiles) { + byte[] fileData = cache != null ? 
cache.get(fileInfo.getS3Key()) : objectStore.get(fileInfo.getS3Key()); + if (fileData == null) { + logger.warn("Missing Parquet file during compaction: {}", fileInfo.getS3Key()); + continue; + } + sources.add(new ParquetQuadSource(fileData, sortOrder)); + } + + if (sources.isEmpty()) { + continue; + } + + // Merge and collect entries + List merged = mergeEntries(sources, suppressTombstones); + + if (merged.isEmpty()) { + continue; + } + + // Convert to ParquetFileBuilder.QuadEntry + List parquetEntries = new ArrayList<>(); + for (MemTable.QuadEntry e : merged) { + parquetEntries.add(new ParquetFileBuilder.QuadEntry(e.subject, e.object, e.context, e.flag)); + } + + // Write merged Parquet file + ParquetSchemas.SortOrder parsedSortOrder = ParquetSchemas.SortOrder.valueOf(sortOrder.toUpperCase()); + String s3Key = "data/predicates/" + predicateId + "/L" + targetLevel + "-" + + String.format("%05d", epoch) + "-" + sortOrder + ".parquet"; + + byte[] parquetData = ParquetFileBuilder.build(parquetEntries, ParquetSchemas.PARTITIONED_SCHEMA, + parsedSortOrder, predicateId, rowGroupSize, pageSize); + + objectStore.put(s3Key, parquetData); + if (cache != null) { + cache.writeThrough(s3Key, parquetData); + } + + // Compute stats from sorted entries + long minSubject = Long.MAX_VALUE, maxSubject = Long.MIN_VALUE; + long minObject = Long.MAX_VALUE, maxObject = Long.MIN_VALUE; + long minContext = Long.MAX_VALUE, maxContext = Long.MIN_VALUE; + for (MemTable.QuadEntry e : merged) { + minSubject = Math.min(minSubject, e.subject); + maxSubject = Math.max(maxSubject, e.subject); + minObject = Math.min(minObject, e.object); + maxObject = Math.max(maxObject, e.object); + minContext = Math.min(minContext, e.context); + maxContext = Math.max(maxContext, e.context); + } + + newFiles.add(new Catalog.ParquetFileInfo(s3Key, targetLevel, sortOrder, merged.size(), + epoch, parquetData.length, + minSubject, maxSubject, minObject, maxObject, minContext, maxContext)); + } + + // Update catalog: remove old files, add new ones + catalog.removeFiles(predicateId, oldKeys); + for (Catalog.ParquetFileInfo newFile : newFiles) { + catalog.addFile(predicateId, newFile); + } + + // Delete old S3 files and invalidate cache + for (String key : oldKeys) { + objectStore.delete(key); + if (cache != null) { + cache.invalidate(key); + } + } + + logger.info("Compacted predicate {} L{}→L{}: {} files merged into {} files", + predicateId, sourceLevel, targetLevel, oldKeys.size(), newFiles.size()); + + return new CompactionResult(newFiles, oldKeys); + } + + private List mergeEntries(List sources, boolean suppressTombstones) { + List result = new ArrayList<>(); + + // Simple K-way merge: use a priority queue approach + // Each source is already sorted. We merge them, dedup by key, newest wins. + // For simplicity, read all into one list then dedup. + // Since compaction is a background operation, this is acceptable. 
+ + // Use ParquetQuadSource entries directly + // Sources are ordered newest-first, so for dedup, first occurrence wins + java.util.TreeMap deduped = new java.util.TreeMap<>(); + for (RawEntrySource source : sources) { + while (source.hasNext()) { + byte[] key = source.peekKey(); + byte flag = source.peekFlag(); + // Only insert if not already present (first = newest wins) + CompactKey ck = new CompactKey(key); + if (!deduped.containsKey(ck)) { + // Decode the key to get quad values + // The key format from ParquetQuadSource encodes (subject, object, context) as varints + java.nio.ByteBuffer bb = java.nio.ByteBuffer.wrap(key); + long v1 = Varint.readUnsigned(bb); + long v2 = Varint.readUnsigned(bb); + long v3 = Varint.readUnsigned(bb); + if (!suppressTombstones || flag != MemTable.FLAG_TOMBSTONE) { + deduped.put(ck, new MemTable.QuadEntry(v1, v2, v3, flag)); + } + } + source.advance(); + } + } + + if (suppressTombstones) { + for (MemTable.QuadEntry e : deduped.values()) { + if (e.flag != MemTable.FLAG_TOMBSTONE) { + result.add(e); + } + } + } else { + result.addAll(deduped.values()); + } + + return result; + } + + private static class CompactKey implements Comparable { + final byte[] key; + + CompactKey(byte[] key) { + this.key = key.clone(); + } + + @Override + public int compareTo(CompactKey other) { + return java.util.Arrays.compareUnsigned(this.key, other.key); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof CompactKey)) { + return false; + } + return java.util.Arrays.equals(key, ((CompactKey) o).key); + } + + @Override + public int hashCode() { + return java.util.Arrays.hashCode(key); + } + } + + /** + * Result of a compaction operation. + */ + public static class CompactionResult { + private final List newFiles; + private final Set deletedKeys; + + public CompactionResult(List newFiles, Set deletedKeys) { + this.newFiles = newFiles; + this.deletedKeys = deletedKeys; + } + + public List getNewFiles() { + return newFiles; + } + + public Set getDeletedKeys() { + return deletedKeys; + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java deleted file mode 100644 index 0e72137bcd3..00000000000 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Manifest.java +++ /dev/null @@ -1,197 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.databind.ObjectMapper; - -/** - * JSON manifest tracking which SSTables exist in the object store. - * - *
- * <p>
- * <b>S3 Layout</b>
- *
- * <pre>
- * manifest/current           -> plain text "v{epoch}.json"
- * manifest/v{epoch}.json     -> JSON manifest
- * sstables/L0-{epoch}-{indexName}.sst
- * values/current             -> serialized value store
- * namespaces/current         -> JSON namespace map
- * </pre>
- */ -@JsonIgnoreProperties(ignoreUnknown = true) -public class Manifest { - - @JsonProperty("version") - private int version = 1; - - @JsonProperty("nextValueId") - private long nextValueId; - - @JsonProperty("sstables") - private List sstables = new ArrayList<>(); - - public Manifest() { - } - - public int getVersion() { - return version; - } - - public void setVersion(int version) { - this.version = version; - } - - public long getNextValueId() { - return nextValueId; - } - - public void setNextValueId(long nextValueId) { - this.nextValueId = nextValueId; - } - - public List getSstables() { - return sstables; - } - - public void setSstables(List sstables) { - this.sstables = sstables; - } - - public static Manifest load(ObjectStore store, ObjectMapper mapper) { - byte[] pointer = store.get("manifest/current"); - if (pointer == null) { - return new Manifest(); - } - String manifestKey = "manifest/" + new String(pointer, StandardCharsets.UTF_8).trim(); - byte[] json = store.get(manifestKey); - if (json == null) { - return new Manifest(); - } - try { - return mapper.readValue(json, Manifest.class); - } catch (IOException e) { - throw new UncheckedIOException("Failed to parse manifest", e); - } - } - - public void save(ObjectStore store, ObjectMapper mapper, long epoch) { - try { - String versionedKey = "v" + epoch + ".json"; - byte[] json = mapper.writerWithDefaultPrettyPrinter().writeValueAsBytes(this); - store.put("manifest/" + versionedKey, json); - store.put("manifest/current", versionedKey.getBytes(StandardCharsets.UTF_8)); - } catch (IOException e) { - throw new UncheckedIOException("Failed to save manifest", e); - } - } - - @JsonIgnoreProperties(ignoreUnknown = true) - public static class SSTableInfo { - - @JsonProperty("s3Key") - private String s3Key; - - @JsonProperty("level") - private int level; - - @JsonProperty("indexName") - private String indexName; - - @JsonProperty("minKeyHex") - private String minKeyHex; - - @JsonProperty("maxKeyHex") - private String maxKeyHex; - - @JsonProperty("entryCount") - private long entryCount; - - @JsonProperty("epoch") - private long epoch; - - public SSTableInfo() { - } - - public SSTableInfo(String s3Key, int level, String indexName, String minKeyHex, String maxKeyHex, - long entryCount, long epoch) { - this.s3Key = s3Key; - this.level = level; - this.indexName = indexName; - this.minKeyHex = minKeyHex; - this.maxKeyHex = maxKeyHex; - this.entryCount = entryCount; - this.epoch = epoch; - } - - public String getS3Key() { - return s3Key; - } - - public void setS3Key(String s3Key) { - this.s3Key = s3Key; - } - - public int getLevel() { - return level; - } - - public void setLevel(int level) { - this.level = level; - } - - public String getIndexName() { - return indexName; - } - - public void setIndexName(String indexName) { - this.indexName = indexName; - } - - public String getMinKeyHex() { - return minKeyHex; - } - - public void setMinKeyHex(String minKeyHex) { - this.minKeyHex = minKeyHex; - } - - public String getMaxKeyHex() { - return maxKeyHex; - } - - public void setMaxKeyHex(String maxKeyHex) { - this.maxKeyHex = maxKeyHex; - } - - public long getEntryCount() { - return entryCount; - } - - public void setEntryCount(long entryCount) { - this.entryCount = entryCount; - } - - public long getEpoch() { - return epoch; - } - - public void setEpoch(long epoch) { - this.epoch = epoch; - } - } -} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java 
b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java index d0ce15a7155..bd5f4b7e512 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java @@ -10,9 +10,12 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3.storage; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.concurrent.ConcurrentNavigableMap; @@ -247,6 +250,132 @@ public void advance() { } } + /** + * Returns a {@link RawEntrySource} over MemTable entries matching the given predicate, encoded as 3-varint keys in + * the specified partition sort order. Used by {@link PartitionMergeIterator} to merge MemTable entries with Parquet + * partition files. + * + * @param predId the predicate ID to filter by + * @param subj subject filter, or -1 for wildcard + * @param obj object filter, or -1 for wildcard + * @param ctx context filter, or -1 for wildcard + * @param sortOrder the partition sort order ("soc", "osc", or "cso") + * @return a RawEntrySource with 3-varint keys in the specified sort order + */ + public RawEntrySource asPartitionRawSource(long predId, long subj, long obj, long ctx, String sortOrder) { + // Scan the SPOC MemTable for entries matching the given predicate + byte[] minKey = index.getMinKeyBytes(subj <= 0 ? 0 : subj, predId, obj <= 0 ? 0 : obj, + ctx < 0 ? 0 : ctx); + byte[] maxKey = index.getMaxKeyBytes(subj <= 0 ? Long.MAX_VALUE : subj, predId, + obj <= 0 ? Long.MAX_VALUE : obj, ctx < 0 ? 
Long.MAX_VALUE : ctx); + ConcurrentNavigableMap range = data.subMap(minKey, true, maxKey, true); + + // Collect matching entries, re-encode as 3-varint partition keys + List entries = new ArrayList<>(); + long[] quad = new long[4]; + for (Map.Entry entry : range.entrySet()) { + index.keyToQuad(entry.getKey(), quad); + // Verify predicate matches (range scan may include adjacent predicates) + if (quad[QuadIndex.PRED_IDX] != predId) { + continue; + } + // Apply additional filters + if (subj >= 0 && quad[QuadIndex.SUBJ_IDX] != subj) { + continue; + } + if (obj >= 0 && quad[QuadIndex.OBJ_IDX] != obj) { + continue; + } + if (ctx >= 0 && quad[QuadIndex.CONTEXT_IDX] != ctx) { + continue; + } + byte[] partitionKey = ParquetQuadSource.encodeKey(sortOrder, + quad[QuadIndex.SUBJ_IDX], quad[QuadIndex.OBJ_IDX], quad[QuadIndex.CONTEXT_IDX]); + entries.add(new PartitionEntry(partitionKey, entry.getValue()[0])); + } + + // Sort by partition key (entries may not be in partition sort order) + entries.sort((a, b) -> java.util.Arrays.compareUnsigned(a.key, b.key)); + + return new PartitionRawSourceImpl(entries); + } + + private static class PartitionEntry { + final byte[] key; + final byte flag; + + PartitionEntry(byte[] key, byte flag) { + this.key = key; + this.flag = flag; + } + } + + private static class PartitionRawSourceImpl implements RawEntrySource { + private final List entries; + private int pos; + + PartitionRawSourceImpl(List entries) { + this.entries = entries; + this.pos = 0; + } + + @Override + public boolean hasNext() { + return pos < entries.size(); + } + + @Override + public byte[] peekKey() { + return entries.get(pos).key; + } + + @Override + public byte peekFlag() { + return entries.get(pos).flag; + } + + @Override + public void advance() { + pos++; + } + } + + /** + * Partitions entries by predicate ID. Returns a map from predicate ID to a list of {@link QuadEntry} records + * containing (subject, object, context, flag). Used during Parquet flush to write per-predicate partition files. + * + * @return map from predicate ID to list of quad entries (without predicate column) + */ + public Map> partitionByPredicate() { + Map> result = new LinkedHashMap<>(); + long[] quad = new long[4]; + for (Map.Entry entry : data.entrySet()) { + index.keyToQuad(entry.getKey(), quad); + long predId = quad[QuadIndex.PRED_IDX]; + result.computeIfAbsent(predId, k -> new ArrayList<>()) + .add(new QuadEntry(quad[QuadIndex.SUBJ_IDX], quad[QuadIndex.OBJ_IDX], + quad[QuadIndex.CONTEXT_IDX], entry.getValue()[0])); + } + return result; + } + + /** + * A quad entry with predicate removed (implicit in partition). Used for Parquet file writing. 
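+	 * <p>
+	 * A flush-time sketch; the surrounding flush logic is assumed (see {@link MemTable#partitionByPredicate()}):
+	 *
+	 * <pre>
+	 * Map<Long, List<QuadEntry>> partitions = memTable.partitionByPredicate();
+	 * for (Map.Entry<Long, List<QuadEntry>> e : partitions.entrySet()) {
+	 *     // write one Parquet partition file for predicate e.getKey()
+	 * }
+	 * </pre>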
+ */ + public static class QuadEntry { + public final long subject; + public final long object; + public final long context; + public final byte flag; + + public QuadEntry(long subject, long object, long context, byte flag) { + this.subject = subject; + this.object = object; + this.context = context; + this.flag = flag; + } + } + private void checkNotFrozen() { if (frozen.get()) { throw new IllegalStateException("MemTable is frozen and cannot accept writes"); diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java new file mode 100644 index 00000000000..d2f4379b3d9 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java @@ -0,0 +1,249 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.List; + +import org.apache.parquet.conf.ParquetConfiguration; +import org.apache.parquet.conf.PlainParquetConfiguration; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.api.WriteSupport; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.io.OutputFile; +import org.apache.parquet.io.api.RecordConsumer; +import org.apache.parquet.schema.MessageType; + +/** + * Writes quad entries to a Parquet file in memory and returns the serialized bytes. + * + *
+ * <p>
+ * This builder uses Parquet's {@link OutputFile} API to avoid Hadoop filesystem dependencies. Entries should already be
+ * sorted by the caller according to the specified {@link ParquetSchemas.SortOrder}.
+ *
+ * <p>
+ * Example usage:
+ *
+ * <pre>
+ * List<QuadEntry> entries = ...;
+ * byte[] parquetBytes = ParquetFileBuilder.build(entries, SortOrder.SOC);
+ * </pre>
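+ *
+ * <p>
+ * The full-control {@code build} overload additionally takes the schema, predicate ID, row group size, and page
+ * size; the defaults above use the partitioned schema with an 8 MiB row group and 64 KiB pages.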
+ */ +public final class ParquetFileBuilder { + + /** Default row group size: 8 MiB. */ + private static final int DEFAULT_ROW_GROUP_SIZE = 8 * 1024 * 1024; + + /** Default page size: 64 KiB. */ + private static final int DEFAULT_PAGE_SIZE = 64 * 1024; + + private ParquetFileBuilder() { + // utility class + } + + /** + * A quad entry to be written to a Parquet file. + * + *
+ * <p>
+ * For partitioned schemas the predicate is implicit in the partition path, so only subject, object, context, and + * flag are stored. For unpartitioned schemas, the predicate field is also written. + */ + public static class QuadEntry { + public final long subject; + public final long predicate; + public final long object; + public final long context; + public final byte flag; + + /** + * Creates a quad entry for partitioned files (predicate implicit in path). + * + * @param subject the subject value ID + * @param object the object value ID + * @param context the context value ID + * @param flag the entry flag (e.g. insert vs tombstone) + */ + public QuadEntry(long subject, long object, long context, byte flag) { + this(subject, -1, object, context, flag); + } + + /** + * Creates a quad entry for unpartitioned files (predicate stored explicitly). + * + * @param subject the subject value ID + * @param predicate the predicate value ID + * @param object the object value ID + * @param context the context value ID + * @param flag the entry flag (e.g. insert vs tombstone) + */ + public QuadEntry(long subject, long predicate, long object, long context, byte flag) { + this.subject = subject; + this.predicate = predicate; + this.object = object; + this.context = context; + this.flag = flag; + } + } + + /** + * Builds a Parquet file from the given entries using default settings. + * + *
+ * <p>
+ * Uses {@link ParquetSchemas#PARTITIONED_SCHEMA}, 8 MiB row group size, and 64 KiB page size. + * + * @param entries the quad entries to write (must already be sorted) + * @param sortOrder the sort order of the entries + * @return the serialized Parquet file as a byte array + */ + public static byte[] build(List entries, ParquetSchemas.SortOrder sortOrder) { + return build(entries, ParquetSchemas.PARTITIONED_SCHEMA, sortOrder, -1, + DEFAULT_ROW_GROUP_SIZE, DEFAULT_PAGE_SIZE); + } + + /** + * Builds a Parquet file from the given entries with full control over parameters. + * + * @param entries the quad entries to write (must already be sorted) + * @param schema the Parquet schema to use + * @param sortOrder the sort order of the entries + * @param predicateId the predicate ID for partitioned files (ignored for unpartitioned) + * @param rowGroupSize the row group size in bytes + * @param pageSize the page size in bytes + * @return the serialized Parquet file as a byte array + */ + public static byte[] build(List entries, MessageType schema, + ParquetSchemas.SortOrder sortOrder, long predicateId, + int rowGroupSize, int pageSize) { + try { + ByteArrayOutputFile outputFile = new ByteArrayOutputFile(); + + try (ParquetWriter writer = new QuadEntryWriterBuilder(outputFile, schema) + .withConf(new PlainParquetConfiguration()) + .withCodecFactory(SimpleCodecFactory.INSTANCE) + .withCompressionCodec(CompressionCodecName.ZSTD) + .withRowGroupSize(rowGroupSize) + .withPageSize(pageSize) + .withDictionaryEncoding(true) + .build()) { + for (QuadEntry entry : entries) { + writer.write(entry); + } + } + + return outputFile.toByteArray(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to build Parquet file", e); + } + } + + /** + * Custom {@link WriteSupport} that writes {@link QuadEntry} records to Parquet. 
+ */ + private static class QuadEntryWriteSupport extends WriteSupport { + + private final MessageType schema; + private final boolean hasPredicateColumn; + private RecordConsumer recordConsumer; + + QuadEntryWriteSupport(MessageType schema) { + this.schema = schema; + this.hasPredicateColumn = schema.containsField(ParquetSchemas.COL_PREDICATE); + } + + @Override + public WriteContext init(org.apache.hadoop.conf.Configuration configuration) { + return new WriteContext(schema, new HashMap<>()); + } + + @Override + public WriteContext init(ParquetConfiguration configuration) { + return new WriteContext(schema, new HashMap<>()); + } + + @Override + public void prepareForWrite(RecordConsumer recordConsumer) { + this.recordConsumer = recordConsumer; + } + + @Override + public void write(QuadEntry entry) { + recordConsumer.startMessage(); + + int fieldIndex = 0; + + // subject + recordConsumer.startField(ParquetSchemas.COL_SUBJECT, fieldIndex); + recordConsumer.addLong(entry.subject); + recordConsumer.endField(ParquetSchemas.COL_SUBJECT, fieldIndex); + fieldIndex++; + + // predicate (only for unpartitioned schema) + if (hasPredicateColumn) { + recordConsumer.startField(ParquetSchemas.COL_PREDICATE, fieldIndex); + recordConsumer.addLong(entry.predicate); + recordConsumer.endField(ParquetSchemas.COL_PREDICATE, fieldIndex); + fieldIndex++; + } + + // object + recordConsumer.startField(ParquetSchemas.COL_OBJECT, fieldIndex); + recordConsumer.addLong(entry.object); + recordConsumer.endField(ParquetSchemas.COL_OBJECT, fieldIndex); + fieldIndex++; + + // context + recordConsumer.startField(ParquetSchemas.COL_CONTEXT, fieldIndex); + recordConsumer.addLong(entry.context); + recordConsumer.endField(ParquetSchemas.COL_CONTEXT, fieldIndex); + fieldIndex++; + + // flag + recordConsumer.startField(ParquetSchemas.COL_FLAG, fieldIndex); + recordConsumer.addInteger(entry.flag); + recordConsumer.endField(ParquetSchemas.COL_FLAG, fieldIndex); + + recordConsumer.endMessage(); + } + } + + /** + * Builder for creating a {@link ParquetWriter} that writes {@link QuadEntry} records. Uses + * {@link PlainParquetConfiguration} to avoid Hadoop runtime dependencies. + */ + private static class QuadEntryWriterBuilder + extends ParquetWriter.Builder { + + private final MessageType schema; + + QuadEntryWriterBuilder(OutputFile file, MessageType schema) { + super(file); + this.schema = schema; + } + + @Override + protected QuadEntryWriterBuilder self() { + return this; + } + + @Override + protected WriteSupport getWriteSupport( + org.apache.hadoop.conf.Configuration conf) { + return new QuadEntryWriteSupport(schema); + } + + @Override + protected WriteSupport getWriteSupport(ParquetConfiguration conf) { + return new QuadEntryWriteSupport(schema); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java new file mode 100644 index 00000000000..5f32dfe01c7 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java @@ -0,0 +1,82 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.apache.parquet.filter2.predicate.FilterApi.and; +import static org.apache.parquet.filter2.predicate.FilterApi.eq; +import static org.apache.parquet.filter2.predicate.FilterApi.longColumn; + +import org.apache.parquet.filter2.compat.FilterCompat; +import org.apache.parquet.filter2.predicate.FilterPredicate; + +/** + * Builds Parquet {@link FilterPredicate}s from quad query patterns. Bound components (>= 0) become equality filters; + * unbound components (-1) are omitted. + */ +public class ParquetFilterBuilder { + + /** + * Builds a Parquet filter for a within-partition query (predicate is implicit). + * + * @param subject subject ID, or -1 for wildcard + * @param object object ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard + * @return a FilterCompat.Filter, or FilterCompat.NOOP if no filters apply + */ + public static FilterCompat.Filter buildPartitionedFilter(long subject, long object, long context) { + FilterPredicate predicate = null; + + if (subject >= 0) { + predicate = eq(longColumn(ParquetSchemas.COL_SUBJECT), subject); + } + if (object >= 0) { + FilterPredicate objFilter = eq(longColumn(ParquetSchemas.COL_OBJECT), object); + predicate = predicate != null ? and(predicate, objFilter) : objFilter; + } + if (context >= 0) { + FilterPredicate ctxFilter = eq(longColumn(ParquetSchemas.COL_CONTEXT), context); + predicate = predicate != null ? and(predicate, ctxFilter) : ctxFilter; + } + + return predicate != null ? FilterCompat.get(predicate) : FilterCompat.NOOP; + } + + /** + * Builds a Parquet filter for an unpartitioned file query (all 4 components). + * + * @param subject subject ID, or -1 for wildcard + * @param predId predicate ID, or -1 for wildcard + * @param object object ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard + * @return a FilterCompat.Filter, or FilterCompat.NOOP if no filters apply + */ + public static FilterCompat.Filter buildUnpartitionedFilter(long subject, long predId, long object, long context) { + FilterPredicate predicate = null; + + if (subject >= 0) { + predicate = eq(longColumn(ParquetSchemas.COL_SUBJECT), subject); + } + if (predId >= 0) { + FilterPredicate pFilter = eq(longColumn(ParquetSchemas.COL_PREDICATE), predId); + predicate = predicate != null ? and(predicate, pFilter) : pFilter; + } + if (object >= 0) { + FilterPredicate objFilter = eq(longColumn(ParquetSchemas.COL_OBJECT), object); + predicate = predicate != null ? and(predicate, objFilter) : objFilter; + } + if (context >= 0) { + FilterPredicate ctxFilter = eq(longColumn(ParquetSchemas.COL_CONTEXT), context); + predicate = predicate != null ? and(predicate, ctxFilter) : ctxFilter; + } + + return predicate != null ? FilterCompat.get(predicate) : FilterCompat.NOOP; + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java new file mode 100644 index 00000000000..3824e9f1001 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java @@ -0,0 +1,185 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.apache.parquet.ParquetReadOptions; +import org.apache.parquet.column.page.PageReadStore; +import org.apache.parquet.conf.PlainParquetConfiguration; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.convert.GroupRecordConverter; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.io.ColumnIOFactory; +import org.apache.parquet.io.MessageColumnIO; +import org.apache.parquet.io.RecordReader; +import org.apache.parquet.schema.MessageType; + +/** + * A {@link RawEntrySource} that reads entries from an in-memory Parquet file. Entries are sorted according to the + * file's sort order (soc, osc, cso) and emitted as varint-encoded byte[] keys with 1-byte flag values. + * + *

+ * The key format encodes (value1, value2, value3) as varints in the sort order of the file. For example, an "soc" file + * produces keys as varint(subject)||varint(object)||varint(context). + *

+ */ +public class ParquetQuadSource implements RawEntrySource { + + private final List entries; + private int pos; + + /** + * Creates a source from Parquet file bytes. + * + * @param parquetData the complete Parquet file as byte[] + * @param sortOrder the sort order of the file ("soc", "osc", or "cso") + */ + public ParquetQuadSource(byte[] parquetData, String sortOrder) { + this.entries = readAllEntries(parquetData, sortOrder); + this.pos = 0; + } + + /** + * Creates a source from Parquet file bytes with filtering. + * + * @param parquetData the complete Parquet file as byte[] + * @param sortOrder the sort order of the file + * @param subject subject filter, or -1 for wildcard + * @param object object filter, or -1 for wildcard + * @param context context filter, or -1 for wildcard + */ + public ParquetQuadSource(byte[] parquetData, String sortOrder, long subject, long object, long context) { + List all = readAllEntries(parquetData, sortOrder); + if (subject >= 0 || object >= 0 || context >= 0) { + List filtered = new ArrayList<>(); + for (Entry e : all) { + if ((subject >= 0 && e.subject != subject) + || (object >= 0 && e.object != object) + || (context >= 0 && e.context != context)) { + continue; + } + filtered.add(e); + } + this.entries = filtered; + } else { + this.entries = all; + } + this.pos = 0; + } + + @Override + public boolean hasNext() { + return pos < entries.size(); + } + + @Override + public byte[] peekKey() { + return entries.get(pos).key; + } + + @Override + public byte peekFlag() { + return entries.get(pos).flag; + } + + @Override + public void advance() { + pos++; + } + + private static List readAllEntries(byte[] parquetData, String sortOrder) { + List result = new ArrayList<>(); + ByteArrayInputFile inputFile = new ByteArrayInputFile(parquetData); + + try (ParquetFileReader reader = ParquetFileReader.open(inputFile, + new ParquetReadOptions.Builder(new PlainParquetConfiguration()) + .withCodecFactory(SimpleCodecFactory.INSTANCE) + .build())) { + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema); + + PageReadStore pages; + while ((pages = reader.readNextRowGroup()) != null) { + long rows = pages.getRowCount(); + RecordReader recordReader = columnIO.getRecordReader(pages, + new GroupRecordConverter(schema)); + + for (long i = 0; i < rows; i++) { + Group group = recordReader.read(); + long subject = group.getLong(ParquetSchemas.COL_SUBJECT, 0); + long object = group.getLong(ParquetSchemas.COL_OBJECT, 0); + long context = group.getLong(ParquetSchemas.COL_CONTEXT, 0); + int flag = group.getInteger(ParquetSchemas.COL_FLAG, 0); + + byte[] key = encodeKey(sortOrder, subject, object, context); + result.add(new Entry(key, (byte) flag, subject, object, context)); + } + } + } catch (IOException e) { + throw new UncheckedIOException("Failed to read Parquet file", e); + } + + return result; + } + + /** + * Encodes a key in the given sort order as varints. 
+ */ + static byte[] encodeKey(String sortOrder, long subject, long object, long context) { + long v1, v2, v3; + switch (sortOrder) { + case "osc": + v1 = object; + v2 = subject; + v3 = context; + break; + case "cso": + v1 = context; + v2 = subject; + v3 = object; + break; + case "soc": + default: + v1 = subject; + v2 = object; + v3 = context; + break; + } + + int len = Varint.calcLengthUnsigned(v1) + Varint.calcLengthUnsigned(v2) + Varint.calcLengthUnsigned(v3); + ByteBuffer bb = ByteBuffer.allocate(len); + Varint.writeUnsigned(bb, v1); + Varint.writeUnsigned(bb, v2); + Varint.writeUnsigned(bb, v3); + return bb.array(); + } + + private static class Entry { + final byte[] key; + final byte flag; + final long subject; + final long object; + final long context; + + Entry(byte[] key, byte flag, long subject, long object, long context) { + this.key = key; + this.flag = flag; + this.subject = subject; + this.object = object; + this.context = context; + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java new file mode 100644 index 00000000000..d8c6e1d8b9c --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java @@ -0,0 +1,126 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Types; + +/** + * Parquet schema definitions for quad storage. + * + *

+ * Two schemas are provided: + *

    + *
  • {@link #PARTITIONED_SCHEMA} - for files within {@code predicates/{id}/} directories, where the predicate is + * implicit in the partition path.
  • + *
  • {@link #UNPARTITIONED_SCHEMA} - for files in {@code _unpartitioned/}, which include an explicit predicate + * column.
  • + *
+ */ +public final class ParquetSchemas { + + /** Column name for subject ID. */ + public static final String COL_SUBJECT = "subject"; + + /** Column name for predicate ID. */ + public static final String COL_PREDICATE = "predicate"; + + /** Column name for object ID. */ + public static final String COL_OBJECT = "object"; + + /** Column name for context (named graph) ID. */ + public static final String COL_CONTEXT = "context"; + + /** Column name for the entry flag (e.g. insert vs tombstone). */ + public static final String COL_FLAG = "flag"; + + /** + * Schema for partitioned Parquet files stored under {@code predicates/{id}/}. The predicate is implicit in the + * directory path and not stored as a column. + */ + public static final MessageType PARTITIONED_SCHEMA = Types.buildMessage() + .required(PrimitiveTypeName.INT64) + .named(COL_SUBJECT) + .required(PrimitiveTypeName.INT64) + .named(COL_OBJECT) + .required(PrimitiveTypeName.INT64) + .named(COL_CONTEXT) + .required(PrimitiveTypeName.INT32) + .named(COL_FLAG) + .named("quad_partitioned"); + + /** + * Schema for unpartitioned Parquet files stored under {@code _unpartitioned/}. Includes an explicit predicate + * column. + */ + public static final MessageType UNPARTITIONED_SCHEMA = Types.buildMessage() + .required(PrimitiveTypeName.INT64) + .named(COL_SUBJECT) + .required(PrimitiveTypeName.INT64) + .named(COL_PREDICATE) + .required(PrimitiveTypeName.INT64) + .named(COL_OBJECT) + .required(PrimitiveTypeName.INT64) + .named(COL_CONTEXT) + .required(PrimitiveTypeName.INT32) + .named(COL_FLAG) + .named("quad_unpartitioned"); + + /** + * Sort orders for quad entries within a Parquet file. + */ + public enum SortOrder { + /** Subject-Object-Context ordering (partitioned). */ + SOC("soc"), + /** Object-Subject-Context ordering (partitioned). */ + OSC("osc"), + /** Context-Subject-Object ordering (partitioned). */ + CSO("cso"), + /** Subject-Predicate-Object-Context ordering (unpartitioned). */ + SPOC("spoc"); + + private final String suffix; + + SortOrder(String suffix) { + this.suffix = suffix; + } + + /** + * Returns the file-name suffix for this sort order. + * + * @return the suffix string + */ + public String suffix() { + return suffix; + } + + /** + * Returns the SortOrder for the given suffix string. + * + * @param suffix the suffix (e.g. "soc", "osc", "cso", "spoc") + * @return the matching SortOrder + * @throws IllegalArgumentException if no match found + */ + public static SortOrder fromSuffix(String suffix) { + for (SortOrder so : values()) { + if (so.suffix.equals(suffix)) { + return so; + } + } + throw new IllegalArgumentException("Unknown sort order suffix: " + suffix); + } + } + + private ParquetSchemas() { + // utility class + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java new file mode 100644 index 00000000000..58746fd0c6f --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java @@ -0,0 +1,122 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +/** + * Selects the best sort order for queries within a predicate partition. + * + *

+ * Within a predicate partition the predicate component is implicit, so query optimization selects among + * three-dimensional sort orders over the remaining components: subject, object, and context. + * + *

    + *
  • soc - sorted by (subject, object, context)
  • + *
  • osc - sorted by (object, subject, context)
  • + *
  • cso - sorted by (context, subject, object)
  • + *
+ * + *

+ * The selection strategy counts leading bound components for each sort order and picks the order with the highest + * score. On ties, {@code soc} is preferred as the default. + */ +public class PartitionIndexSelector { + + private PartitionIndexSelector() { + // utility class + } + + /** + * Selects the best sort order for a within-partition query. + * + *

+ * Within a partition, predicate is fixed, so we pick from: + *

    + *
  • soc: sort by (subject, object, context)
  • + *
  • osc: sort by (object, subject, context)
  • + *
  • cso: sort by (context, subject, object)
  • + *
+ * + * Each sort order is scored by counting its leading bound components. The order with the highest score wins. Ties + * are broken in favor of {@code soc}. + * + * @param subjectBound true if the subject is bound in the query + * @param objectBound true if the object is bound in the query + * @param contextBound true if the context is bound in the query + * @return the sort order suffix string: "soc", "osc", or "cso" + */ + public static String selectSortOrder(boolean subjectBound, boolean objectBound, boolean contextBound) { + // Score each sort order by counting leading bound components + + // soc: subject -> object -> context + int socScore = 0; + if (subjectBound) { + socScore++; + if (objectBound) { + socScore++; + if (contextBound) { + socScore++; + } + } + } + + // osc: object -> subject -> context + int oscScore = 0; + if (objectBound) { + oscScore++; + if (subjectBound) { + oscScore++; + if (contextBound) { + oscScore++; + } + } + } + + // cso: context -> subject -> object + int csoScore = 0; + if (contextBound) { + csoScore++; + if (subjectBound) { + csoScore++; + if (objectBound) { + csoScore++; + } + } + } + + // Pick highest score; ties prefer soc (default) + if (oscScore > socScore && oscScore > csoScore) { + return "osc"; + } + if (csoScore > socScore && csoScore > oscScore) { + return "cso"; + } + return "soc"; + } + + /** + * Returns the column order for a given sort order suffix. Used when sorting entries before writing to Parquet. + * + * @param sortOrder the sort order suffix: "soc", "osc", or "cso" + * @return array of column names in sort priority order + */ + public static String[] getColumnOrder(String sortOrder) { + switch (sortOrder) { + case "soc": + return new String[] { "subject", "object", "context" }; + case "osc": + return new String[] { "object", "subject", "context" }; + case "cso": + return new String[] { "context", "subject", "object" }; + default: + return new String[] { "subject", "object", "context" }; + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java new file mode 100644 index 00000000000..3109503cb72 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java @@ -0,0 +1,183 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.PriorityQueue; + +/** + * K-way merge iterator for within-partition queries. Works with 3-varint keys (subject, object, context encoded in + * partition sort order) where the predicate is implicit in the partition directory. + * + *

+ * Sources are ordered newest-to-oldest. Deduplicates entries with the same key (newest wins), suppresses tombstones, + * and filters by expected flag. + *

+ */ +public class PartitionMergeIterator implements Iterator { + + private final long predicateId; + private final String sortOrder; + private final byte expectedFlag; + private final long patternS, patternO, patternC; + private final PriorityQueue heap; + private long[] next; + + /** + * @param sources list of sources ordered newest-to-oldest (index 0 = newest) + * @param predicateId the predicate ID for this partition (injected into results) + * @param sortOrder the sort order of all sources ("soc", "osc", or "cso") + * @param expectedFlag the flag to match (FLAG_EXPLICIT or FLAG_INFERRED) + * @param s subject pattern, or -1 for wildcard + * @param o object pattern, or -1 for wildcard + * @param c context pattern, or -1 for wildcard + */ + public PartitionMergeIterator(List sources, long predicateId, String sortOrder, + byte expectedFlag, long s, long o, long c) { + this.predicateId = predicateId; + this.sortOrder = sortOrder; + this.expectedFlag = expectedFlag; + this.patternS = s; + this.patternO = o; + this.patternC = c; + this.heap = new PriorityQueue<>(); + + for (int i = 0; i < sources.size(); i++) { + RawEntrySource src = sources.get(i); + if (src.hasNext()) { + heap.add(new SourceCursor(src, i)); + } + } + + advance(); + } + + private void advance() { + next = null; + while (!heap.isEmpty()) { + // Pop minimum key + SourceCursor min = heap.poll(); + byte[] winningKey = min.source.peekKey().clone(); + byte winningFlag = min.source.peekFlag(); + + // Advance the winning source + min.source.advance(); + if (min.source.hasNext()) { + heap.add(min); + } + + // Drain all sources with the same key (deduplication) + while (!heap.isEmpty() && Arrays.compareUnsigned(heap.peek().source.peekKey(), winningKey) == 0) { + SourceCursor dup = heap.poll(); + dup.source.advance(); + if (dup.source.hasNext()) { + heap.add(dup); + } + } + + // Tombstone suppression + if (winningFlag == MemTable.FLAG_TOMBSTONE) { + continue; + } + + // Flag filter + if (winningFlag != expectedFlag) { + continue; + } + + // Decode 3-varint key to (subject, object, context) based on sort order + long[] quad = decodePartitionKey(winningKey, sortOrder, predicateId); + + // Pattern filter + if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS) + || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO) + || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) { + continue; + } + + next = quad; + return; + } + } + + /** + * Decodes a 3-varint partition key into a full SPOC quad array. 
+ */ + static long[] decodePartitionKey(byte[] key, String sortOrder, long predicateId) { + ByteBuffer bb = ByteBuffer.wrap(key); + long v1 = Varint.readUnsigned(bb); + long v2 = Varint.readUnsigned(bb); + long v3 = Varint.readUnsigned(bb); + + long[] quad = new long[4]; + quad[QuadIndex.PRED_IDX] = predicateId; + + switch (sortOrder) { + case "osc": + quad[QuadIndex.OBJ_IDX] = v1; + quad[QuadIndex.SUBJ_IDX] = v2; + quad[QuadIndex.CONTEXT_IDX] = v3; + break; + case "cso": + quad[QuadIndex.CONTEXT_IDX] = v1; + quad[QuadIndex.SUBJ_IDX] = v2; + quad[QuadIndex.OBJ_IDX] = v3; + break; + case "soc": + default: + quad[QuadIndex.SUBJ_IDX] = v1; + quad[QuadIndex.OBJ_IDX] = v2; + quad[QuadIndex.CONTEXT_IDX] = v3; + break; + } + + return quad; + } + + @Override + public boolean hasNext() { + return next != null; + } + + @Override + public long[] next() { + if (next == null) { + throw new NoSuchElementException(); + } + long[] result = next; + advance(); + return result; + } + + private static class SourceCursor implements Comparable { + final RawEntrySource source; + final int sourceIndex; // lower = newer + + SourceCursor(RawEntrySource source, int sourceIndex) { + this.source = source; + this.sourceIndex = sourceIndex; + } + + @Override + public int compareTo(SourceCursor other) { + int keyCmp = Arrays.compareUnsigned(this.source.peekKey(), other.source.peekKey()); + if (keyCmp != 0) { + return keyCmp; + } + // Ties broken by source index: lower = newer = wins (poll first) + return Integer.compare(this.sourceIndex, other.sourceIndex); + } + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java index 4a77d548753..9095094a319 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java @@ -11,7 +11,7 @@ package org.eclipse.rdf4j.sail.s3.storage; /** - * A source of raw key/flag entries for the {@link MergeIterator}. Both {@link MemTable} and {@link SSTable} expose this + * A source of raw key/flag entries for merge iterators. {@link MemTable} and {@link ParquetQuadSource} expose this * interface over a key range. */ public interface RawEntrySource { diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java deleted file mode 100644 index 984617245c2..00000000000 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTable.java +++ /dev/null @@ -1,332 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.Iterator; -import java.util.NoSuchElementException; - -/** - * Reads and queries an immutable SSTable from its binary representation. The entire SSTable byte[] is held in memory - * (Phase 1c). 
The block index enables binary search to find the starting block for range scans. - */ -public class SSTable { - - private final byte[] raw; - private final QuadIndex quadIndex; - - // Parsed from footer - private final long blockIndexOffset; - private final int blockIndexLength; - private final long statsOffset; - private final int statsLength; - - // Parsed from block index - private final int blockCount; - private final byte[][] blockFirstKeys; - private final long[] blockOffsets; - private final int[] blockLengths; - - // Parsed from stats - private final byte[] minKey; - private final byte[] maxKey; - private final long entryCount; - - public SSTable(byte[] raw, QuadIndex quadIndex) { - this.raw = raw; - this.quadIndex = quadIndex; - - // Parse footer (last 32 bytes) - ByteBuffer footer = ByteBuffer.wrap(raw, raw.length - SSTableWriter.FOOTER_SIZE, SSTableWriter.FOOTER_SIZE); - int magic = footer.getInt(); - if (magic != SSTableWriter.MAGIC) { - throw new IllegalArgumentException("Invalid SSTable magic: 0x" + Integer.toHexString(magic)); - } - int version = footer.getInt(); - if (version != SSTableWriter.VERSION) { - throw new IllegalArgumentException("Unsupported SSTable version: " + version); - } - this.blockIndexOffset = footer.getLong(); - this.blockIndexLength = footer.getInt(); - this.statsOffset = footer.getLong(); - this.statsLength = footer.getInt(); - - // Parse block index - ByteBuffer biBuffer = ByteBuffer.wrap(raw, (int) blockIndexOffset, blockIndexLength); - this.blockCount = biBuffer.getInt(); - this.blockFirstKeys = new byte[blockCount][]; - this.blockOffsets = new long[blockCount]; - this.blockLengths = new int[blockCount]; - for (int i = 0; i < blockCount; i++) { - int keyLen = (int) Varint.readUnsigned(biBuffer); - blockFirstKeys[i] = new byte[keyLen]; - biBuffer.get(blockFirstKeys[i]); - blockOffsets[i] = biBuffer.getLong(); - blockLengths[i] = biBuffer.getInt(); - } - - // Parse stats - ByteBuffer statsBuffer = ByteBuffer.wrap(raw, (int) statsOffset, statsLength); - int minKeyLen = (int) Varint.readUnsigned(statsBuffer); - this.minKey = new byte[minKeyLen]; - statsBuffer.get(this.minKey); - int maxKeyLen = (int) Varint.readUnsigned(statsBuffer); - this.maxKey = new byte[maxKeyLen]; - statsBuffer.get(this.maxKey); - this.entryCount = statsBuffer.getLong(); - } - - public byte[] getMinKey() { - return minKey; - } - - public byte[] getMaxKey() { - return maxKey; - } - - public long getEntryCount() { - return entryCount; - } - - /** - * Scans for matching quads, filtering by flag (explicit/inferred) and pattern. Same contract as - * {@link MemTable#scan(long, long, long, long, boolean)}. - */ - public Iterator scan(long s, long p, long o, long c, boolean explicit) { - byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; - byte[] scanMinKey = quadIndex.getMinKeyBytes(s, p, o, c); - byte[] scanMaxKey = quadIndex.getMaxKeyBytes(s, p, o, c); - int startBlock = findStartBlock(scanMinKey); - - return new ScanIterator(startBlock, scanMinKey, scanMaxKey, expectedFlag, s, p, o, c); - } - - /** - * Returns a {@link RawEntrySource} over the given key range. Includes tombstones (no flag filtering). Used by - * {@link MergeIterator}. 
- */ - public RawEntrySource asRawSource(long s, long p, long o, long c) { - byte[] scanMinKey = quadIndex.getMinKeyBytes(s, p, o, c); - byte[] scanMaxKey = quadIndex.getMaxKeyBytes(s, p, o, c); - int startBlock = findStartBlock(scanMinKey); - - return new RawSourceImpl(startBlock, scanMinKey, scanMaxKey); - } - - /** - * Binary search to find the block that could contain the given key. - */ - private int findStartBlock(byte[] targetKey) { - int lo = 0, hi = blockCount - 1; - int result = 0; - while (lo <= hi) { - int mid = (lo + hi) >>> 1; - int cmp = Arrays.compareUnsigned(blockFirstKeys[mid], targetKey); - if (cmp <= 0) { - result = mid; - lo = mid + 1; - } else { - hi = mid - 1; - } - } - return result; - } - - /** - * Reads the next entry from the data region at the given position. - * - * @return the position after the entry, or -1 if we've exceeded the data region - */ - private int readEntry(int pos, byte[][] keyOut, byte[] flagOut) { - if (pos >= blockIndexOffset) { - return -1; - } - ByteBuffer bb = ByteBuffer.wrap(raw, pos, (int) blockIndexOffset - pos); - int keyLen = (int) Varint.readUnsigned(bb); - int newPos = pos + (bb.position() - 0) + keyLen + 1; - // Recalculate from actual buffer position - int absKeyStart = pos + (bb.position() - (int) 0); - - // Re-read properly - bb = ByteBuffer.wrap(raw, pos, (int) blockIndexOffset - pos); - keyLen = (int) Varint.readUnsigned(bb); - byte[] key = new byte[keyLen]; - bb.get(key); - byte flag = bb.get(); - - keyOut[0] = key; - flagOut[0] = flag; - return pos + Varint.calcLengthUnsigned(keyLen) + keyLen + 1; - } - - private class ScanIterator implements Iterator { - private int pos; - private final byte[] scanMaxKey; - private final byte expectedFlag; - private final long patternS, patternP, patternO, patternC; - private long[] next; - private final byte[][] keyBuf = new byte[1][]; - private final byte[] flagBuf = new byte[1]; - - ScanIterator(int startBlock, byte[] scanMinKey, byte[] scanMaxKey, byte expectedFlag, - long s, long p, long o, long c) { - this.pos = (int) blockOffsets[startBlock]; - this.scanMaxKey = scanMaxKey; - this.expectedFlag = expectedFlag; - this.patternS = s; - this.patternP = p; - this.patternO = o; - this.patternC = c; - - // Skip entries before scanMinKey - skipToMinKey(scanMinKey); - advance(); - } - - private void skipToMinKey(byte[] scanMinKey) { - while (pos < blockIndexOffset) { - int savedPos = pos; - int nextPos = readEntry(pos, keyBuf, flagBuf); - if (nextPos < 0) { - break; - } - if (Arrays.compareUnsigned(keyBuf[0], scanMinKey) >= 0) { - pos = savedPos; // revert - this entry is in range - return; - } - pos = nextPos; - } - } - - private void advance() { - next = null; - while (pos < blockIndexOffset) { - int nextPos = readEntry(pos, keyBuf, flagBuf); - if (nextPos < 0) { - break; - } - pos = nextPos; - - byte[] key = keyBuf[0]; - byte flag = flagBuf[0]; - - // Past max key? 
- if (Arrays.compareUnsigned(key, scanMaxKey) > 0) { - pos = (int) blockIndexOffset; // done - return; - } - - if (flag != expectedFlag) { - continue; - } - - long[] quad = new long[4]; - quadIndex.keyToQuad(key, quad); - - if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS) - || (patternP >= 0 && quad[QuadIndex.PRED_IDX] != patternP) - || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO) - || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) { - continue; - } - - next = quad; - return; - } - } - - @Override - public boolean hasNext() { - return next != null; - } - - @Override - public long[] next() { - if (next == null) { - throw new NoSuchElementException(); - } - long[] result = next; - advance(); - return result; - } - } - - private class RawSourceImpl implements RawEntrySource { - private int pos; - private final byte[] scanMaxKey; - private byte[] currentKey; - private byte currentFlag; - private boolean valid; - private final byte[][] keyBuf = new byte[1][]; - private final byte[] flagBuf = new byte[1]; - - RawSourceImpl(int startBlock, byte[] scanMinKey, byte[] scanMaxKey) { - this.pos = (int) blockOffsets[startBlock]; - this.scanMaxKey = scanMaxKey; - skipToMinKey(scanMinKey); - } - - private void skipToMinKey(byte[] scanMinKey) { - while (pos < blockIndexOffset) { - int savedPos = pos; - int nextPos = readEntry(pos, keyBuf, flagBuf); - if (nextPos < 0) { - valid = false; - return; - } - if (Arrays.compareUnsigned(keyBuf[0], scanMinKey) >= 0) { - currentKey = keyBuf[0]; - currentFlag = flagBuf[0]; - pos = nextPos; - valid = Arrays.compareUnsigned(currentKey, scanMaxKey) <= 0; - return; - } - pos = nextPos; - } - valid = false; - } - - @Override - public boolean hasNext() { - return valid; - } - - @Override - public byte[] peekKey() { - return currentKey; - } - - @Override - public byte peekFlag() { - return currentFlag; - } - - @Override - public void advance() { - if (pos >= blockIndexOffset) { - valid = false; - return; - } - int nextPos = readEntry(pos, keyBuf, flagBuf); - if (nextPos < 0) { - valid = false; - return; - } - pos = nextPos; - currentKey = keyBuf[0]; - currentFlag = flagBuf[0]; - if (Arrays.compareUnsigned(currentKey, scanMaxKey) > 0) { - valid = false; - } - } - } -} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java deleted file mode 100644 index 6bcc272b7cf..00000000000 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriter.java +++ /dev/null @@ -1,176 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Serializes a frozen {@link MemTable} into SSTable binary format. - * - *

Format

- * - *
- * [DATA BLOCKS]
- *   Per entry: [key_length varint][key bytes][flag 1 byte]
- *   Block boundary when cumulative size exceeds blockSize
- *
- * [BLOCK INDEX]
- *   [block_count: 4-byte int BE]
- *   Per block: [first_key_length varint][first_key bytes][offset: 8-byte long BE][length: 4-byte int BE]
- *
- * [STATS]
- *   [min_key_length varint][min_key bytes]
- *   [max_key_length varint][max_key bytes]
- *   [entry_count: 8-byte long BE]
- *
- * [FOOTER: 32 bytes]
- *   [magic: 4 bytes = 0x53535431 "SST1"]
- *   [version: 4 bytes = 1]
- *   [block_index_offset: 8-byte long BE]
- *   [block_index_length: 4-byte int BE]
- *   [stats_offset: 8-byte long BE]
- *   [stats_length: 4-byte int BE]
- * 
- */ -public class SSTableWriter { - - static final int MAGIC = 0x53535431; // "SST1" - static final int VERSION = 1; - static final int FOOTER_SIZE = 32; - static final int DEFAULT_BLOCK_SIZE = 4 * 1024 * 1024; // 4 MiB - - public static byte[] write(MemTable memTable) { - return write(memTable, DEFAULT_BLOCK_SIZE); - } - - public static byte[] write(MemTable memTable, int blockSize) { - try { - Map data = memTable.getData(); - if (data.isEmpty()) { - throw new IllegalArgumentException("Cannot write empty MemTable to SSTable"); - } - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - DataOutputStream out = new DataOutputStream(baos); - - // Track block boundaries - List blocks = new ArrayList<>(); - byte[] firstKeyInBlock = null; - long blockStartOffset = 0; - long currentBlockSize = 0; - - byte[] minKey = null; - byte[] maxKey = null; - long entryCount = 0; - - for (Map.Entry entry : data.entrySet()) { - byte[] key = entry.getKey(); - byte flag = entry.getValue()[0]; - - if (minKey == null) { - minKey = key; - } - maxKey = key; - entryCount++; - - // Start a new block if needed - if (firstKeyInBlock == null) { - firstKeyInBlock = key; - blockStartOffset = baos.size(); - currentBlockSize = 0; - } - - // Write entry: [key_length varint][key bytes][flag 1 byte] - writeVarint(out, key.length); - out.write(key); - out.write(flag); - currentBlockSize += varintLength(key.length) + key.length + 1; - - // Check block boundary - if (currentBlockSize >= blockSize) { - long blockEnd = baos.size(); - blocks.add(new BlockInfo(firstKeyInBlock, blockStartOffset, (int) (blockEnd - blockStartOffset))); - firstKeyInBlock = null; - currentBlockSize = 0; - } - } - - // Finalize last block - if (firstKeyInBlock != null) { - long blockEnd = baos.size(); - blocks.add(new BlockInfo(firstKeyInBlock, blockStartOffset, (int) (blockEnd - blockStartOffset))); - } - - // Write block index - long blockIndexOffset = baos.size(); - out.writeInt(blocks.size()); - for (BlockInfo block : blocks) { - writeVarint(out, block.firstKey.length); - out.write(block.firstKey); - out.writeLong(block.offset); - out.writeInt(block.length); - } - int blockIndexLength = (int) (baos.size() - blockIndexOffset); - - // Write stats - long statsOffset = baos.size(); - writeVarint(out, minKey.length); - out.write(minKey); - writeVarint(out, maxKey.length); - out.write(maxKey); - out.writeLong(entryCount); - int statsLength = (int) (baos.size() - statsOffset); - - // Write footer (32 bytes) - out.writeInt(MAGIC); - out.writeInt(VERSION); - out.writeLong(blockIndexOffset); - out.writeInt(blockIndexLength); - out.writeLong(statsOffset); - out.writeInt(statsLength); - - out.flush(); - return baos.toByteArray(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - private static void writeVarint(DataOutputStream out, int value) throws IOException { - // Simple varint for lengths (always non-negative int) - ByteBuffer bb = ByteBuffer.allocate(5); - Varint.writeUnsigned(bb, value); - out.write(bb.array(), 0, bb.position()); - } - - private static int varintLength(int value) { - return Varint.calcLengthUnsigned(value); - } - - private static class BlockInfo { - final byte[] firstKey; - final long offset; - final int length; - - BlockInfo(byte[] firstKey, long offset, int length) { - this.firstKey = firstKey; - this.offset = offset; - this.length = length; - } - } -} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SimpleCodecFactory.java 
b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SimpleCodecFactory.java new file mode 100644 index 00000000000..8f098c419ff --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/SimpleCodecFactory.java @@ -0,0 +1,138 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.parquet.bytes.BytesInput; +import org.apache.parquet.compression.CompressionCodecFactory; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; + +import com.github.luben.zstd.Zstd; + +/** + * A lightweight {@link CompressionCodecFactory} that handles ZSTD and UNCOMPRESSED codecs without any Hadoop + * dependencies. Uses {@code zstd-jni} directly for ZSTD compression/decompression. + */ +final class SimpleCodecFactory implements CompressionCodecFactory { + + static final SimpleCodecFactory INSTANCE = new SimpleCodecFactory(); + + private SimpleCodecFactory() { + } + + @Override + public BytesInputCompressor getCompressor(CompressionCodecName codec) { + switch (codec) { + case ZSTD: + return ZSTD_COMPRESSOR; + case UNCOMPRESSED: + return NOOP_COMPRESSOR; + default: + throw new UnsupportedOperationException("Unsupported compression codec: " + codec); + } + } + + @Override + public BytesInputDecompressor getDecompressor(CompressionCodecName codec) { + switch (codec) { + case ZSTD: + return ZSTD_DECOMPRESSOR; + case UNCOMPRESSED: + return NOOP_DECOMPRESSOR; + default: + throw new UnsupportedOperationException("Unsupported compression codec: " + codec); + } + } + + @Override + public void release() { + // no resources to release + } + + private static final BytesInputCompressor ZSTD_COMPRESSOR = new BytesInputCompressor() { + @Override + public BytesInput compress(BytesInput bytes) throws IOException { + byte[] input = bytes.toByteArray(); + byte[] compressed = Zstd.compress(input); + return BytesInput.from(compressed); + } + + @Override + public CompressionCodecName getCodecName() { + return CompressionCodecName.ZSTD; + } + + @Override + public void release() { + } + }; + + private static final BytesInputDecompressor ZSTD_DECOMPRESSOR = new BytesInputDecompressor() { + @Override + public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException { + byte[] input = bytes.toByteArray(); + byte[] decompressed = new byte[uncompressedSize]; + Zstd.decompress(decompressed, input); + return BytesInput.from(decompressed); + } + + @Override + public void decompress(ByteBuffer input, int compressedSize, ByteBuffer output, int uncompressedSize) + throws IOException { + byte[] compressedBytes = new byte[compressedSize]; + input.get(compressedBytes); + byte[] decompressed = new byte[uncompressedSize]; + Zstd.decompress(decompressed, compressedBytes); + output.put(decompressed); + } + + @Override + public void release() { + } + }; + + private static final BytesInputCompressor NOOP_COMPRESSOR = new BytesInputCompressor() { + @Override + public BytesInput compress(BytesInput 
bytes) throws IOException { + return bytes; + } + + @Override + public CompressionCodecName getCodecName() { + return CompressionCodecName.UNCOMPRESSED; + } + + @Override + public void release() { + } + }; + + private static final BytesInputDecompressor NOOP_DECOMPRESSOR = new BytesInputDecompressor() { + @Override + public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException { + return bytes; + } + + @Override + public void decompress(ByteBuffer input, int compressedSize, ByteBuffer output, int uncompressedSize) + throws IOException { + byte[] data = new byte[compressedSize]; + input.get(data); + output.put(data); + } + + @Override + public void release() { + } + }; +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java deleted file mode 100644 index 8a0e48a334a..00000000000 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ManifestTest.java +++ /dev/null @@ -1,87 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -import static org.junit.jupiter.api.Assertions.*; - -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -import com.fasterxml.jackson.databind.ObjectMapper; - -class ManifestTest { - - @TempDir - Path tempDir; - - @Test - void roundTrip() throws Exception { - ObjectMapper mapper = new ObjectMapper(); - FileSystemObjectStore store = new FileSystemObjectStore(tempDir); - - Manifest manifest = new Manifest(); - manifest.setNextValueId(42); - List infos = new ArrayList<>(); - infos.add(new Manifest.SSTableInfo("sstables/L0-1-spoc.sst", 0, "spoc", "0102", "0304", 10, 1)); - infos.add(new Manifest.SSTableInfo("sstables/L0-1-posc.sst", 0, "posc", "0506", "0708", 10, 1)); - manifest.setSstables(infos); - - manifest.save(store, mapper, 1); - - Manifest loaded = Manifest.load(store, mapper); - assertEquals(1, loaded.getVersion()); - assertEquals(42, loaded.getNextValueId()); - assertEquals(2, loaded.getSstables().size()); - assertEquals("sstables/L0-1-spoc.sst", loaded.getSstables().get(0).getS3Key()); - assertEquals("spoc", loaded.getSstables().get(0).getIndexName()); - assertEquals(10, loaded.getSstables().get(0).getEntryCount()); - assertEquals(1, loaded.getSstables().get(0).getEpoch()); - } - - @Test - void loadReturnsEmptyManifestWhenNoneExists() { - FileSystemObjectStore store = new FileSystemObjectStore(tempDir); - ObjectMapper mapper = new ObjectMapper(); - - Manifest loaded = Manifest.load(store, mapper); - assertNotNull(loaded); - assertEquals(0, loaded.getSstables().size()); - assertEquals(0, loaded.getNextValueId()); - } - - @Test - void multipleVersions() throws Exception { - ObjectMapper mapper = new ObjectMapper(); - FileSystemObjectStore store = new FileSystemObjectStore(tempDir); - - // Save version 1 - Manifest m1 = new Manifest(); - m1.setNextValueId(10); - m1.save(store, 
mapper, 1); - - // Save version 2 - Manifest m2 = new Manifest(); - m2.setNextValueId(20); - List infos = new ArrayList<>(); - infos.add(new Manifest.SSTableInfo("sstables/L0-2-spoc.sst", 0, "spoc", "01", "02", 5, 2)); - m2.setSstables(infos); - m2.save(store, mapper, 2); - - // Load should return the latest (version 2) - Manifest loaded = Manifest.load(store, mapper); - assertEquals(20, loaded.getNextValueId()); - assertEquals(1, loaded.getSstables().size()); - } -} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java index 77ff3896ecb..b5fa1076a1d 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java @@ -126,20 +126,18 @@ void patternFilter() { } @Test - void mergeMemTableWithSSTable() { - // MemTable (newer) + SSTable (older) + void mergeMemTableWithOlderSource() { + // MemTable (newer) + older MemTable source MemTable memTable = new MemTable(spoc); memTable.put(1, 2, 3, 0, true); MemTable olderData = new MemTable(spoc); olderData.put(2, 3, 4, 0, true); olderData.put(4, 5, 6, 0, true); - byte[] sstData = SSTableWriter.write(olderData); - SSTable sst = new SSTable(sstData, spoc); List sources = Arrays.asList( memTable.asRawSource(-1, -1, -1, -1), - sst.asRawSource(-1, -1, -1, -1)); + olderData.asRawSource(-1, -1, -1, -1)); MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); List results = toList(iter); @@ -147,20 +145,18 @@ void mergeMemTableWithSSTable() { } @Test - void tombstoneInMemTableShadowsSSTable() { - // SSTable has a value, MemTable deletes it + void tombstoneInNewerShadowsOlder() { + // Older source has a value, newer MemTable deletes it MemTable olderData = new MemTable(spoc); olderData.put(1, 2, 3, 0, true); olderData.put(4, 5, 6, 0, true); - byte[] sstData = SSTableWriter.write(olderData); - SSTable sst = new SSTable(sstData, spoc); MemTable memTable = new MemTable(spoc); - memTable.remove(1, 2, 3, 0, true); // tombstone shadows SSTable entry + memTable.remove(1, 2, 3, 0, true); // tombstone shadows older entry List sources = Arrays.asList( memTable.asRawSource(-1, -1, -1, -1), - sst.asRawSource(-1, -1, -1, -1)); + olderData.asRawSource(-1, -1, -1, -1)); MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); List results = toList(iter); diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java deleted file mode 100644 index be49b3f82b9..00000000000 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/SSTableWriterReaderTest.java +++ /dev/null @@ -1,183 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. 
- * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -import static org.junit.jupiter.api.Assertions.*; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.junit.jupiter.api.Test; - -class SSTableWriterReaderTest { - - private final QuadIndex spoc = new QuadIndex("spoc"); - - @Test - void roundTrip_singleEntry() { - MemTable mt = new MemTable(spoc); - mt.put(1, 2, 3, 4, true); - - byte[] sstData = SSTableWriter.write(mt); - SSTable sst = new SSTable(sstData, spoc); - - assertEquals(1, sst.getEntryCount()); - - Iterator iter = sst.scan(1, 2, 3, 4, true); - assertTrue(iter.hasNext()); - long[] quad = iter.next(); - assertArrayEquals(new long[] { 1, 2, 3, 4 }, quad); - assertFalse(iter.hasNext()); - } - - @Test - void roundTrip_multipleEntries() { - MemTable mt = new MemTable(spoc); - mt.put(1, 2, 3, 0, true); - mt.put(1, 2, 4, 0, true); - mt.put(2, 3, 4, 0, true); - mt.put(10, 20, 30, 40, true); - - byte[] sstData = SSTableWriter.write(mt); - SSTable sst = new SSTable(sstData, spoc); - - assertEquals(4, sst.getEntryCount()); - - // Wildcard scan - List results = toList(sst.scan(-1, -1, -1, -1, true)); - assertEquals(4, results.size()); - } - - @Test - void roundTrip_patternFilter() { - MemTable mt = new MemTable(spoc); - mt.put(1, 2, 3, 0, true); - mt.put(1, 2, 4, 0, true); - mt.put(2, 3, 4, 0, true); - - byte[] sstData = SSTableWriter.write(mt); - SSTable sst = new SSTable(sstData, spoc); - - // Filter by subject=1 - List results = toList(sst.scan(1, -1, -1, -1, true)); - assertEquals(2, results.size()); - assertEquals(1, results.get(0)[0]); - assertEquals(1, results.get(1)[0]); - - // Filter by subject=2 - results = toList(sst.scan(2, -1, -1, -1, true)); - assertEquals(1, results.size()); - assertEquals(2, results.get(0)[0]); - } - - @Test - void roundTrip_tombstonesFilteredInScan() { - MemTable mt = new MemTable(spoc); - mt.put(1, 2, 3, 0, true); - mt.put(1, 2, 4, 0, true); - mt.remove(1, 2, 3, 0, true); // tombstone - - byte[] sstData = SSTableWriter.write(mt); - SSTable sst = new SSTable(sstData, spoc); - - // Tombstone entry is still in the SSTable (entryCount includes it) - assertEquals(2, sst.getEntryCount()); - - // But scan filters it out - List results = toList(sst.scan(-1, -1, -1, -1, true)); - assertEquals(1, results.size()); - assertArrayEquals(new long[] { 1, 2, 4, 0 }, results.get(0)); - } - - @Test - void roundTrip_tombstonesVisibleInRawSource() { - MemTable mt = new MemTable(spoc); - mt.put(1, 2, 3, 0, true); - mt.remove(1, 2, 3, 0, true); - - byte[] sstData = SSTableWriter.write(mt); - SSTable sst = new SSTable(sstData, spoc); - - RawEntrySource source = sst.asRawSource(-1, -1, -1, -1); - assertTrue(source.hasNext()); - // The tombstone should be visible - assertEquals(MemTable.FLAG_TOMBSTONE, source.peekFlag()); - } - - @Test - void roundTrip_explicitVsInferred() { - MemTable mt = new MemTable(spoc); - mt.put(1, 2, 3, 0, true); // explicit - mt.put(4, 5, 6, 0, false); // inferred - - byte[] sstData = SSTableWriter.write(mt); - SSTable sst = new SSTable(sstData, spoc); - - List explicitResults = toList(sst.scan(-1, -1, -1, -1, true)); - assertEquals(1, explicitResults.size()); - assertArrayEquals(new long[] { 1, 2, 3, 0 }, explicitResults.get(0)); - - List inferredResults = toList(sst.scan(-1, -1, -1, -1, false)); - assertEquals(1, inferredResults.size()); - assertArrayEquals(new long[] { 4, 5, 6, 0 }, 
inferredResults.get(0)); - } - - @Test - void roundTrip_smallBlockSize() { - // Use a very small block size to test multi-block SSTables - MemTable mt = new MemTable(spoc); - for (long i = 1; i <= 100; i++) { - mt.put(i, i + 1, i + 2, 0, true); - } - - byte[] sstData = SSTableWriter.write(mt, 64); // tiny blocks - SSTable sst = new SSTable(sstData, spoc); - - assertEquals(100, sst.getEntryCount()); - - // Verify all entries are retrievable - List results = toList(sst.scan(-1, -1, -1, -1, true)); - assertEquals(100, results.size()); - - // Verify range scan with block index seeking - results = toList(sst.scan(50, -1, -1, -1, true)); - assertEquals(1, results.size()); - assertEquals(50, results.get(0)[0]); - } - - @Test - void roundTrip_largeIds() { - MemTable mt = new MemTable(spoc); - mt.put(100000, 200000, 300000, 400000, true); - - byte[] sstData = SSTableWriter.write(mt); - SSTable sst = new SSTable(sstData, spoc); - - List results = toList(sst.scan(-1, -1, -1, -1, true)); - assertEquals(1, results.size()); - assertArrayEquals(new long[] { 100000, 200000, 300000, 400000 }, results.get(0)); - } - - @Test - void emptyMemTable_throwsException() { - MemTable mt = new MemTable(spoc); - assertThrows(IllegalArgumentException.class, () -> SSTableWriter.write(mt)); - } - - private List toList(Iterator iter) { - List list = new ArrayList<>(); - while (iter.hasNext()) { - list.add(iter.next()); - } - return list; - } -} From 60707caf56aa0842f332cfdb1742852b6dff18aa Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Thu, 26 Feb 2026 21:11:10 -0500 Subject: [PATCH 04/10] feat: stats-based pruning, workbench UI, and instance-level S3 config (Phase 3) Replace predicate partitioning with flat files and per-file min/max stats for pruning. Add S3 Store to RDF4J Workbench with creation form and TTL config template. S3 connection settings (bucket, endpoint, credentials) resolve from environment variables (RDF4J_S3_*) or system properties so multiple repositories share a single bucket, each isolated by s3Prefix. 
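Note for reviewers: the per-file min/max pruning introduced here boils down to a range test per flushed file. Below is a minimal sketch of that check; the names (`FileStats`, `mightContain`) are illustrative only and not the actual Catalog metadata API.

```java
// Hypothetical sketch of stats-based pruning. A query pattern component is
// "bound" when >= 0; a file can be skipped when any bound component falls
// outside that file's recorded min/max range.
final class FileStats {
    final long minSubject, maxSubject, minPredicate, maxPredicate;
    final long minObject, maxObject, minContext, maxContext;

    FileStats(long minS, long maxS, long minP, long maxP,
            long minO, long maxO, long minC, long maxC) {
        minSubject = minS; maxSubject = maxS;
        minPredicate = minP; maxPredicate = maxP;
        minObject = minO; maxObject = maxO;
        minContext = minC; maxContext = maxC;
    }

    /** Returns false only when the pattern provably matches nothing in this file. */
    boolean mightContain(long s, long p, long o, long c) {
        return inRange(s, minSubject, maxSubject)
                && inRange(p, minPredicate, maxPredicate)
                && inRange(o, minObject, maxObject)
                && inRange(c, minContext, maxContext);
    }

    private static boolean inRange(long pattern, long min, long max) {
        // -1 means wildcard: never prunes
        return pattern < 0 || (pattern >= min && pattern <= max);
    }
}
```

Since value IDs are assigned sequentially, values first seen close together in time tend to land in narrow ID ranges, which is presumably what keeps these min/max fences selective for predicate-bound queries even without physical partitioning.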
--- .../eclipse/rdf4j/repository/config/s3.ttl | 25 ++ core/sail/s3/docker-compose.yml | 32 ++ .../eclipse/rdf4j/sail/s3/S3SailStore.java | 316 ++++++++---------- .../rdf4j/sail/s3/config/S3StoreConfig.java | 65 +++- .../rdf4j/sail/s3/storage/Catalog.java | 126 +++---- .../rdf4j/sail/s3/storage/Compactor.java | 77 ++--- .../rdf4j/sail/s3/storage/MemTable.java | 177 ++++------ .../sail/s3/storage/ParquetFileBuilder.java | 44 +-- .../sail/s3/storage/ParquetQuadSource.java | 91 ++--- .../rdf4j/sail/s3/storage/ParquetSchemas.java | 47 +-- .../s3/storage/PartitionIndexSelector.java | 122 ------- .../s3/storage/PartitionMergeIterator.java | 183 ---------- .../rdf4j/sail/s3/storage/QuadIndex.java | 8 +- .../rdf4j/sail/s3/S3PersistenceTest.java | 156 +++++++++ .../rdf4j/sail/s3/storage/CatalogTest.java | 150 +++++++++ .../sail/s3/storage/MemTableReorderTest.java | 163 +++++++++ .../sail/s3/storage/ParquetRoundTripTest.java | 199 +++++++++++ .../s3/storage/QuadIndexSelectionTest.java | 106 ++++++ .../main/webapp/transformations/create-s3.xsl | 75 +++++ .../main/webapp/transformations/create.xsl | 1 + 20 files changed, 1301 insertions(+), 862 deletions(-) create mode 100644 core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl create mode 100644 core/sail/s3/docker-compose.yml delete mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java delete mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java create mode 100644 core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java create mode 100644 tools/workbench/src/main/webapp/transformations/create-s3.xsl diff --git a/core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl b/core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl new file mode 100644 index 00000000000..3806dc0bed4 --- /dev/null +++ b/core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl @@ -0,0 +1,25 @@ +# +# Configuration template for an S3Store +# +# S3 connection settings (bucket, endpoint, region, credentials) are configured +# at the RDF4J instance level via environment variables or system properties: +# +# RDF4J_S3_BUCKET, RDF4J_S3_ENDPOINT, RDF4J_S3_REGION, +# RDF4J_S3_ACCESS_KEY, RDF4J_S3_SECRET_KEY, RDF4J_S3_FORCE_PATH_STYLE +# +# Each repository uses s3Prefix to partition its data within the shared bucket. +# +@prefix rdfs: . +@prefix config: . +@prefix s3: . + +[] a config:Repository ; + config:rep.id "{%Repository ID|s3%}" ; + rdfs:label "{%Repository title|S3 Store%}" ; + config:rep.impl [ + config:rep.type "openrdf:SailRepository" ; + config:sail.impl [ + config:sail.type "rdf4j:S3Store" ; + s3:s3Prefix "{%S3 Prefix|%}" + ] + ]. 
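The template above intentionally carries only `s3Prefix`; connection settings resolve at the instance level as described in its comments. A sketch of that fallback order — environment variable first, then a system property — where the env-to-property name mapping (`RDF4J_S3_BUCKET` → `rdf4j.s3.bucket`) is an assumption for illustration, not necessarily what S3StoreConfig implements:

```java
// Illustrative resolution helper for instance-level S3 settings.
// Checks the RDF4J_S3_* environment variable first, then a system property.
static String resolveS3Setting(String envVar, String defaultValue) {
    String value = System.getenv(envVar); // e.g. RDF4J_S3_BUCKET
    if (value == null || value.isBlank()) {
        // assumed mapping: RDF4J_S3_BUCKET -> rdf4j.s3.bucket
        String property = envVar.toLowerCase().replace('_', '.');
        value = System.getProperty(property);
    }
    return (value == null || value.isBlank()) ? defaultValue : value;
}
```

With this split, two repositories configured with different `s3Prefix` values can share one bucket without seeing each other's objects, since every key a repository writes is namespaced under its prefix.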
diff --git a/core/sail/s3/docker-compose.yml b/core/sail/s3/docker-compose.yml new file mode 100644 index 00000000000..096bbf7fcb2 --- /dev/null +++ b/core/sail/s3/docker-compose.yml @@ -0,0 +1,32 @@ +services: + minio: + image: minio/minio:latest + ports: + - "9000:9000" + - "9001:9001" + environment: + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + command: server /data --console-address ":9001" + volumes: + - minio-data:/data + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 5s + timeout: 5s + retries: 5 + + createbucket: + image: minio/mc:latest + depends_on: + minio: + condition: service_healthy + entrypoint: > + /bin/sh -c " + mc alias set local http://minio:9000 minioadmin minioadmin; + mc mb --ignore-existing local/rdf4j-data; + echo 'Bucket ready'; + " + +volumes: + minio-data: diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index 488fc60adc2..8aec14e4df3 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -49,12 +49,11 @@ import org.eclipse.rdf4j.sail.s3.storage.CompactionPolicy; import org.eclipse.rdf4j.sail.s3.storage.Compactor; import org.eclipse.rdf4j.sail.s3.storage.MemTable; +import org.eclipse.rdf4j.sail.s3.storage.MergeIterator; import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; import org.eclipse.rdf4j.sail.s3.storage.ParquetFileBuilder; import org.eclipse.rdf4j.sail.s3.storage.ParquetQuadSource; import org.eclipse.rdf4j.sail.s3.storage.ParquetSchemas; -import org.eclipse.rdf4j.sail.s3.storage.PartitionIndexSelector; -import org.eclipse.rdf4j.sail.s3.storage.PartitionMergeIterator; import org.eclipse.rdf4j.sail.s3.storage.QuadIndex; import org.eclipse.rdf4j.sail.s3.storage.RawEntrySource; import org.eclipse.rdf4j.sail.s3.storage.S3ObjectStore; @@ -65,11 +64,11 @@ /** * {@link SailStore} implementation that stores RDF quads using Parquet files on S3-compatible object storage with - * predicate-based vertical partitioning. + * stats-based pruning (no predicate partitioning). * *

- * Architecture: single in-memory {@link MemTable} in SPOC order → on flush, partition by predicate and write 3 Parquet - * files per partition (SOC, OSC, CSO sort orders) → multi-tier cache (Caffeine heap + disk) → compaction. + * Architecture: single in-memory {@link MemTable} in SPOC order → on flush, write 3 Parquet files per epoch (SPOC, + * OPSC, CSPO sort orders) → multi-tier cache (Caffeine heap + disk) → compaction. *

* *

@@ -80,15 +79,18 @@ class S3SailStore implements SailStore { final Logger logger = LoggerFactory.getLogger(S3SailStore.class); - private static final String[] SORT_ORDERS = { "soc", "osc", "cso" }; + private static final QuadIndex SPOC_INDEX = new QuadIndex("spoc"); + private static final QuadIndex OPSC_INDEX = new QuadIndex("opsc"); + private static final QuadIndex CSPO_INDEX = new QuadIndex("cspo"); + private static final List ALL_INDEXES = List.of(SPOC_INDEX, OPSC_INDEX, CSPO_INDEX); + private static final int DEFAULT_ROW_GROUP_SIZE = 8 * 1024 * 1024; // 8 MiB private static final int DEFAULT_PAGE_SIZE = 64 * 1024; // 64 KiB private final S3ValueStore valueStore; private final S3NamespaceStore namespaceStore; - // Single MemTable in SPOC order (new design: 1x memory, partition on flush) - private final QuadIndex spocIndex; + // Single MemTable in SPOC order private volatile MemTable memTable; private volatile boolean mayHaveInferred; @@ -129,8 +131,7 @@ class S3SailStore implements SailStore { this.pageSize = DEFAULT_PAGE_SIZE; // Single SPOC index for the MemTable - this.spocIndex = new QuadIndex("spoc"); - this.memTable = new MemTable(spocIndex); + this.memTable = new MemTable(SPOC_INDEX); // Initialize persistence if (objectStore != null) { @@ -198,7 +199,7 @@ public void close() throws SailException { } /** - * Flushes active MemTable to Parquet files on the object store, partitioned by predicate. + * Flushes active MemTable to Parquet files on the object store. Writes one file per sort order (SPOC, OPSC, CSPO). */ private void flushToObjectStore() { if (objectStore == null) { @@ -221,62 +222,63 @@ private void flushToObjectStore() { // Freeze active MemTable and swap in fresh one MemTable frozen = memTable; frozen.freeze(); - memTable = new MemTable(spocIndex); - - // Partition by predicate - Map> partitions = frozen.partitionByPredicate(); - - // For each predicate partition, write 3 Parquet files (all sort orders) - for (Map.Entry> partEntry : partitions.entrySet()) { - long predId = partEntry.getKey(); - List entries = partEntry.getValue(); - - // Set predicate label for debugging - Value predValue = valueStore.getValue(predId); - if (predValue != null) { - catalog.getPredicateLabels().put(String.valueOf(predId), predValue.stringValue()); + memTable = new MemTable(SPOC_INDEX); + + // Collect all entries as full quads + List allQuads = new ArrayList<>(frozen.size()); + long[] quad = new long[4]; + for (Map.Entry entry : frozen.getData().entrySet()) { + long[] q = new long[5]; // s, p, o, c, flag + frozen.getIndex().keyToQuad(entry.getKey(), quad); + q[0] = quad[QuadIndex.SUBJ_IDX]; + q[1] = quad[QuadIndex.PRED_IDX]; + q[2] = quad[QuadIndex.OBJ_IDX]; + q[3] = quad[QuadIndex.CONTEXT_IDX]; + q[4] = entry.getValue()[0]; + allQuads.add(q); + } + + // Compute stats across all entries + long minSubject = Long.MAX_VALUE, maxSubject = Long.MIN_VALUE; + long minPredicate = Long.MAX_VALUE, maxPredicate = Long.MIN_VALUE; + long minObject = Long.MAX_VALUE, maxObject = Long.MIN_VALUE; + long minContext = Long.MAX_VALUE, maxContext = Long.MIN_VALUE; + for (long[] q : allQuads) { + minSubject = Math.min(minSubject, q[0]); + maxSubject = Math.max(maxSubject, q[0]); + minPredicate = Math.min(minPredicate, q[1]); + maxPredicate = Math.max(maxPredicate, q[1]); + minObject = Math.min(minObject, q[2]); + maxObject = Math.max(maxObject, q[2]); + minContext = Math.min(minContext, q[3]); + maxContext = Math.max(maxContext, q[3]); + } + + // For each sort order, sort and write one Parquet file + for 
(QuadIndex sortIndex : ALL_INDEXES) { + String sortSuffix = sortIndex.getFieldSeqString(); + + // Sort entries according to the sort order + List sorted = sortQuadEntries(allQuads, sortIndex); + + // Build Parquet file + ParquetSchemas.SortOrder sortOrder = ParquetSchemas.SortOrder.fromSuffix(sortSuffix); + byte[] parquetData = ParquetFileBuilder.build(sorted, ParquetSchemas.QUAD_SCHEMA, + sortOrder, rowGroupSize, pageSize); + + String s3Key = "data/L0-" + String.format("%05d", epoch) + "-" + sortSuffix + ".parquet"; + + objectStore.put(s3Key, parquetData); + + // Write-through to cache + if (cache != null) { + cache.writeThrough(s3Key, parquetData); } - for (String sortOrder : SORT_ORDERS) { - // Sort entries according to sort order - List sorted = sortEntries(entries, sortOrder); - - // Build Parquet file - List pqEntries = new ArrayList<>(sorted.size()); - for (MemTable.QuadEntry e : sorted) { - pqEntries.add(new ParquetFileBuilder.QuadEntry(e.subject, e.object, e.context, e.flag)); - } - - byte[] parquetData = ParquetFileBuilder.build(pqEntries, ParquetSchemas.PARTITIONED_SCHEMA, - ParquetSchemas.SortOrder.fromSuffix(sortOrder), predId, rowGroupSize, pageSize); - - String s3Key = "data/predicates/" + predId + "/L0-" - + String.format("%05d", epoch) + "-" + sortOrder + ".parquet"; - - objectStore.put(s3Key, parquetData); - - // Write-through to cache - if (cache != null) { - cache.writeThrough(s3Key, parquetData); - } - - // Compute stats - long minSubject = Long.MAX_VALUE, maxSubject = Long.MIN_VALUE; - long minObject = Long.MAX_VALUE, maxObject = Long.MIN_VALUE; - long minContext = Long.MAX_VALUE, maxContext = Long.MIN_VALUE; - for (MemTable.QuadEntry e : sorted) { - minSubject = Math.min(minSubject, e.subject); - maxSubject = Math.max(maxSubject, e.subject); - minObject = Math.min(minObject, e.object); - maxObject = Math.max(maxObject, e.object); - minContext = Math.min(minContext, e.context); - maxContext = Math.max(maxContext, e.context); - } - - catalog.addFile(predId, new Catalog.ParquetFileInfo( - s3Key, 0, sortOrder, sorted.size(), epoch, parquetData.length, - minSubject, maxSubject, minObject, maxObject, minContext, maxContext)); - } + catalog.addFile(new Catalog.ParquetFileInfo( + s3Key, 0, sortSuffix, sorted.size(), epoch, parquetData.length, + minSubject, maxSubject, minPredicate, maxPredicate, + minObject, maxObject, minContext, maxContext)); } // Persist value store and namespaces @@ -293,31 +295,42 @@ private void flushToObjectStore() { } /** - * Sorts entries according to the given sort order. + * Sorts quad entries according to the given sort index. 
*/ - private static List sortEntries(List entries, String sortOrder) { - List sorted = new ArrayList<>(entries); - Comparator cmp; - switch (sortOrder) { - case "osc": - cmp = Comparator.comparingLong((MemTable.QuadEntry e) -> e.object) - .thenComparingLong(e -> e.subject) - .thenComparingLong(e -> e.context); - break; - case "cso": - cmp = Comparator.comparingLong((MemTable.QuadEntry e) -> e.context) - .thenComparingLong(e -> e.subject) - .thenComparingLong(e -> e.object); - break; - case "soc": + private static List sortQuadEntries(List quads, QuadIndex sortIndex) { + List sorted = new ArrayList<>(quads); + String seq = sortIndex.getFieldSeqString(); + sorted.sort((a, b) -> { + for (int i = 0; i < 4; i++) { + int idx = fieldCharToIdx(seq.charAt(i)); + int cmp = Long.compare(a[idx], b[idx]); + if (cmp != 0) { + return cmp; + } + } + return 0; + }); + + List result = new ArrayList<>(sorted.size()); + for (long[] q : sorted) { + result.add(new ParquetFileBuilder.QuadEntry(q[0], q[1], q[2], q[3], (byte) q[4])); + } + return result; + } + + private static int fieldCharToIdx(char c) { + switch (c) { + case 's': + return 0; + case 'p': + return 1; + case 'o': + return 2; + case 'c': + return 3; default: - cmp = Comparator.comparingLong((MemTable.QuadEntry e) -> e.subject) - .thenComparingLong(e -> e.object) - .thenComparingLong(e -> e.context); - break; + throw new IllegalArgumentException("Invalid field: " + c); } - sorted.sort(cmp); - return sorted; } /** @@ -328,25 +341,23 @@ private void runCompactionIfNeeded() { return; } - for (long predId : catalog.getPredicateIds()) { - List files = catalog.getFilesForPredicate(predId); + List files = catalog.getFiles(); - // L0→L1 compaction - if (compactionPolicy.shouldCompactL0(files)) { - List l0Files = CompactionPolicy.filesAtLevel(files, 0); - long compactEpoch = epochCounter.getAndIncrement(); - compactor.compact(predId, l0Files, 0, 1, compactEpoch, catalog); + // L0→L1 compaction + if (compactionPolicy.shouldCompactL0(files)) { + List l0Files = CompactionPolicy.filesAtLevel(files, 0); + long compactEpoch = epochCounter.getAndIncrement(); + compactor.compact(l0Files, 0, 1, compactEpoch, catalog); - // Re-fetch files after compaction and check L1→L2 - files = catalog.getFilesForPredicate(predId); - } + // Re-fetch files after compaction + files = catalog.getFiles(); + } - // L1→L2 compaction - if (compactionPolicy.shouldCompactL1(files)) { - List l1Files = CompactionPolicy.filesAtLevel(files, 1); - long compactEpoch = epochCounter.getAndIncrement(); - compactor.compact(predId, l1Files, 1, 2, compactEpoch, catalog); - } + // L1→L2 compaction + if (compactionPolicy.shouldCompactL1(files)) { + List l1Files = CompactionPolicy.filesAtLevel(files, 1); + long compactEpoch = epochCounter.getAndIncrement(); + compactor.compact(l1Files, 1, 2, compactEpoch, catalog); } // Save catalog after compaction @@ -356,7 +367,7 @@ private void runCompactionIfNeeded() { } /** - * Creates a statement iterator for the given pattern using predicate partitioning. + * Creates a statement iterator for the given pattern using stats-based pruning. */ CloseableIteration createStatementIterator( Resource subj, IRI pred, Value obj, boolean explicit, Resource... contexts) { @@ -431,59 +442,27 @@ CloseableIteration createStatementIterator( } /** - * Creates a merged iterator across MemTable and Parquet files for a given pattern. + * Creates a merged iterator across MemTable and Parquet files for a given pattern. 
Selects the best QuadIndex, + * prunes files using catalog stats, and merges all sources. */ private Iterator createMergedIterator(long subjID, long predID, long objID, long contextID, boolean explicit) { - boolean subjectBound = subjID >= 0; - boolean objectBound = objID >= 0; - boolean contextBound = contextID >= 0; - - // Select best sort order for within-partition queries - String bestSortOrder = PartitionIndexSelector.selectSortOrder(subjectBound, objectBound, contextBound); - - if (predID >= 0) { - // Predicate bound → single partition - return createPartitionIterator(predID, subjID, objID, contextID, bestSortOrder, explicit); - } else { - // Predicate unbound → fan out to all partitions - Set predIds = catalog.getPredicateIds(); - if (predIds.isEmpty()) { - // Only MemTable data - return memTable.scan(subjID, predID, objID, contextID, explicit); - } - - List> partitionIters = new ArrayList<>(); - for (long pid : predIds) { - partitionIters.add(createPartitionIterator(pid, subjID, objID, contextID, bestSortOrder, explicit)); - } - - // Union all partitions (each partition's iterator handles dedup internally) - return new UnionIterator(partitionIters); - } - } - - /** - * Creates a merged iterator for a single predicate partition. All sources produce 3-varint keys in the partition - * sort order (predicate is implicit in the partition). - */ - private Iterator createPartitionIterator(long predId, long subjID, long objID, long contextID, - String sortOrder, boolean explicit) { - byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; + // Select best index for the query pattern + QuadIndex bestIndex = QuadIndex.getBestIndex(ALL_INDEXES, subjID, predID, objID, contextID); + String sortSuffix = bestIndex.getFieldSeqString(); + // Build sources: MemTable (newest) + Parquet files (newest epoch first) - // All sources produce 3-varint keys in the same partition sort order List sources = new ArrayList<>(); - // MemTable source (always newest) — re-encoded as 3-varint partition keys - sources.add(memTable.asPartitionRawSource(predId, subjID, objID, contextID, sortOrder)); + // MemTable source (always newest) — re-encoded in the best index order + sources.add(memTable.asRawSource(bestIndex, subjID, predID, objID, contextID)); - // Parquet files for this predicate partition and sort order - List files = catalog.getFilesForPredicate(predId); - List sortOrderFiles = files.stream() - .filter(f -> sortOrder.equals(f.getSortOrder())) + // Parquet files for the selected sort order + List sortOrderFiles = catalog.getFilesForSortOrder(sortSuffix); + sortOrderFiles = sortOrderFiles.stream() .sorted(Comparator.comparingLong(Catalog.ParquetFileInfo::getEpoch).reversed()) .toList(); @@ -492,6 +471,9 @@ private Iterator createPartitionIterator(long predId, long subjID, long if (subjID >= 0 && (subjID < fileInfo.getMinSubject() || subjID > fileInfo.getMaxSubject())) { continue; } + if (predID >= 0 && (predID < fileInfo.getMinPredicate() || predID > fileInfo.getMaxPredicate())) { + continue; + } if (objID >= 0 && (objID < fileInfo.getMinObject() || objID > fileInfo.getMaxObject())) { continue; } @@ -505,11 +487,10 @@ private Iterator createPartitionIterator(long predId, long subjID, long continue; } - sources.add(new ParquetQuadSource(fileData, sortOrder, subjID, objID, contextID)); + sources.add(new ParquetQuadSource(fileData, bestIndex, subjID, predID, objID, contextID)); } - // Use PartitionMergeIterator: all sources produce 3-varint keys, predicate injected on decode - return new 
PartitionMergeIterator(sources, predId, sortOrder, expectedFlag, subjID, objID, contextID); + return new MergeIterator(sources, bestIndex, expectedFlag, subjID, predID, objID, contextID); } // ========================================================================= @@ -875,39 +856,4 @@ public void close() { // no-op } } - - /** - * Simple union iterator that concatenates multiple iterators. Used for fan-out across predicate partitions. - */ - private static class UnionIterator implements Iterator { - private final List> iterators; - private int currentIdx; - - UnionIterator(List> iterators) { - this.iterators = iterators; - this.currentIdx = 0; - advanceToNonEmpty(); - } - - private void advanceToNonEmpty() { - while (currentIdx < iterators.size() && !iterators.get(currentIdx).hasNext()) { - currentIdx++; - } - } - - @Override - public boolean hasNext() { - return currentIdx < iterators.size(); - } - - @Override - public long[] next() { - long[] result = iterators.get(currentIdx).next(); - if (!iterators.get(currentIdx).hasNext()) { - currentIdx++; - advanceToNonEmpty(); - } - return result; - } - } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java index d1c3d364efb..afccacbf27d 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java @@ -87,7 +87,7 @@ public class S3StoreConfig extends BaseSailConfig { private String s3SecretKey; - private boolean s3ForcePathStyle = true; + private Boolean s3ForcePathStyle; /*--------------* * Constructors * @@ -106,6 +106,31 @@ public S3StoreConfig(String quadIndexes) { * Methods * *---------*/ + /** + * Resolves a configuration value from (in order): environment variable, system property, or null. + * + *

+ * S3 connection settings are shared at the RDF4J instance level so that multiple S3 SAIL repositories can share the + * same bucket. Each repository differentiates itself via {@code s3Prefix}. + *

+ * + *

+ * Environment variables: {@code RDF4J_S3_BUCKET}, {@code RDF4J_S3_ENDPOINT}, {@code RDF4J_S3_REGION}, + * {@code RDF4J_S3_ACCESS_KEY}, {@code RDF4J_S3_SECRET_KEY}, {@code RDF4J_S3_FORCE_PATH_STYLE}. + *

+ */ + private static String resolveEnv(String envVar, String sysProp) { + String val = System.getenv(envVar); + if (val != null && !val.isEmpty()) { + return val; + } + val = System.getProperty(sysProp); + if (val != null && !val.isEmpty()) { + return val; + } + return null; + } + public String getQuadIndexes() { return quadIndexes != null ? quadIndexes : DEFAULT_QUAD_INDEXES; } @@ -179,7 +204,10 @@ public S3StoreConfig setValueIdCacheSize(int valueIdCacheSize) { } public String getS3Bucket() { - return s3Bucket; + if (s3Bucket != null) { + return s3Bucket; + } + return resolveEnv("RDF4J_S3_BUCKET", "rdf4j.s3.bucket"); } public S3StoreConfig setS3Bucket(String s3Bucket) { @@ -188,7 +216,10 @@ public S3StoreConfig setS3Bucket(String s3Bucket) { } public String getS3Endpoint() { - return s3Endpoint; + if (s3Endpoint != null) { + return s3Endpoint; + } + return resolveEnv("RDF4J_S3_ENDPOINT", "rdf4j.s3.endpoint"); } public S3StoreConfig setS3Endpoint(String s3Endpoint) { @@ -197,7 +228,11 @@ public S3StoreConfig setS3Endpoint(String s3Endpoint) { } public String getS3Region() { - return s3Region != null ? s3Region : "us-east-1"; + if (s3Region != null) { + return s3Region; + } + String env = resolveEnv("RDF4J_S3_REGION", "rdf4j.s3.region"); + return env != null ? env : "us-east-1"; } public S3StoreConfig setS3Region(String s3Region) { @@ -215,7 +250,10 @@ public S3StoreConfig setS3Prefix(String s3Prefix) { } public String getS3AccessKey() { - return s3AccessKey; + if (s3AccessKey != null) { + return s3AccessKey; + } + return resolveEnv("RDF4J_S3_ACCESS_KEY", "rdf4j.s3.accessKey"); } public S3StoreConfig setS3AccessKey(String s3AccessKey) { @@ -224,7 +262,10 @@ public S3StoreConfig setS3AccessKey(String s3AccessKey) { } public String getS3SecretKey() { - return s3SecretKey; + if (s3SecretKey != null) { + return s3SecretKey; + } + return resolveEnv("RDF4J_S3_SECRET_KEY", "rdf4j.s3.secretKey"); } public S3StoreConfig setS3SecretKey(String s3SecretKey) { @@ -233,7 +274,11 @@ public S3StoreConfig setS3SecretKey(String s3SecretKey) { } public boolean isS3ForcePathStyle() { - return s3ForcePathStyle; + if (s3ForcePathStyle != null) { + return s3ForcePathStyle; + } + String env = resolveEnv("RDF4J_S3_FORCE_PATH_STYLE", "rdf4j.s3.forcePathStyle"); + return env == null || Boolean.parseBoolean(env); } public S3StoreConfig setS3ForcePathStyle(boolean s3ForcePathStyle) { @@ -242,7 +287,7 @@ public S3StoreConfig setS3ForcePathStyle(boolean s3ForcePathStyle) { } public boolean isS3Configured() { - return s3Bucket != null && !s3Bucket.isEmpty(); + return getS3Bucket() != null && !getS3Bucket().isEmpty(); } @Override @@ -293,7 +338,9 @@ public Resource export(Model m) { if (s3SecretKey != null) { m.add(implNode, S3StoreSchema.S3_SECRET_KEY, vf.createLiteral(s3SecretKey)); } - m.add(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, vf.createLiteral(s3ForcePathStyle)); + if (s3ForcePathStyle != null) { + m.add(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, vf.createLiteral(s3ForcePathStyle)); + } return implNode; } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java index 9c6e2bd806e..55b60b898c7 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java @@ -14,10 +14,7 @@ import java.io.UncheckedIOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import 
java.util.Collections; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -26,11 +23,11 @@ import com.fasterxml.jackson.databind.ObjectMapper; /** - * JSON-serialized catalog tracking Parquet files with per-file statistics and predicate partitioning. + * JSON-serialized catalog tracking Parquet files with per-file statistics. * *

- * Evolved from {@link Manifest} to support the Parquet-based storage format with predicate partitioning. Each predicate - * ID maps to a list of {@link ParquetFileInfo} entries describing the Parquet files for that partition. + * All quads are stored in flat files (no predicate partitioning). Each file has statistics including min/max for all + * four quad components (subject, predicate, object, context). * *

 * <b>S3 Layout</b>

* @@ -43,15 +40,10 @@ * *
  * {
- *   "version": 2,
+ *   "version": 3,
  *   "epoch": 42,
  *   "nextValueId": 12345,
- *   "predicatePartitions": {
- *     "7": [ { file info... } ],
- *     "42": [ { file info... } ]
- *   },
- *   "predicateLabels": { "7": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" },
- *   "unpartitionedFiles": []
+ *   "files": [ { file info... } ]
  * }
  * 
*/ @@ -59,7 +51,7 @@ public class Catalog { @JsonProperty("version") - private int version = 2; + private int version = 3; @JsonProperty("epoch") private long epoch; @@ -67,14 +59,8 @@ public class Catalog { @JsonProperty("nextValueId") private long nextValueId; - @JsonProperty("predicatePartitions") - private Map> predicatePartitions = new LinkedHashMap<>(); - - @JsonProperty("predicateLabels") - private Map predicateLabels = new LinkedHashMap<>(); - - @JsonProperty("unpartitionedFiles") - private List unpartitionedFiles = new ArrayList<>(); + @JsonProperty("files") + private List files = new ArrayList<>(); public Catalog() { } @@ -103,28 +89,12 @@ public void setNextValueId(long nextValueId) { this.nextValueId = nextValueId; } - public Map> getPredicatePartitions() { - return predicatePartitions; - } - - public void setPredicatePartitions(Map> predicatePartitions) { - this.predicatePartitions = predicatePartitions; + public List getFiles() { + return files; } - public Map getPredicateLabels() { - return predicateLabels; - } - - public void setPredicateLabels(Map predicateLabels) { - this.predicateLabels = predicateLabels; - } - - public List getUnpartitionedFiles() { - return unpartitionedFiles; - } - - public void setUnpartitionedFiles(List unpartitionedFiles) { - this.unpartitionedFiles = unpartitionedFiles; + public void setFiles(List files) { + this.files = files; } /** @@ -179,53 +149,38 @@ public void save(ObjectStore store, ObjectMapper mapper, long epoch) { } /** - * Returns the set of predicate IDs that have partitioned files. - * - * @return set of predicate IDs parsed from the partition keys - */ - public Set getPredicateIds() { - return predicatePartitions.keySet() - .stream() - .map(Long::parseLong) - .collect(Collectors.toSet()); - } - - /** - * Returns the list of Parquet files for the given predicate ID. + * Adds a Parquet file to the catalog. * - * @param predicateId the predicate value ID - * @return the list of file info entries, or an empty list if no files exist for this predicate + * @param info the file info to add */ - public List getFilesForPredicate(long predicateId) { - return predicatePartitions.getOrDefault(String.valueOf(predicateId), Collections.emptyList()); + public void addFile(ParquetFileInfo info) { + files.add(info); } /** - * Adds a Parquet file to the partition for the given predicate. + * Removes Parquet files by their S3 keys. * - * @param predicateId the predicate value ID - * @param info the file info to add + * @param s3Keys the set of S3 keys to remove */ - public void addFile(long predicateId, ParquetFileInfo info) { - predicatePartitions.computeIfAbsent(String.valueOf(predicateId), k -> new ArrayList<>()).add(info); + public void removeFiles(Set s3Keys) { + files.removeIf(f -> s3Keys.contains(f.getS3Key())); } /** - * Removes Parquet files from the partition for the given predicate by their S3 keys. + * Returns all files for the given sort order. * - * @param predicateId the predicate value ID - * @param s3Keys the set of S3 keys to remove + * @param sortOrder the sort order suffix (e.g. 
"spoc", "opsc", "cspo") + * @return list of files matching the sort order */ - public void removeFiles(long predicateId, Set s3Keys) { - List files = predicatePartitions.get(String.valueOf(predicateId)); - if (files != null) { - files.removeIf(f -> s3Keys.contains(f.getS3Key())); - } + public List getFilesForSortOrder(String sortOrder) { + return files.stream() + .filter(f -> sortOrder.equals(f.getSortOrder())) + .collect(Collectors.toList()); } /** * Metadata about a single Parquet file in the catalog, including its location, sort order, size, and min/max - * statistics for subject, object, and context columns. + * statistics for subject, predicate, object, and context columns. */ @JsonIgnoreProperties(ignoreUnknown = true) public static class ParquetFileInfo { @@ -254,6 +209,12 @@ public static class ParquetFileInfo { @JsonProperty("maxSubject") private long maxSubject; + @JsonProperty("minPredicate") + private long minPredicate; + + @JsonProperty("maxPredicate") + private long maxPredicate; + @JsonProperty("minObject") private long minObject; @@ -272,6 +233,7 @@ public ParquetFileInfo() { public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, long epoch, long sizeBytes, long minSubject, long maxSubject, + long minPredicate, long maxPredicate, long minObject, long maxObject, long minContext, long maxContext) { this.s3Key = s3Key; @@ -282,6 +244,8 @@ public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, this.sizeBytes = sizeBytes; this.minSubject = minSubject; this.maxSubject = maxSubject; + this.minPredicate = minPredicate; + this.maxPredicate = maxPredicate; this.minObject = minObject; this.maxObject = maxObject; this.minContext = minContext; @@ -352,6 +316,22 @@ public void setMaxSubject(long maxSubject) { this.maxSubject = maxSubject; } + public long getMinPredicate() { + return minPredicate; + } + + public void setMinPredicate(long minPredicate) { + this.minPredicate = minPredicate; + } + + public long getMaxPredicate() { + return maxPredicate; + } + + public void setMaxPredicate(long maxPredicate) { + this.maxPredicate = maxPredicate; + } + public long getMinObject() { return minObject; } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java index 0e83f4fbb29..b69054c8895 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java @@ -21,8 +21,8 @@ import org.slf4j.LoggerFactory; /** - * Performs merge compaction on Parquet files within a predicate partition. Merges files at a source level into one set - * of files at the target level, per sort order. + * Performs merge compaction on Parquet files. Merges files at a source level into one set of files at the target level, + * per sort order. * *
 * <ul>
 * <li>L0→L1: merge all L0 files per sort order, tombstones preserved</li>
  • @@ -32,7 +32,7 @@ public class Compactor { private static final Logger logger = LoggerFactory.getLogger(Compactor.class); - private static final String[] SORT_ORDERS = { "soc", "osc", "cso" }; + private static final String[] SORT_ORDERS = { "spoc", "opsc", "cspo" }; private final ObjectStore objectStore; private final TieredCache cache; @@ -49,15 +49,14 @@ public Compactor(ObjectStore objectStore, TieredCache cache, int rowGroupSize, i /** * Compacts files at the source level into a single set of files at the target level. * - * @param predicateId the predicate partition being compacted - * @param sourceFiles all files at the source level in this partition + * @param sourceFiles all files at the source level * @param sourceLevel the source level (0 or 1) * @param targetLevel the target level (1 or 2) * @param epoch the epoch for the new compacted files * @param catalog the catalog to update * @return result containing new files created and old files removed */ - public CompactionResult compact(long predicateId, List sourceFiles, + public CompactionResult compact(List sourceFiles, int sourceLevel, int targetLevel, long epoch, Catalog catalog) { boolean suppressTombstones = (targetLevel == 2); @@ -65,6 +64,8 @@ public CompactionResult compact(long predicateId, List Set oldKeys = new HashSet<>(); for (String sortOrder : SORT_ORDERS) { + QuadIndex quadIndex = new QuadIndex(sortOrder); + // Collect source files for this sort order, ordered newest-first (highest epoch first) List sortOrderFiles = sourceFiles.stream() .filter(f -> sortOrder.equals(f.getSortOrder())) @@ -88,7 +89,7 @@ public CompactionResult compact(long predicateId, List logger.warn("Missing Parquet file during compaction: {}", fileInfo.getS3Key()); continue; } - sources.add(new ParquetQuadSource(fileData, sortOrder)); + sources.add(new ParquetQuadSource(fileData, quadIndex)); } if (sources.isEmpty()) { @@ -96,38 +97,35 @@ public CompactionResult compact(long predicateId, List } // Merge and collect entries - List merged = mergeEntries(sources, suppressTombstones); + List merged = mergeEntries(sources, quadIndex, suppressTombstones); if (merged.isEmpty()) { continue; } - // Convert to ParquetFileBuilder.QuadEntry - List parquetEntries = new ArrayList<>(); - for (MemTable.QuadEntry e : merged) { - parquetEntries.add(new ParquetFileBuilder.QuadEntry(e.subject, e.object, e.context, e.flag)); - } - // Write merged Parquet file - ParquetSchemas.SortOrder parsedSortOrder = ParquetSchemas.SortOrder.valueOf(sortOrder.toUpperCase()); - String s3Key = "data/predicates/" + predicateId + "/L" + targetLevel + "-" + ParquetSchemas.SortOrder parsedSortOrder = ParquetSchemas.SortOrder.fromSuffix(sortOrder); + String s3Key = "data/L" + targetLevel + "-" + String.format("%05d", epoch) + "-" + sortOrder + ".parquet"; - byte[] parquetData = ParquetFileBuilder.build(parquetEntries, ParquetSchemas.PARTITIONED_SCHEMA, - parsedSortOrder, predicateId, rowGroupSize, pageSize); + byte[] parquetData = ParquetFileBuilder.build(merged, ParquetSchemas.QUAD_SCHEMA, + parsedSortOrder, rowGroupSize, pageSize); objectStore.put(s3Key, parquetData); if (cache != null) { cache.writeThrough(s3Key, parquetData); } - // Compute stats from sorted entries + // Compute stats from merged entries long minSubject = Long.MAX_VALUE, maxSubject = Long.MIN_VALUE; + long minPredicate = Long.MAX_VALUE, maxPredicate = Long.MIN_VALUE; long minObject = Long.MAX_VALUE, maxObject = Long.MIN_VALUE; long minContext = Long.MAX_VALUE, maxContext = Long.MIN_VALUE; - for 
(MemTable.QuadEntry e : merged) { + for (ParquetFileBuilder.QuadEntry e : merged) { minSubject = Math.min(minSubject, e.subject); maxSubject = Math.max(maxSubject, e.subject); + minPredicate = Math.min(minPredicate, e.predicate); + maxPredicate = Math.max(maxPredicate, e.predicate); minObject = Math.min(minObject, e.object); maxObject = Math.max(maxObject, e.object); minContext = Math.min(minContext, e.context); @@ -136,13 +134,14 @@ public CompactionResult compact(long predicateId, List newFiles.add(new Catalog.ParquetFileInfo(s3Key, targetLevel, sortOrder, merged.size(), epoch, parquetData.length, - minSubject, maxSubject, minObject, maxObject, minContext, maxContext)); + minSubject, maxSubject, minPredicate, maxPredicate, + minObject, maxObject, minContext, maxContext)); } // Update catalog: remove old files, add new ones - catalog.removeFiles(predicateId, oldKeys); + catalog.removeFiles(oldKeys); for (Catalog.ParquetFileInfo newFile : newFiles) { - catalog.addFile(predicateId, newFile); + catalog.addFile(newFile); } // Delete old S3 files and invalidate cache @@ -153,23 +152,18 @@ public CompactionResult compact(long predicateId, List } } - logger.info("Compacted predicate {} L{}→L{}: {} files merged into {} files", - predicateId, sourceLevel, targetLevel, oldKeys.size(), newFiles.size()); + logger.info("Compacted L{}→L{}: {} files merged into {} files", + sourceLevel, targetLevel, oldKeys.size(), newFiles.size()); return new CompactionResult(newFiles, oldKeys); } - private List mergeEntries(List sources, boolean suppressTombstones) { - List result = new ArrayList<>(); - - // Simple K-way merge: use a priority queue approach - // Each source is already sorted. We merge them, dedup by key, newest wins. - // For simplicity, read all into one list then dedup. - // Since compaction is a background operation, this is acceptable. 
+ private List mergeEntries(List sources, QuadIndex quadIndex, + boolean suppressTombstones) { + List result = new ArrayList<>(); - // Use ParquetQuadSource entries directly // Sources are ordered newest-first, so for dedup, first occurrence wins - java.util.TreeMap deduped = new java.util.TreeMap<>(); + java.util.TreeMap deduped = new java.util.TreeMap<>(); for (RawEntrySource source : sources) { while (source.hasNext()) { byte[] key = source.peekKey(); @@ -177,14 +171,13 @@ private List mergeEntries(List sources, bool // Only insert if not already present (first = newest wins) CompactKey ck = new CompactKey(key); if (!deduped.containsKey(ck)) { - // Decode the key to get quad values - // The key format from ParquetQuadSource encodes (subject, object, context) as varints - java.nio.ByteBuffer bb = java.nio.ByteBuffer.wrap(key); - long v1 = Varint.readUnsigned(bb); - long v2 = Varint.readUnsigned(bb); - long v3 = Varint.readUnsigned(bb); + // Decode 4-varint key to quad values + long[] quad = new long[4]; + quadIndex.keyToQuad(key, quad); if (!suppressTombstones || flag != MemTable.FLAG_TOMBSTONE) { - deduped.put(ck, new MemTable.QuadEntry(v1, v2, v3, flag)); + deduped.put(ck, new ParquetFileBuilder.QuadEntry( + quad[QuadIndex.SUBJ_IDX], quad[QuadIndex.PRED_IDX], + quad[QuadIndex.OBJ_IDX], quad[QuadIndex.CONTEXT_IDX], flag)); } } source.advance(); @@ -192,7 +185,7 @@ private List mergeEntries(List sources, bool } if (suppressTombstones) { - for (MemTable.QuadEntry e : deduped.values()) { + for (ParquetFileBuilder.QuadEntry e : deduped.values()) { if (e.flag != MemTable.FLAG_TOMBSTONE) { result.add(e); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java index bd5f4b7e512..56b844da6c0 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java @@ -14,7 +14,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; @@ -204,8 +203,8 @@ public Map getData() { } /** - * Returns a {@link RawEntrySource} over the given key range. Includes tombstones (no flag filtering). Used by - * {@link MergeIterator}. + * Returns a {@link RawEntrySource} over the given key range using this table's native index. Includes tombstones + * (no flag filtering). Used by {@link MergeIterator}. 
*/ public RawEntrySource asRawSource(long s, long p, long o, long c) { byte[] minKey = index.getMinKeyBytes(s, p, o, c); @@ -214,107 +213,63 @@ public RawEntrySource asRawSource(long s, long p, long o, long c) { return new RawSourceImpl(range); } - private static class RawSourceImpl implements RawEntrySource { - private final Iterator> delegate; - private Map.Entry current; - - RawSourceImpl(ConcurrentNavigableMap range) { - this.delegate = range.entrySet().iterator(); - if (delegate.hasNext()) { - current = delegate.next(); - } - } - - @Override - public boolean hasNext() { - return current != null; - } - - @Override - public byte[] peekKey() { - return current.getKey(); - } - - @Override - public byte peekFlag() { - return current.getValue()[0]; - } - - @Override - public void advance() { - if (delegate.hasNext()) { - current = delegate.next(); - } else { - current = null; - } - } - } - /** - * Returns a {@link RawEntrySource} over MemTable entries matching the given predicate, encoded as 3-varint keys in - * the specified partition sort order. Used by {@link PartitionMergeIterator} to merge MemTable entries with Parquet - * partition files. + * Returns a {@link RawEntrySource} with keys re-encoded in the specified target index order. When the target index + * matches this table's native index, delegates to {@link #asRawSource(long, long, long, long)} directly. * - * @param predId the predicate ID to filter by - * @param subj subject filter, or -1 for wildcard - * @param obj object filter, or -1 for wildcard - * @param ctx context filter, or -1 for wildcard - * @param sortOrder the partition sort order ("soc", "osc", or "cso") - * @return a RawEntrySource with 3-varint keys in the specified sort order + * @param targetIndex the desired key encoding order + * @param s subject filter, or -1 for wildcard + * @param p predicate filter, or -1 for wildcard + * @param o object filter, or -1 for wildcard + * @param c context filter, or -1 for wildcard + * @return a RawEntrySource with keys in the target index order */ - public RawEntrySource asPartitionRawSource(long predId, long subj, long obj, long ctx, String sortOrder) { - // Scan the SPOC MemTable for entries matching the given predicate - byte[] minKey = index.getMinKeyBytes(subj <= 0 ? 0 : subj, predId, obj <= 0 ? 0 : obj, - ctx < 0 ? 0 : ctx); - byte[] maxKey = index.getMaxKeyBytes(subj <= 0 ? Long.MAX_VALUE : subj, predId, - obj <= 0 ? Long.MAX_VALUE : obj, ctx < 0 ? 
Long.MAX_VALUE : ctx); + public RawEntrySource asRawSource(QuadIndex targetIndex, long s, long p, long o, long c) { + if (targetIndex.getFieldSeqString().equals(index.getFieldSeqString())) { + return asRawSource(s, p, o, c); + } + + // Scan all matching entries, re-encode in target order, sort + byte[] minKey = index.getMinKeyBytes(s, p, o, c); + byte[] maxKey = index.getMaxKeyBytes(s, p, o, c); ConcurrentNavigableMap range = data.subMap(minKey, true, maxKey, true); - // Collect matching entries, re-encode as 3-varint partition keys - List entries = new ArrayList<>(); + List entries = new ArrayList<>(); long[] quad = new long[4]; for (Map.Entry entry : range.entrySet()) { index.keyToQuad(entry.getKey(), quad); - // Verify predicate matches (range scan may include adjacent predicates) - if (quad[QuadIndex.PRED_IDX] != predId) { - continue; - } - // Apply additional filters - if (subj >= 0 && quad[QuadIndex.SUBJ_IDX] != subj) { - continue; - } - if (obj >= 0 && quad[QuadIndex.OBJ_IDX] != obj) { + // Apply additional filters (range scan may include extra entries) + if ((s >= 0 && quad[QuadIndex.SUBJ_IDX] != s) + || (p >= 0 && quad[QuadIndex.PRED_IDX] != p) + || (o >= 0 && quad[QuadIndex.OBJ_IDX] != o) + || (c >= 0 && quad[QuadIndex.CONTEXT_IDX] != c)) { continue; } - if (ctx >= 0 && quad[QuadIndex.CONTEXT_IDX] != ctx) { - continue; - } - byte[] partitionKey = ParquetQuadSource.encodeKey(sortOrder, - quad[QuadIndex.SUBJ_IDX], quad[QuadIndex.OBJ_IDX], quad[QuadIndex.CONTEXT_IDX]); - entries.add(new PartitionEntry(partitionKey, entry.getValue()[0])); + byte[] newKey = targetIndex.toKeyBytes( + quad[QuadIndex.SUBJ_IDX], quad[QuadIndex.PRED_IDX], + quad[QuadIndex.OBJ_IDX], quad[QuadIndex.CONTEXT_IDX]); + entries.add(new ReorderedEntry(newKey, entry.getValue()[0])); } - // Sort by partition key (entries may not be in partition sort order) - entries.sort((a, b) -> java.util.Arrays.compareUnsigned(a.key, b.key)); - - return new PartitionRawSourceImpl(entries); + entries.sort((a, b) -> Arrays.compareUnsigned(a.key, b.key)); + return new ReorderedRawSource(entries); } - private static class PartitionEntry { + private static class ReorderedEntry { final byte[] key; final byte flag; - PartitionEntry(byte[] key, byte flag) { + ReorderedEntry(byte[] key, byte flag) { this.key = key; this.flag = flag; } } - private static class PartitionRawSourceImpl implements RawEntrySource { - private final List entries; + private static class ReorderedRawSource implements RawEntrySource { + private final List entries; private int pos; - PartitionRawSourceImpl(List entries) { + ReorderedRawSource(List entries) { this.entries = entries; this.pos = 0; } @@ -340,39 +295,39 @@ public void advance() { } } - /** - * Partitions entries by predicate ID. Returns a map from predicate ID to a list of {@link QuadEntry} records - * containing (subject, object, context, flag). Used during Parquet flush to write per-predicate partition files. 
- * - * @return map from predicate ID to list of quad entries (without predicate column) - */ - public Map> partitionByPredicate() { - Map> result = new LinkedHashMap<>(); - long[] quad = new long[4]; - for (Map.Entry entry : data.entrySet()) { - index.keyToQuad(entry.getKey(), quad); - long predId = quad[QuadIndex.PRED_IDX]; - result.computeIfAbsent(predId, k -> new ArrayList<>()) - .add(new QuadEntry(quad[QuadIndex.SUBJ_IDX], quad[QuadIndex.OBJ_IDX], - quad[QuadIndex.CONTEXT_IDX], entry.getValue()[0])); + private static class RawSourceImpl implements RawEntrySource { + private final Iterator> delegate; + private Map.Entry current; + + RawSourceImpl(ConcurrentNavigableMap range) { + this.delegate = range.entrySet().iterator(); + if (delegate.hasNext()) { + current = delegate.next(); + } } - return result; - } - /** - * A quad entry with predicate removed (implicit in partition). Used for Parquet file writing. - */ - public static class QuadEntry { - public final long subject; - public final long object; - public final long context; - public final byte flag; - - public QuadEntry(long subject, long object, long context, byte flag) { - this.subject = subject; - this.object = object; - this.context = context; - this.flag = flag; + @Override + public boolean hasNext() { + return current != null; + } + + @Override + public byte[] peekKey() { + return current.getKey(); + } + + @Override + public byte peekFlag() { + return current.getValue()[0]; + } + + @Override + public void advance() { + if (delegate.hasNext()) { + current = delegate.next(); + } else { + current = null; + } } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java index d2f4379b3d9..97b34aceda5 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java @@ -36,7 +36,7 @@ * *
      * List<QuadEntry> entries = ...;
    - * byte[] parquetBytes = ParquetFileBuilder.build(entries, SortOrder.SOC);
    + * byte[] parquetBytes = ParquetFileBuilder.build(entries, SortOrder.SPOC);
      * 
    */ public final class ParquetFileBuilder { @@ -52,11 +52,8 @@ private ParquetFileBuilder() { } /** - * A quad entry to be written to a Parquet file. - * - *

    - * For partitioned schemas the predicate is implicit in the partition path, so only subject, object, context, and - * flag are stored. For unpartitioned schemas, the predicate field is also written. + * A quad entry to be written to a Parquet file. All 5 fields (subject, predicate, object, context, flag) are + * stored. */ public static class QuadEntry { public final long subject; @@ -66,19 +63,7 @@ public static class QuadEntry { public final byte flag; /** - * Creates a quad entry for partitioned files (predicate implicit in path). - * - * @param subject the subject value ID - * @param object the object value ID - * @param context the context value ID - * @param flag the entry flag (e.g. insert vs tombstone) - */ - public QuadEntry(long subject, long object, long context, byte flag) { - this(subject, -1, object, context, flag); - } - - /** - * Creates a quad entry for unpartitioned files (predicate stored explicitly). + * Creates a quad entry with all components. * * @param subject the subject value ID * @param predicate the predicate value ID @@ -99,14 +84,14 @@ public QuadEntry(long subject, long predicate, long object, long context, byte f * Builds a Parquet file from the given entries using default settings. * *

    - * Uses {@link ParquetSchemas#PARTITIONED_SCHEMA}, 8 MiB row group size, and 64 KiB page size. + * Uses {@link ParquetSchemas#QUAD_SCHEMA}, 8 MiB row group size, and 64 KiB page size. * * @param entries the quad entries to write (must already be sorted) * @param sortOrder the sort order of the entries * @return the serialized Parquet file as a byte array */ public static byte[] build(List entries, ParquetSchemas.SortOrder sortOrder) { - return build(entries, ParquetSchemas.PARTITIONED_SCHEMA, sortOrder, -1, + return build(entries, ParquetSchemas.QUAD_SCHEMA, sortOrder, DEFAULT_ROW_GROUP_SIZE, DEFAULT_PAGE_SIZE); } @@ -116,13 +101,12 @@ public static byte[] build(List entries, ParquetSchemas.SortOrder sor * @param entries the quad entries to write (must already be sorted) * @param schema the Parquet schema to use * @param sortOrder the sort order of the entries - * @param predicateId the predicate ID for partitioned files (ignored for unpartitioned) * @param rowGroupSize the row group size in bytes * @param pageSize the page size in bytes * @return the serialized Parquet file as a byte array */ public static byte[] build(List entries, MessageType schema, - ParquetSchemas.SortOrder sortOrder, long predicateId, + ParquetSchemas.SortOrder sortOrder, int rowGroupSize, int pageSize) { try { ByteArrayOutputFile outputFile = new ByteArrayOutputFile(); @@ -152,12 +136,10 @@ public static byte[] build(List entries, MessageType schema, private static class QuadEntryWriteSupport extends WriteSupport { private final MessageType schema; - private final boolean hasPredicateColumn; private RecordConsumer recordConsumer; QuadEntryWriteSupport(MessageType schema) { this.schema = schema; - this.hasPredicateColumn = schema.containsField(ParquetSchemas.COL_PREDICATE); } @Override @@ -187,13 +169,11 @@ public void write(QuadEntry entry) { recordConsumer.endField(ParquetSchemas.COL_SUBJECT, fieldIndex); fieldIndex++; - // predicate (only for unpartitioned schema) - if (hasPredicateColumn) { - recordConsumer.startField(ParquetSchemas.COL_PREDICATE, fieldIndex); - recordConsumer.addLong(entry.predicate); - recordConsumer.endField(ParquetSchemas.COL_PREDICATE, fieldIndex); - fieldIndex++; - } + // predicate + recordConsumer.startField(ParquetSchemas.COL_PREDICATE, fieldIndex); + recordConsumer.addLong(entry.predicate); + recordConsumer.endField(ParquetSchemas.COL_PREDICATE, fieldIndex); + fieldIndex++; // object recordConsumer.startField(ParquetSchemas.COL_OBJECT, fieldIndex); diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java index 3824e9f1001..934691984bc 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java @@ -12,7 +12,6 @@ import java.io.IOException; import java.io.UncheckedIOException; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -29,11 +28,11 @@ /** * A {@link RawEntrySource} that reads entries from an in-memory Parquet file. Entries are sorted according to the - * file's sort order (soc, osc, cso) and emitted as varint-encoded byte[] keys with 1-byte flag values. + * file's sort order and emitted as 4-varint-encoded byte[] keys with 1-byte flag values. * *

    - * The key format encodes (value1, value2, value3) as varints in the sort order of the file. For example, an "soc" file - * produces keys as varint(subject)||varint(object)||varint(context). + * The key format encodes all four quad components (s, p, o, c) as varints in the order defined by the + * {@link QuadIndex}. *

    */ public class ParquetQuadSource implements RawEntrySource { @@ -45,10 +44,10 @@ public class ParquetQuadSource implements RawEntrySource { * Creates a source from Parquet file bytes. * * @param parquetData the complete Parquet file as byte[] - * @param sortOrder the sort order of the file ("soc", "osc", or "cso") + * @param quadIndex the quad index defining the key encoding order */ - public ParquetQuadSource(byte[] parquetData, String sortOrder) { - this.entries = readAllEntries(parquetData, sortOrder); + public ParquetQuadSource(byte[] parquetData, QuadIndex quadIndex) { + this.entries = readAllEntries(parquetData, quadIndex, -1, -1, -1, -1); this.pos = 0; } @@ -56,27 +55,15 @@ public ParquetQuadSource(byte[] parquetData, String sortOrder) { * Creates a source from Parquet file bytes with filtering. * * @param parquetData the complete Parquet file as byte[] - * @param sortOrder the sort order of the file + * @param quadIndex the quad index defining the key encoding order * @param subject subject filter, or -1 for wildcard + * @param predicate predicate filter, or -1 for wildcard * @param object object filter, or -1 for wildcard * @param context context filter, or -1 for wildcard */ - public ParquetQuadSource(byte[] parquetData, String sortOrder, long subject, long object, long context) { - List all = readAllEntries(parquetData, sortOrder); - if (subject >= 0 || object >= 0 || context >= 0) { - List filtered = new ArrayList<>(); - for (Entry e : all) { - if ((subject >= 0 && e.subject != subject) - || (object >= 0 && e.object != object) - || (context >= 0 && e.context != context)) { - continue; - } - filtered.add(e); - } - this.entries = filtered; - } else { - this.entries = all; - } + public ParquetQuadSource(byte[] parquetData, QuadIndex quadIndex, + long subject, long predicate, long object, long context) { + this.entries = readAllEntries(parquetData, quadIndex, subject, predicate, object, context); this.pos = 0; } @@ -100,7 +87,8 @@ public void advance() { pos++; } - private static List readAllEntries(byte[] parquetData, String sortOrder) { + private static List readAllEntries(byte[] parquetData, QuadIndex quadIndex, + long filterS, long filterP, long filterO, long filterC) { List result = new ArrayList<>(); ByteArrayInputFile inputFile = new ByteArrayInputFile(parquetData); @@ -120,12 +108,21 @@ private static List readAllEntries(byte[] parquetData, String sortOrder) for (long i = 0; i < rows; i++) { Group group = recordReader.read(); long subject = group.getLong(ParquetSchemas.COL_SUBJECT, 0); + long predicate = group.getLong(ParquetSchemas.COL_PREDICATE, 0); long object = group.getLong(ParquetSchemas.COL_OBJECT, 0); long context = group.getLong(ParquetSchemas.COL_CONTEXT, 0); int flag = group.getInteger(ParquetSchemas.COL_FLAG, 0); - byte[] key = encodeKey(sortOrder, subject, object, context); - result.add(new Entry(key, (byte) flag, subject, object, context)); + // Apply filters + if ((filterS >= 0 && subject != filterS) + || (filterP >= 0 && predicate != filterP) + || (filterO >= 0 && object != filterO) + || (filterC >= 0 && context != filterC)) { + continue; + } + + byte[] key = quadIndex.toKeyBytes(subject, predicate, object, context); + result.add(new Entry(key, (byte) flag)); } } } catch (IOException e) { @@ -135,51 +132,13 @@ private static List readAllEntries(byte[] parquetData, String sortOrder) return result; } - /** - * Encodes a key in the given sort order as varints. 
- */ - static byte[] encodeKey(String sortOrder, long subject, long object, long context) { - long v1, v2, v3; - switch (sortOrder) { - case "osc": - v1 = object; - v2 = subject; - v3 = context; - break; - case "cso": - v1 = context; - v2 = subject; - v3 = object; - break; - case "soc": - default: - v1 = subject; - v2 = object; - v3 = context; - break; - } - - int len = Varint.calcLengthUnsigned(v1) + Varint.calcLengthUnsigned(v2) + Varint.calcLengthUnsigned(v3); - ByteBuffer bb = ByteBuffer.allocate(len); - Varint.writeUnsigned(bb, v1); - Varint.writeUnsigned(bb, v2); - Varint.writeUnsigned(bb, v3); - return bb.array(); - } - private static class Entry { final byte[] key; final byte flag; - final long subject; - final long object; - final long context; - Entry(byte[] key, byte flag, long subject, long object, long context) { + Entry(byte[] key, byte flag) { this.key = key; this.flag = flag; - this.subject = subject; - this.object = object; - this.context = context; } } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java index d8c6e1d8b9c..817b12c952b 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetSchemas.java @@ -18,13 +18,8 @@ * Parquet schema definitions for quad storage. * *

- * Two schemas are provided:
- *
- * <ul>
- * <li>{@link #PARTITIONED_SCHEMA} - for files within {@code predicates/{id}/} directories, where the predicate is
- * implicit in the partition path.</li>
- * <li>{@link #UNPARTITIONED_SCHEMA} - for files in {@code _unpartitioned/}, which include an explicit predicate
- * column.</li>
- * </ul>
    + * All files use {@link #QUAD_SCHEMA} with 5 columns (subject, predicate, object, context, flag). Three sort orders + * determine the key encoding: SPOC (subject-leading), OPSC (object-leading), and CSPO (context-leading). */ public final class ParquetSchemas { @@ -44,25 +39,9 @@ public final class ParquetSchemas { public static final String COL_FLAG = "flag"; /** - * Schema for partitioned Parquet files stored under {@code predicates/{id}/}. The predicate is implicit in the - * directory path and not stored as a column. + * Schema for all Parquet files. Includes all 5 columns: subject, predicate, object, context, flag. */ - public static final MessageType PARTITIONED_SCHEMA = Types.buildMessage() - .required(PrimitiveTypeName.INT64) - .named(COL_SUBJECT) - .required(PrimitiveTypeName.INT64) - .named(COL_OBJECT) - .required(PrimitiveTypeName.INT64) - .named(COL_CONTEXT) - .required(PrimitiveTypeName.INT32) - .named(COL_FLAG) - .named("quad_partitioned"); - - /** - * Schema for unpartitioned Parquet files stored under {@code _unpartitioned/}. Includes an explicit predicate - * column. - */ - public static final MessageType UNPARTITIONED_SCHEMA = Types.buildMessage() + public static final MessageType QUAD_SCHEMA = Types.buildMessage() .required(PrimitiveTypeName.INT64) .named(COL_SUBJECT) .required(PrimitiveTypeName.INT64) @@ -73,20 +52,18 @@ public final class ParquetSchemas { .named(COL_CONTEXT) .required(PrimitiveTypeName.INT32) .named(COL_FLAG) - .named("quad_unpartitioned"); + .named("quad"); /** * Sort orders for quad entries within a Parquet file. */ public enum SortOrder { - /** Subject-Object-Context ordering (partitioned). */ - SOC("soc"), - /** Object-Subject-Context ordering (partitioned). */ - OSC("osc"), - /** Context-Subject-Object ordering (partitioned). */ - CSO("cso"), - /** Subject-Predicate-Object-Context ordering (unpartitioned). */ - SPOC("spoc"); + /** Subject-Predicate-Object-Context ordering. */ + SPOC("spoc"), + /** Object-Predicate-Subject-Context ordering. */ + OPSC("opsc"), + /** Context-Subject-Predicate-Object ordering. */ + CSPO("cspo"); private final String suffix; @@ -106,7 +83,7 @@ public String suffix() { /** * Returns the SortOrder for the given suffix string. * - * @param suffix the suffix (e.g. "soc", "osc", "cso", "spoc") + * @param suffix the suffix (e.g. "spoc", "opsc", "cspo") * @return the matching SortOrder * @throws IllegalArgumentException if no match found */ diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java deleted file mode 100644 index 58746fd0c6f..00000000000 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionIndexSelector.java +++ /dev/null @@ -1,122 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -/** - * Selects the best sort order for queries within a predicate partition. - * - *

- * <p>
- * Within a predicate partition the predicate component is implicit, so query optimization selects among
- * three-dimensional sort orders over the remaining components: subject, object, and context.
- *
- * <ul>
- * <li>soc - sorted by (subject, object, context)</li>
- * <li>osc - sorted by (object, subject, context)</li>
- * <li>cso - sorted by (context, subject, object)</li>
- * </ul>
- *
- * <p>
    - * The selection strategy counts leading bound components for each sort order and picks the order with the highest - * score. On ties, {@code soc} is preferred as the default. - */ -public class PartitionIndexSelector { - - private PartitionIndexSelector() { - // utility class - } - - /** - * Selects the best sort order for a within-partition query. - * - *

- * <p>
- * Within a partition, predicate is fixed, so we pick from:
- *
- * <ul>
- * <li>soc: sort by (subject, object, context)</li>
- * <li>osc: sort by (object, subject, context)</li>
- * <li>cso: sort by (context, subject, object)</li>
- * </ul>
    - * - * Each sort order is scored by counting its leading bound components. The order with the highest score wins. Ties - * are broken in favor of {@code soc}. - * - * @param subjectBound true if the subject is bound in the query - * @param objectBound true if the object is bound in the query - * @param contextBound true if the context is bound in the query - * @return the sort order suffix string: "soc", "osc", or "cso" - */ - public static String selectSortOrder(boolean subjectBound, boolean objectBound, boolean contextBound) { - // Score each sort order by counting leading bound components - - // soc: subject -> object -> context - int socScore = 0; - if (subjectBound) { - socScore++; - if (objectBound) { - socScore++; - if (contextBound) { - socScore++; - } - } - } - - // osc: object -> subject -> context - int oscScore = 0; - if (objectBound) { - oscScore++; - if (subjectBound) { - oscScore++; - if (contextBound) { - oscScore++; - } - } - } - - // cso: context -> subject -> object - int csoScore = 0; - if (contextBound) { - csoScore++; - if (subjectBound) { - csoScore++; - if (objectBound) { - csoScore++; - } - } - } - - // Pick highest score; ties prefer soc (default) - if (oscScore > socScore && oscScore > csoScore) { - return "osc"; - } - if (csoScore > socScore && csoScore > oscScore) { - return "cso"; - } - return "soc"; - } - - /** - * Returns the column order for a given sort order suffix. Used when sorting entries before writing to Parquet. - * - * @param sortOrder the sort order suffix: "soc", "osc", or "cso" - * @return array of column names in sort priority order - */ - public static String[] getColumnOrder(String sortOrder) { - switch (sortOrder) { - case "soc": - return new String[] { "subject", "object", "context" }; - case "osc": - return new String[] { "object", "subject", "context" }; - case "cso": - return new String[] { "context", "subject", "object" }; - default: - return new String[] { "subject", "object", "context" }; - } - } -} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java deleted file mode 100644 index 3109503cb72..00000000000 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/PartitionMergeIterator.java +++ /dev/null @@ -1,183 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.PriorityQueue; - -/** - * K-way merge iterator for within-partition queries. Works with 3-varint keys (subject, object, context encoded in - * partition sort order) where the predicate is implicit in the partition directory. - * - *

- * <p>
- * Sources are ordered newest-to-oldest. Deduplicates entries with the same key (newest wins), suppresses tombstones,
- * and filters by expected flag.
- *

    - */ -public class PartitionMergeIterator implements Iterator { - - private final long predicateId; - private final String sortOrder; - private final byte expectedFlag; - private final long patternS, patternO, patternC; - private final PriorityQueue heap; - private long[] next; - - /** - * @param sources list of sources ordered newest-to-oldest (index 0 = newest) - * @param predicateId the predicate ID for this partition (injected into results) - * @param sortOrder the sort order of all sources ("soc", "osc", or "cso") - * @param expectedFlag the flag to match (FLAG_EXPLICIT or FLAG_INFERRED) - * @param s subject pattern, or -1 for wildcard - * @param o object pattern, or -1 for wildcard - * @param c context pattern, or -1 for wildcard - */ - public PartitionMergeIterator(List sources, long predicateId, String sortOrder, - byte expectedFlag, long s, long o, long c) { - this.predicateId = predicateId; - this.sortOrder = sortOrder; - this.expectedFlag = expectedFlag; - this.patternS = s; - this.patternO = o; - this.patternC = c; - this.heap = new PriorityQueue<>(); - - for (int i = 0; i < sources.size(); i++) { - RawEntrySource src = sources.get(i); - if (src.hasNext()) { - heap.add(new SourceCursor(src, i)); - } - } - - advance(); - } - - private void advance() { - next = null; - while (!heap.isEmpty()) { - // Pop minimum key - SourceCursor min = heap.poll(); - byte[] winningKey = min.source.peekKey().clone(); - byte winningFlag = min.source.peekFlag(); - - // Advance the winning source - min.source.advance(); - if (min.source.hasNext()) { - heap.add(min); - } - - // Drain all sources with the same key (deduplication) - while (!heap.isEmpty() && Arrays.compareUnsigned(heap.peek().source.peekKey(), winningKey) == 0) { - SourceCursor dup = heap.poll(); - dup.source.advance(); - if (dup.source.hasNext()) { - heap.add(dup); - } - } - - // Tombstone suppression - if (winningFlag == MemTable.FLAG_TOMBSTONE) { - continue; - } - - // Flag filter - if (winningFlag != expectedFlag) { - continue; - } - - // Decode 3-varint key to (subject, object, context) based on sort order - long[] quad = decodePartitionKey(winningKey, sortOrder, predicateId); - - // Pattern filter - if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS) - || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO) - || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) { - continue; - } - - next = quad; - return; - } - } - - /** - * Decodes a 3-varint partition key into a full SPOC quad array. 
- */ - static long[] decodePartitionKey(byte[] key, String sortOrder, long predicateId) { - ByteBuffer bb = ByteBuffer.wrap(key); - long v1 = Varint.readUnsigned(bb); - long v2 = Varint.readUnsigned(bb); - long v3 = Varint.readUnsigned(bb); - - long[] quad = new long[4]; - quad[QuadIndex.PRED_IDX] = predicateId; - - switch (sortOrder) { - case "osc": - quad[QuadIndex.OBJ_IDX] = v1; - quad[QuadIndex.SUBJ_IDX] = v2; - quad[QuadIndex.CONTEXT_IDX] = v3; - break; - case "cso": - quad[QuadIndex.CONTEXT_IDX] = v1; - quad[QuadIndex.SUBJ_IDX] = v2; - quad[QuadIndex.OBJ_IDX] = v3; - break; - case "soc": - default: - quad[QuadIndex.SUBJ_IDX] = v1; - quad[QuadIndex.OBJ_IDX] = v2; - quad[QuadIndex.CONTEXT_IDX] = v3; - break; - } - - return quad; - } - - @Override - public boolean hasNext() { - return next != null; - } - - @Override - public long[] next() { - if (next == null) { - throw new NoSuchElementException(); - } - long[] result = next; - advance(); - return result; - } - - private static class SourceCursor implements Comparable { - final RawEntrySource source; - final int sourceIndex; // lower = newer - - SourceCursor(RawEntrySource source, int sourceIndex) { - this.source = source; - this.sourceIndex = sourceIndex; - } - - @Override - public int compareTo(SourceCursor other) { - int keyCmp = Arrays.compareUnsigned(this.source.peekKey(), other.source.peekKey()); - if (keyCmp != 0) { - return keyCmp; - } - // Ties broken by source index: lower = newer = wins (poll first) - return Integer.compare(this.sourceIndex, other.sourceIndex); - } - } -} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java index bb0aef89ec6..181db168173 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java @@ -27,10 +27,10 @@ */ public class QuadIndex { - static final int SUBJ_IDX = 0; - static final int PRED_IDX = 1; - static final int OBJ_IDX = 2; - static final int CONTEXT_IDX = 3; + public static final int SUBJ_IDX = 0; + public static final int PRED_IDX = 1; + public static final int OBJ_IDX = 2; + public static final int CONTEXT_IDX = 3; static final int MAX_KEY_LENGTH = 4 * 9; // 4 varints, max 9 bytes each diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java index c89ae8327d1..4a8374ac5fc 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java @@ -13,6 +13,9 @@ import static org.junit.jupiter.api.Assertions.*; import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.model.IRI; @@ -181,6 +184,159 @@ void deleteAndRestart_deletedQuadsGone() throws Exception { } } + @Test + void multiplePredicates_allQueriesWork() throws Exception { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3StoreConfig config = new S3StoreConfig(); + + IRI s1 = VF.createIRI("http://example.org/s1"); + IRI s2 = VF.createIRI("http://example.org/s2"); + IRI p1 = VF.createIRI("http://example.org/name"); + IRI p2 = VF.createIRI("http://example.org/age"); + IRI o1 = VF.createIRI("http://example.org/Alice"); + IRI o2 = 
VF.createIRI("http://example.org/30"); + + // Write data with multiple predicates, flush, restart + { + S3SailStore sailStore = new S3SailStore(config, store); + var source = sailStore.getExplicitSailSource(); + var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + sink.approve(s1, p1, o1, null); + sink.approve(s1, p2, o2, null); + sink.approve(s2, p1, o2, null); + sink.flush(); + sailStore.close(); + } + + // Restart and verify queries + { + S3SailStore sailStore = new S3SailStore(config, store); + var source = sailStore.getExplicitSailSource(); + var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + // All statements + List all = drain(dataset.getStatements(null, null, null)); + assertEquals(3, all.size()); + + // By predicate (p1) + List byP1 = drain(dataset.getStatements(null, p1, null)); + assertEquals(2, byP1.size()); + for (Statement st : byP1) { + assertEquals(p1.stringValue(), st.getPredicate().stringValue()); + } + + // By predicate (p2) + List byP2 = drain(dataset.getStatements(null, p2, null)); + assertEquals(1, byP2.size()); + assertEquals(p2.stringValue(), byP2.get(0).getPredicate().stringValue()); + + // By subject + List byS1 = drain(dataset.getStatements(s1, null, null)); + assertEquals(2, byS1.size()); + + // By subject + predicate + List byS1P1 = drain(dataset.getStatements(s1, p1, null)); + assertEquals(1, byS1P1.size()); + + // By object + List byO2 = drain(dataset.getStatements(null, null, o2)); + assertEquals(2, byO2.size()); + + dataset.close(); + sailStore.close(); + } + } + + @Test + void fileLayout_flatDataDirectory() throws Exception { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3StoreConfig config = new S3StoreConfig(); + + IRI s = VF.createIRI("http://example.org/s1"); + IRI p = VF.createIRI("http://example.org/p1"); + IRI o = VF.createIRI("http://example.org/o1"); + + { + S3SailStore sailStore = new S3SailStore(config, store); + var source = sailStore.getExplicitSailSource(); + var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + sink.approve(s, p, o, null); + sink.flush(); + sailStore.close(); + } + + // Verify flat file paths (no predicates/ directory) + List dataFiles = store.list("data/"); + assertFalse(dataFiles.isEmpty(), "Should have data files"); + for (String key : dataFiles) { + assertFalse(key.contains("predicates/"), "Should not have predicate partitions: " + key); + assertTrue(key.startsWith("data/L0-"), "Should start with data/L0-: " + key); + assertTrue(key.endsWith(".parquet"), "Should end with .parquet: " + key); + } + + // Should have 3 files (one per sort order) + assertEquals(3, dataFiles.size(), "Should have 3 files (spoc, opsc, cspo)"); + + // Check sort orders are present + List suffixes = dataFiles.stream() + .map(k -> k.substring(k.lastIndexOf('-') + 1, k.lastIndexOf('.'))) + .collect(Collectors.toList()); + assertTrue(suffixes.contains("spoc"), "Missing spoc file"); + assertTrue(suffixes.contains("opsc"), "Missing opsc file"); + assertTrue(suffixes.contains("cspo"), "Missing cspo file"); + } + + @Test + void contextQuery_afterRestart() throws Exception { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + S3StoreConfig config = new S3StoreConfig(); + + IRI s = VF.createIRI("http://example.org/s1"); + IRI p = VF.createIRI("http://example.org/p1"); + IRI o = VF.createIRI("http://example.org/o1"); + IRI g1 = VF.createIRI("http://example.org/graph1"); + IRI g2 = 
VF.createIRI("http://example.org/graph2"); + + { + S3SailStore sailStore = new S3SailStore(config, store); + var source = sailStore.getExplicitSailSource(); + var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + sink.approve(s, p, o, g1); + sink.approve(s, p, o, g2); + sink.flush(); + sailStore.close(); + } + + { + S3SailStore sailStore = new S3SailStore(config, store); + var source = sailStore.getExplicitSailSource(); + var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + + // Query by context g1 + List byG1 = drain( + dataset.getStatements(null, null, null, new org.eclipse.rdf4j.model.Resource[] { g1 })); + assertEquals(1, byG1.size()); + assertEquals(g1.stringValue(), byG1.get(0).getContext().stringValue()); + + // Query all + List all = drain(dataset.getStatements(null, null, null)); + assertEquals(2, all.size()); + + dataset.close(); + sailStore.close(); + } + } + + private List drain(CloseableIteration iter) { + List result = new ArrayList<>(); + while (iter.hasNext()) { + result.add(iter.next()); + } + iter.close(); + return result; + } + @Test void namespacePersistence() throws Exception { FileSystemObjectStore store = new FileSystemObjectStore(tempDir); diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java new file mode 100644 index 00000000000..4e7b41700ba --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java @@ -0,0 +1,150 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.junit.jupiter.api.Assertions.*; + +import java.nio.file.Path; +import java.util.List; +import java.util.Set; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * Tests for {@link Catalog} v3 — flat file list with per-file predicate statistics. 
+ */ +class CatalogTest { + + @TempDir + Path tempDir; + + private final ObjectMapper mapper = new ObjectMapper(); + + @Test + void newCatalog_version3() { + Catalog catalog = new Catalog(); + assertEquals(3, catalog.getVersion()); + } + + @Test + void addFile_appearsInFileList() { + Catalog catalog = new Catalog(); + Catalog.ParquetFileInfo info = makeFileInfo("data/L0-00001-spoc.parquet", "spoc", 0, 1); + catalog.addFile(info); + + assertEquals(1, catalog.getFiles().size()); + assertEquals("data/L0-00001-spoc.parquet", catalog.getFiles().get(0).getS3Key()); + } + + @Test + void removeFiles_removesMatchingKeys() { + Catalog catalog = new Catalog(); + catalog.addFile(makeFileInfo("data/L0-00001-spoc.parquet", "spoc", 0, 1)); + catalog.addFile(makeFileInfo("data/L0-00001-opsc.parquet", "opsc", 0, 1)); + catalog.addFile(makeFileInfo("data/L0-00002-spoc.parquet", "spoc", 0, 2)); + + catalog.removeFiles(Set.of("data/L0-00001-spoc.parquet", "data/L0-00001-opsc.parquet")); + + assertEquals(1, catalog.getFiles().size()); + assertEquals("data/L0-00002-spoc.parquet", catalog.getFiles().get(0).getS3Key()); + } + + @Test + void getFilesForSortOrder_filtersCorrectly() { + Catalog catalog = new Catalog(); + catalog.addFile(makeFileInfo("data/L0-00001-spoc.parquet", "spoc", 0, 1)); + catalog.addFile(makeFileInfo("data/L0-00001-opsc.parquet", "opsc", 0, 1)); + catalog.addFile(makeFileInfo("data/L0-00001-cspo.parquet", "cspo", 0, 1)); + catalog.addFile(makeFileInfo("data/L0-00002-spoc.parquet", "spoc", 0, 2)); + + List spocFiles = catalog.getFilesForSortOrder("spoc"); + assertEquals(2, spocFiles.size()); + + List opscFiles = catalog.getFilesForSortOrder("opsc"); + assertEquals(1, opscFiles.size()); + + List cspoFiles = catalog.getFilesForSortOrder("cspo"); + assertEquals(1, cspoFiles.size()); + } + + @Test + void saveAndLoad_roundTrip() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + Catalog catalog = new Catalog(); + catalog.setNextValueId(42); + catalog.addFile(makeFileInfo("data/L0-00001-spoc.parquet", "spoc", 0, 1)); + catalog.addFile(makeFileInfo("data/L0-00001-opsc.parquet", "opsc", 0, 1)); + catalog.save(store, mapper, 5); + + Catalog loaded = Catalog.load(store, mapper); + + assertEquals(3, loaded.getVersion()); + assertEquals(5, loaded.getEpoch()); + assertEquals(42, loaded.getNextValueId()); + assertEquals(2, loaded.getFiles().size()); + } + + @Test + void parquetFileInfo_predicateStats() { + Catalog.ParquetFileInfo info = new Catalog.ParquetFileInfo( + "data/L0-00001-spoc.parquet", 0, "spoc", 100, 1, 4096, + 1, 50, // subject + 10, 20, // predicate + 5, 40, // object + 0, 99 // context + ); + + assertEquals(10, info.getMinPredicate()); + assertEquals(20, info.getMaxPredicate()); + assertEquals(1, info.getMinSubject()); + assertEquals(50, info.getMaxSubject()); + } + + @Test + void saveAndLoad_preservesPredicateStats() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + Catalog catalog = new Catalog(); + catalog.addFile(new Catalog.ParquetFileInfo( + "data/L0-00001-spoc.parquet", 0, "spoc", 100, 1, 4096, + 1, 50, 10, 20, 5, 40, 0, 99)); + catalog.save(store, mapper, 1); + + Catalog loaded = Catalog.load(store, mapper); + Catalog.ParquetFileInfo info = loaded.getFiles().get(0); + + assertEquals(10, info.getMinPredicate()); + assertEquals(20, info.getMaxPredicate()); + assertEquals(1, info.getMinSubject()); + assertEquals(50, info.getMaxSubject()); + assertEquals(5, info.getMinObject()); + assertEquals(40, info.getMaxObject()); + assertEquals(0, 
info.getMinContext()); + assertEquals(99, info.getMaxContext()); + } + + @Test + void loadEmpty_returnsDefaultCatalog() { + FileSystemObjectStore store = new FileSystemObjectStore(tempDir); + Catalog loaded = Catalog.load(store, mapper); + + assertEquals(3, loaded.getVersion()); + assertEquals(0, loaded.getEpoch()); + assertTrue(loaded.getFiles().isEmpty()); + } + + private Catalog.ParquetFileInfo makeFileInfo(String s3Key, String sortOrder, int level, long epoch) { + return new Catalog.ParquetFileInfo(s3Key, level, sortOrder, 10, epoch, 1024, + 1, 100, 1, 100, 1, 100, 0, 100); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java new file mode 100644 index 00000000000..f7af42af634 --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link MemTable#asRawSource(QuadIndex, long, long, long, long)} — re-encoding keys from native SPOC order + * into a different target index order. 
+ */ +class MemTableReorderTest { + + private final QuadIndex spoc = new QuadIndex("spoc"); + private final QuadIndex opsc = new QuadIndex("opsc"); + private final QuadIndex cspo = new QuadIndex("cspo"); + + @Test + void sameIndex_delegatesToNativeSource() { + MemTable mt = new MemTable(spoc); + mt.put(10, 20, 30, 40, true); + mt.put(1, 2, 3, 4, true); + + RawEntrySource source = mt.asRawSource(spoc, -1, -1, -1, -1); + List results = drain(source, spoc); + + assertEquals(2, results.size()); + // SPOC order: (1,2,3,4) before (10,20,30,40) + assertArrayEquals(new long[] { 1, 2, 3, 4 }, results.get(0)); + assertArrayEquals(new long[] { 10, 20, 30, 40 }, results.get(1)); + } + + @Test + void reorderToOPSC_sortsByObjectFirst() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 30, 4, true); // object=30 + mt.put(5, 6, 10, 8, true); // object=10 + mt.put(9, 10, 20, 12, true); // object=20 + + RawEntrySource source = mt.asRawSource(opsc, -1, -1, -1, -1); + List results = drain(source, opsc); + + assertEquals(3, results.size()); + // OPSC order: sorted by object: 10, 20, 30 + assertEquals(10, results.get(0)[QuadIndex.OBJ_IDX]); + assertEquals(20, results.get(1)[QuadIndex.OBJ_IDX]); + assertEquals(30, results.get(2)[QuadIndex.OBJ_IDX]); + } + + @Test + void reorderToCSPO_sortsByContextFirst() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 30, true); // context=30 + mt.put(4, 5, 6, 10, true); // context=10 + mt.put(7, 8, 9, 20, true); // context=20 + + RawEntrySource source = mt.asRawSource(cspo, -1, -1, -1, -1); + List results = drain(source, cspo); + + assertEquals(3, results.size()); + // CSPO order: sorted by context: 10, 20, 30 + assertEquals(10, results.get(0)[QuadIndex.CONTEXT_IDX]); + assertEquals(20, results.get(1)[QuadIndex.CONTEXT_IDX]); + assertEquals(30, results.get(2)[QuadIndex.CONTEXT_IDX]); + } + + @Test + void reorderedSource_preservesAllComponents() { + MemTable mt = new MemTable(spoc); + mt.put(11, 22, 33, 44, true); + + RawEntrySource source = mt.asRawSource(opsc, -1, -1, -1, -1); + List results = drain(source, opsc); + + assertEquals(1, results.size()); + assertEquals(11, results.get(0)[QuadIndex.SUBJ_IDX]); + assertEquals(22, results.get(0)[QuadIndex.PRED_IDX]); + assertEquals(33, results.get(0)[QuadIndex.OBJ_IDX]); + assertEquals(44, results.get(0)[QuadIndex.CONTEXT_IDX]); + } + + @Test + void reorderedSource_appliesSubjectFilter() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 0, true); + mt.put(5, 6, 7, 0, true); + + RawEntrySource source = mt.asRawSource(opsc, 1, -1, -1, -1); + List results = drain(source, opsc); + + assertEquals(1, results.size()); + assertEquals(1, results.get(0)[QuadIndex.SUBJ_IDX]); + } + + @Test + void reorderedSource_appliesPredicateFilter() { + MemTable mt = new MemTable(spoc); + mt.put(1, 10, 3, 0, true); + mt.put(2, 20, 4, 0, true); + mt.put(3, 10, 5, 0, true); + + RawEntrySource source = mt.asRawSource(cspo, -1, 10, -1, -1); + List results = drain(source, cspo); + + assertEquals(2, results.size()); + for (long[] q : results) { + assertEquals(10, q[QuadIndex.PRED_IDX]); + } + } + + @Test + void reorderedSource_includesAliveAndTombstones() { + MemTable mt = new MemTable(spoc); + mt.put(1, 2, 3, 0, true); + mt.remove(5, 6, 7, 0, true); + + RawEntrySource source = mt.asRawSource(opsc, -1, -1, -1, -1); + + int count = 0; + boolean foundTombstone = false; + while (source.hasNext()) { + if (source.peekFlag() == MemTable.FLAG_TOMBSTONE) { + foundTombstone = true; + } + source.advance(); + count++; + } + + assertEquals(2, count); + 
assertTrue(foundTombstone, "RawEntrySource should include tombstones"); + } + + @Test + void reorderedSource_emptyTable_returnsEmpty() { + MemTable mt = new MemTable(spoc); + + RawEntrySource source = mt.asRawSource(opsc, -1, -1, -1, -1); + assertFalse(source.hasNext()); + } + + private List drain(RawEntrySource source, QuadIndex decodeIndex) { + List result = new ArrayList<>(); + while (source.hasNext()) { + long[] quad = new long[4]; + decodeIndex.keyToQuad(source.peekKey(), quad); + result.add(quad); + source.advance(); + } + return result; + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java new file mode 100644 index 00000000000..0f74d9fea4e --- /dev/null +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java @@ -0,0 +1,199 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.jupiter.api.Test; + +/** + * Tests for Parquet write/read round-trips using {@link ParquetFileBuilder} and {@link ParquetQuadSource} with the + * {@link ParquetSchemas#QUAD_SCHEMA} (5 columns, 4-varint keys). 
+ */ +class ParquetRoundTripTest { + + @Test + void roundTrip_spocOrder_allFieldsPreserved() { + QuadIndex spoc = new QuadIndex("spoc"); + List entries = List.of( + new ParquetFileBuilder.QuadEntry(1, 2, 3, 4, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(5, 6, 7, 8, MemTable.FLAG_INFERRED), + new ParquetFileBuilder.QuadEntry(9, 10, 11, 0, MemTable.FLAG_TOMBSTONE)); + + byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); + ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc); + + List results = drainWithFlags(source, spoc); + assertEquals(3, results.size()); + + // First entry: (1,2,3,4) FLAG_EXPLICIT + assertArrayEquals(new long[] { 1, 2, 3, 4, MemTable.FLAG_EXPLICIT }, results.get(0)); + // Second entry: (5,6,7,8) FLAG_INFERRED + assertArrayEquals(new long[] { 5, 6, 7, 8, MemTable.FLAG_INFERRED }, results.get(1)); + // Third entry: (9,10,11,0) FLAG_TOMBSTONE + assertArrayEquals(new long[] { 9, 10, 11, 0, MemTable.FLAG_TOMBSTONE }, results.get(2)); + } + + @Test + void roundTrip_opscOrder_keysSortedByObject() { + QuadIndex opsc = new QuadIndex("opsc"); + // Written sorted in OPSC order (by object: 10, 20, 30) + List entries = List.of( + new ParquetFileBuilder.QuadEntry(100, 200, 10, 0, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(300, 400, 20, 0, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(500, 600, 30, 0, MemTable.FLAG_EXPLICIT)); + + byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.OPSC); + ParquetQuadSource source = new ParquetQuadSource(parquetData, opsc); + + List results = drain(source, opsc); + assertEquals(3, results.size()); + // Keys should be in OPSC order (object first) + assertEquals(10, results.get(0)[QuadIndex.OBJ_IDX]); + assertEquals(20, results.get(1)[QuadIndex.OBJ_IDX]); + assertEquals(30, results.get(2)[QuadIndex.OBJ_IDX]); + } + + @Test + void roundTrip_cspoOrder_keysSortedByContext() { + QuadIndex cspo = new QuadIndex("cspo"); + // Written sorted in CSPO order (by context: 5, 10, 15) + List entries = List.of( + new ParquetFileBuilder.QuadEntry(1, 2, 3, 5, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(4, 5, 6, 10, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(7, 8, 9, 15, MemTable.FLAG_EXPLICIT)); + + byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.CSPO); + ParquetQuadSource source = new ParquetQuadSource(parquetData, cspo); + + List results = drain(source, cspo); + assertEquals(3, results.size()); + assertEquals(5, results.get(0)[QuadIndex.CONTEXT_IDX]); + assertEquals(10, results.get(1)[QuadIndex.CONTEXT_IDX]); + assertEquals(15, results.get(2)[QuadIndex.CONTEXT_IDX]); + } + + @Test + void roundTrip_filterBySubject() { + QuadIndex spoc = new QuadIndex("spoc"); + List entries = List.of( + new ParquetFileBuilder.QuadEntry(1, 2, 3, 0, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(5, 6, 7, 0, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(10, 11, 12, 0, MemTable.FLAG_EXPLICIT)); + + byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); + ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc, 5, -1, -1, -1); + + List results = drain(source, spoc); + assertEquals(1, results.size()); + assertEquals(5, results.get(0)[QuadIndex.SUBJ_IDX]); + } + + @Test + void roundTrip_filterByPredicate() { + QuadIndex spoc = new QuadIndex("spoc"); + List entries = List.of( + new ParquetFileBuilder.QuadEntry(1, 10, 3, 0, 
MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(2, 20, 4, 0, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(3, 10, 5, 0, MemTable.FLAG_EXPLICIT)); + + byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); + ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc, -1, 10, -1, -1); + + List results = drain(source, spoc); + assertEquals(2, results.size()); + for (long[] q : results) { + assertEquals(10, q[QuadIndex.PRED_IDX]); + } + } + + @Test + void roundTrip_filterByMultipleComponents() { + QuadIndex spoc = new QuadIndex("spoc"); + List entries = List.of( + new ParquetFileBuilder.QuadEntry(1, 2, 3, 4, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(1, 2, 99, 4, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(1, 99, 3, 4, MemTable.FLAG_EXPLICIT)); + + byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); + ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc, 1, 2, 3, 4); + + List results = drain(source, spoc); + assertEquals(1, results.size()); + assertArrayEquals(new long[] { 1, 2, 3, 4 }, results.get(0)); + } + + @Test + void roundTrip_emptyFile() { + QuadIndex spoc = new QuadIndex("spoc"); + byte[] parquetData = ParquetFileBuilder.build(List.of(), ParquetSchemas.SortOrder.SPOC); + ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc); + assertFalse(source.hasNext()); + } + + @Test + void mergeIterator_acrossParquetSources() { + QuadIndex spoc = new QuadIndex("spoc"); + + // File 1: newer epoch + List file1 = List.of( + new ParquetFileBuilder.QuadEntry(1, 2, 3, 0, MemTable.FLAG_EXPLICIT), + new ParquetFileBuilder.QuadEntry(5, 6, 7, 0, MemTable.FLAG_EXPLICIT)); + byte[] data1 = ParquetFileBuilder.build(file1, ParquetSchemas.SortOrder.SPOC); + + // File 2: older epoch, overlaps on (1,2,3,0) + List file2 = List.of( + new ParquetFileBuilder.QuadEntry(1, 2, 3, 0, MemTable.FLAG_INFERRED), + new ParquetFileBuilder.QuadEntry(10, 11, 12, 0, MemTable.FLAG_EXPLICIT)); + byte[] data2 = ParquetFileBuilder.build(file2, ParquetSchemas.SortOrder.SPOC); + + List sources = List.of( + new ParquetQuadSource(data1, spoc), + new ParquetQuadSource(data2, spoc)); + + MergeIterator iter = new MergeIterator(sources, spoc, MemTable.FLAG_EXPLICIT, -1, -1, -1, -1); + List results = new ArrayList<>(); + while (iter.hasNext()) { + results.add(iter.next()); + } + + // (1,2,3,0) from newer file is explicit → included + // (5,6,7,0) explicit → included + // (10,11,12,0) explicit → included + assertEquals(3, results.size()); + } + + private List drain(ParquetQuadSource source, QuadIndex decodeIndex) { + List result = new ArrayList<>(); + while (source.hasNext()) { + long[] quad = new long[4]; + decodeIndex.keyToQuad(source.peekKey(), quad); + result.add(quad); + source.advance(); + } + return result; + } + + private List drainWithFlags(ParquetQuadSource source, QuadIndex decodeIndex) { + List result = new ArrayList<>(); + while (source.hasNext()) { + long[] quad = new long[4]; + decodeIndex.keyToQuad(source.peekKey(), quad); + long[] withFlag = new long[] { quad[0], quad[1], quad[2], quad[3], source.peekFlag() }; + result.add(withFlag); + source.advance(); + } + return result; + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java new file mode 100644 index 00000000000..8fa8b7082b0 --- /dev/null +++ 
b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java @@ -0,0 +1,106 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.List; + +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link QuadIndex#getBestIndex(List, long, long, long, long)} — ensures the best sort order is selected for + * different query patterns. + */ +class QuadIndexSelectionTest { + + private static final QuadIndex SPOC = new QuadIndex("spoc"); + private static final QuadIndex OPSC = new QuadIndex("opsc"); + private static final QuadIndex CSPO = new QuadIndex("cspo"); + private static final List ALL = List.of(SPOC, OPSC, CSPO); + + @Test + void subjectBound_selectsSPOC() { + QuadIndex best = QuadIndex.getBestIndex(ALL, 1, -1, -1, -1); + assertEquals("spoc", best.getFieldSeqString()); + } + + @Test + void objectBound_selectsOPSC() { + QuadIndex best = QuadIndex.getBestIndex(ALL, -1, -1, 1, -1); + assertEquals("opsc", best.getFieldSeqString()); + } + + @Test + void contextBound_selectsCSPO() { + QuadIndex best = QuadIndex.getBestIndex(ALL, -1, -1, -1, 1); + assertEquals("cspo", best.getFieldSeqString()); + } + + @Test + void subjectAndPredicateBound_selectsSPOC() { + QuadIndex best = QuadIndex.getBestIndex(ALL, 1, 2, -1, -1); + assertEquals("spoc", best.getFieldSeqString()); + } + + @Test + void allBound_selectsSPOC() { + QuadIndex best = QuadIndex.getBestIndex(ALL, 1, 2, 3, 4); + assertEquals("spoc", best.getFieldSeqString()); + } + + @Test + void noneBound_selectsSPOC_asDefault() { + QuadIndex best = QuadIndex.getBestIndex(ALL, -1, -1, -1, -1); + // All have score 0; SPOC is first in the list so it wins ties + assertEquals("spoc", best.getFieldSeqString()); + } + + @Test + void predicateOnlyBound_selectsSPOC() { + // Predicate is second in SPOC, second in OPSC, third in CSPO — all have score 0 + // SPOC wins as first in list + QuadIndex best = QuadIndex.getBestIndex(ALL, -1, 5, -1, -1); + assertEquals("spoc", best.getFieldSeqString()); + } + + @Test + void objectAndPredicate_selectsOPSC() { + // OPSC: o(bound)=1, p(bound)=2, score=2 + // SPOC: s(unbound)=0, score=0 + // CSPO: c(unbound)=0, score=0 + QuadIndex best = QuadIndex.getBestIndex(ALL, -1, 5, 10, -1); + assertEquals("opsc", best.getFieldSeqString()); + } + + @Test + void contextAndSubject_selectsCSPO() { + // CSPO: c(bound)=1, s(bound)=2, score=2 + // SPOC: s(bound)=1, p(unbound), score=1 + // OPSC: o(unbound), score=0 + QuadIndex best = QuadIndex.getBestIndex(ALL, 1, -1, -1, 5); + assertEquals("cspo", best.getFieldSeqString()); + } + + @Test + void patternScore_countsLeadingBound() { + assertEquals(4, SPOC.getPatternScore(1, 2, 3, 4)); + assertEquals(2, SPOC.getPatternScore(1, 2, -1, -1)); + assertEquals(1, SPOC.getPatternScore(1, -1, -1, -1)); + assertEquals(0, SPOC.getPatternScore(-1, 2, 3, 4)); // s unbound → 0 + + assertEquals(2, OPSC.getPatternScore(-1, 2, 3, -1)); // o=3 bound, p=2 bound → 2 + assertEquals(0, 
OPSC.getPatternScore(1, -1, -1, -1)); // o unbound → 0 + + assertEquals(1, CSPO.getPatternScore(-1, -1, -1, 5)); // c=5 bound → 1 + assertEquals(3, CSPO.getPatternScore(1, 2, -1, 5)); // c=5, s=1, p=2 → 3 + } +} diff --git a/tools/workbench/src/main/webapp/transformations/create-s3.xsl b/tools/workbench/src/main/webapp/transformations/create-s3.xsl new file mode 100644 index 00000000000..911e25193b1 --- /dev/null +++ b/tools/workbench/src/main/webapp/transformations/create-s3.xsl @@ -0,0 +1,75 @@ + + + + + + + + + + + + +
+ <!-- XHTML form markup lost in extraction: the template renders the workbench repository-creation form, including an "S3 Prefix" text input -->
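The QuadIndexSelectionTest cases above pin down the selection rule that replaced PartitionIndexSelector: each candidate index earns one point per leading bound component of its field sequence, and the first index in the candidate list wins ties (hence SPOC as the default). A minimal sketch of that rule, for illustration only: the standalone `patternScore` helper and its wrapper class are hypothetical, while the shipped logic lives in `QuadIndex.getPatternScore` and `getBestIndex`.

```java
// Sketch of the leading-bound-components scoring rule exercised by QuadIndexSelectionTest.
// The class and helper are hypothetical; -1 is the module's wildcard sentinel.
final class IndexScoringSketch {

	static int patternScore(String fieldSeq, long s, long p, long o, long c) {
		int score = 0;
		for (char f : fieldSeq.toCharArray()) {
			long component;
			switch (f) {
			case 's':
				component = s;
				break;
			case 'p':
				component = p;
				break;
			case 'o':
				component = o;
				break;
			case 'c':
				component = c;
				break;
			default:
				throw new IllegalArgumentException("Invalid field: " + f);
			}
			if (component < 0) {
				break; // first unbound component ends the run; later bound fields score nothing
			}
			score++;
		}
		return score;
	}
}

// patternScore("spoc", 1, 2, -1, -1) == 2  -> SPOC serves (s, p, ?, ?) best
// patternScore("opsc", -1, 5, 10, -1) == 2 -> OPSC wins for (?, p, o, ?)
// patternScore("cspo", 1, 2, -1, 5) == 3   -> CSPO wins for (s, p, ?, c)
```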
    diff --git a/tools/workbench/src/main/webapp/transformations/create.xsl b/tools/workbench/src/main/webapp/transformations/create.xsl index d2ced21ed14..08705d11ab9 100644 --- a/tools/workbench/src/main/webapp/transformations/create.xsl +++ b/tools/workbench/src/main/webapp/transformations/create.xsl @@ -74,6 +74,7 @@ + From eeabec97ebe18b7142aba84beb34152ef894d9f2 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Fri, 27 Feb 2026 23:06:25 -0500 Subject: [PATCH 05/10] refactor: simplify S3 SAIL codebase and promote FileSystemObjectStore - Promote FileSystemObjectStore from test to production, enabling 3-mode backend selection (S3 / filesystem / in-memory) via config - Extract QuadStats value type to deduplicate stats computation in S3SailStore and Compactor - Add QuadIndex.matches() helper, eliminating 4-place quad-filter duplication across MergeIterator, MemTable, and ParquetQuadSource - Extract hasPersistence(), queryQuads(), resolveValueId() helpers in S3SailStore to remove repeated guard logic - Split flushToObjectStore() into focused methods - Merge CompactionPolicy.shouldCompactL0/L1 into shouldCompact() - Remove unused explicit param from MemTable.remove() - Delete dead ParquetFilterBuilder (zero usages) - Fix QuadIndex wildcard sentinel inconsistency in getMaxKey - Narrow Throwable catch to Exception in S3Store - Add dataDir config field for filesystem persistence mode --- .../eclipse/rdf4j/repository/config/s3.ttl | 3 +- .../eclipse/rdf4j/sail/s3/S3SailStore.java | 231 +++++++----------- .../org/eclipse/rdf4j/sail/s3/S3Store.java | 9 +- .../rdf4j/sail/s3/config/S3StoreConfig.java | 20 ++ .../rdf4j/sail/s3/config/S3StoreSchema.java | 6 + .../rdf4j/sail/s3/storage/Catalog.java | 7 + .../sail/s3/storage/CompactionPolicy.java | 22 +- .../rdf4j/sail/s3/storage/Compactor.java | 37 +-- .../s3/storage/FileSystemObjectStore.java | 26 +- .../rdf4j/sail/s3/storage/MemTable.java | 22 +- .../rdf4j/sail/s3/storage/MergeIterator.java | 5 +- .../sail/s3/storage/ParquetFilterBuilder.java | 82 ------- .../sail/s3/storage/ParquetQuadSource.java | 10 +- .../rdf4j/sail/s3/storage/QuadIndex.java | 45 +++- .../rdf4j/sail/s3/storage/QuadStats.java | 80 ++++++ .../sail/s3/storage/MemTableReorderTest.java | 2 +- .../sail/s3/storage/MergeIteratorTest.java | 4 +- .../main/webapp/transformations/create-s3.xsl | 7 + 18 files changed, 301 insertions(+), 317 deletions(-) rename core/sail/s3/src/{test => main}/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java (78%) delete mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java diff --git a/core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl b/core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl index 3806dc0bed4..78da5e4c7fc 100644 --- a/core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl +++ b/core/repository/api/src/main/resources/org/eclipse/rdf4j/repository/config/s3.ttl @@ -20,6 +20,7 @@ config:rep.type "openrdf:SailRepository" ; config:sail.impl [ config:sail.type "rdf4j:S3Store" ; - s3:s3Prefix "{%S3 Prefix|%}" + s3:s3Prefix "{%S3 Prefix|%}" ; + s3:dataDir "{%Data Directory|%}" ] ]. 
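The s3.ttl template above now carries both `s3:s3Prefix` and the new `s3:dataDir` property, which is what drives the three backend modes named in the commit message. A configuration sketch under stated assumptions: `setS3Bucket` is assumed to exist alongside the `getS3Bucket` getter used by `isS3Configured()`, and the precedence mirrors the `createObjectStore()` helper in the S3SailStore diff below.

```java
// Three-mode backend selection, mirroring createObjectStore(): a configured bucket wins,
// then dataDir, then pure in-memory. setS3Bucket is assumed; setDataDir is from this patch.
S3StoreConfig s3 = new S3StoreConfig();
s3.setS3Bucket("my-quads-bucket");         // bucket set -> isS3Configured() -> S3ObjectStore

S3StoreConfig fs = new S3StoreConfig()
		.setDataDir("/var/rdf4j/s3-data"); // no bucket -> FileSystemObjectStore(Path.of(dataDir))

S3StoreConfig mem = new S3StoreConfig();   // neither set -> null ObjectStore: in-memory only

// Per the diff, getDataDir() also falls back to the RDF4J_S3_DATA_DIR environment variable
// or the rdf4j.s3.dataDir system property when no explicit value is configured.
```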
diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index 8aec14e4df3..565fce11ca5 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -48,6 +48,7 @@ import org.eclipse.rdf4j.sail.s3.storage.Catalog; import org.eclipse.rdf4j.sail.s3.storage.CompactionPolicy; import org.eclipse.rdf4j.sail.s3.storage.Compactor; +import org.eclipse.rdf4j.sail.s3.storage.FileSystemObjectStore; import org.eclipse.rdf4j.sail.s3.storage.MemTable; import org.eclipse.rdf4j.sail.s3.storage.MergeIterator; import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; @@ -55,6 +56,7 @@ import org.eclipse.rdf4j.sail.s3.storage.ParquetQuadSource; import org.eclipse.rdf4j.sail.s3.storage.ParquetSchemas; import org.eclipse.rdf4j.sail.s3.storage.QuadIndex; +import org.eclipse.rdf4j.sail.s3.storage.QuadStats; import org.eclipse.rdf4j.sail.s3.storage.RawEntrySource; import org.eclipse.rdf4j.sail.s3.storage.S3ObjectStore; import org.slf4j.Logger; @@ -97,7 +99,7 @@ class S3SailStore implements SailStore { // Persistence fields (null when S3 is not configured) private final ObjectStore objectStore; private final ObjectMapper jsonMapper; - private Catalog catalog; + private final Catalog catalog; private final AtomicLong epochCounter; private final long memTableFlushSize; private final TieredCache cache; @@ -112,11 +114,20 @@ class S3SailStore implements SailStore { private final ReentrantLock sinkStoreAccessLock = new ReentrantLock(); S3SailStore(S3StoreConfig config) { - this(config, config.isS3Configured() - ? new S3ObjectStore(config.getS3Bucket(), config.getS3Endpoint(), config.getS3Region(), - config.getS3Prefix(), config.getS3AccessKey(), config.getS3SecretKey(), - config.isS3ForcePathStyle()) - : null); + this(config, createObjectStore(config)); + } + + private static ObjectStore createObjectStore(S3StoreConfig config) { + if (config.isS3Configured()) { + return new S3ObjectStore(config.getS3Bucket(), config.getS3Endpoint(), config.getS3Region(), + config.getS3Prefix(), config.getS3AccessKey(), config.getS3SecretKey(), + config.isS3ForcePathStyle()); + } + String dataDir = config.getDataDir(); + if (dataDir != null && !dataDir.isEmpty()) { + return new FileSystemObjectStore(Path.of(dataDir)); + } + return null; // in-memory only } /** @@ -154,6 +165,7 @@ class S3SailStore implements SailStore { namespaceStore.deserialize(objectStore, jsonMapper); } else { this.jsonMapper = null; + this.catalog = null; this.epochCounter = null; this.cache = null; this.compactionPolicy = null; @@ -206,25 +218,24 @@ private void flushToObjectStore() { return; } - if (memTable.size() == 0) { - // Still persist value store and namespaces - long epoch = epochCounter.getAndIncrement(); - valueStore.serialize(objectStore); - namespaceStore.serialize(objectStore, jsonMapper); - catalog.setNextValueId(valueStore.getNextId()); - catalog.setEpoch(epoch); - catalog.save(objectStore, jsonMapper, epoch); - return; - } - long epoch = epochCounter.getAndIncrement(); - // Freeze active MemTable and swap in fresh one - MemTable frozen = memTable; - frozen.freeze(); - memTable = new MemTable(SPOC_INDEX); + if (memTable.size() > 0) { + // Freeze active MemTable and swap in fresh one + MemTable frozen = memTable; + frozen.freeze(); + memTable = new MemTable(SPOC_INDEX); - // Collect all entries as full quads + List allQuads = collectQuads(frozen); + QuadStats 
stats = QuadStats.fromQuads(allQuads); + writeParquetFiles(epoch, allQuads, stats); + } + + persistMetadata(epoch); + runCompactionIfNeeded(); + } + + private static List collectQuads(MemTable frozen) { List allQuads = new ArrayList<>(frozen.size()); long[] quad = new long[4]; for (Map.Entry entry : frozen.getData().entrySet()) { @@ -237,61 +248,36 @@ private void flushToObjectStore() { q[4] = entry.getValue()[0]; allQuads.add(q); } + return allQuads; + } - // Compute stats across all entries - long minSubject = Long.MAX_VALUE, maxSubject = Long.MIN_VALUE; - long minPredicate = Long.MAX_VALUE, maxPredicate = Long.MIN_VALUE; - long minObject = Long.MAX_VALUE, maxObject = Long.MIN_VALUE; - long minContext = Long.MAX_VALUE, maxContext = Long.MIN_VALUE; - for (long[] q : allQuads) { - minSubject = Math.min(minSubject, q[0]); - maxSubject = Math.max(maxSubject, q[0]); - minPredicate = Math.min(minPredicate, q[1]); - maxPredicate = Math.max(maxPredicate, q[1]); - minObject = Math.min(minObject, q[2]); - maxObject = Math.max(maxObject, q[2]); - minContext = Math.min(minContext, q[3]); - maxContext = Math.max(maxContext, q[3]); - } - - // For each sort order, sort and write one Parquet file + private void writeParquetFiles(long epoch, List allQuads, QuadStats stats) { for (QuadIndex sortIndex : ALL_INDEXES) { String sortSuffix = sortIndex.getFieldSeqString(); - - // Sort entries according to the sort order List sorted = sortQuadEntries(allQuads, sortIndex); - // Build Parquet file ParquetSchemas.SortOrder sortOrder = ParquetSchemas.SortOrder.fromSuffix(sortSuffix); byte[] parquetData = ParquetFileBuilder.build(sorted, ParquetSchemas.QUAD_SCHEMA, sortOrder, rowGroupSize, pageSize); String s3Key = "data/L0-" + String.format("%05d", epoch) + "-" + sortSuffix + ".parquet"; - objectStore.put(s3Key, parquetData); - // Write-through to cache if (cache != null) { cache.writeThrough(s3Key, parquetData); } catalog.addFile(new Catalog.ParquetFileInfo( - s3Key, 0, sortSuffix, sorted.size(), epoch, parquetData.length, - minSubject, maxSubject, minPredicate, maxPredicate, - minObject, maxObject, minContext, maxContext)); + s3Key, 0, sortSuffix, sorted.size(), epoch, parquetData.length, stats)); } + } - // Persist value store and namespaces + private void persistMetadata(long epoch) { valueStore.serialize(objectStore); namespaceStore.serialize(objectStore, jsonMapper); - - // Atomic catalog update catalog.setNextValueId(valueStore.getNextId()); catalog.setEpoch(epoch); catalog.save(objectStore, jsonMapper, epoch); - - // Check compaction triggers - runCompactionIfNeeded(); } /** @@ -302,7 +288,7 @@ private static List sortQuadEntries(List q String seq = sortIndex.getFieldSeqString(); sorted.sort((a, b) -> { for (int i = 0; i < 4; i++) { - int idx = fieldCharToIdx(seq.charAt(i)); + int idx = QuadIndex.fieldCharToIdx(seq.charAt(i)); int cmp = Long.compare(a[idx], b[idx]); if (cmp != 0) { return cmp; @@ -318,21 +304,6 @@ private static List sortQuadEntries(List q return result; } - private static int fieldCharToIdx(char c) { - switch (c) { - case 's': - return 0; - case 'p': - return 1; - case 'o': - return 2; - case 'c': - return 3; - default: - throw new IllegalArgumentException("Invalid field: " + c); - } - } - /** * Checks compaction triggers and runs compaction if needed. 
*/ @@ -344,17 +315,15 @@ private void runCompactionIfNeeded() { List files = catalog.getFiles(); // L0→L1 compaction - if (compactionPolicy.shouldCompactL0(files)) { + if (compactionPolicy.shouldCompact(files, 0)) { List l0Files = CompactionPolicy.filesAtLevel(files, 0); long compactEpoch = epochCounter.getAndIncrement(); compactor.compact(l0Files, 0, 1, compactEpoch, catalog); - - // Re-fetch files after compaction files = catalog.getFiles(); } // L1→L2 compaction - if (compactionPolicy.shouldCompactL1(files)) { + if (compactionPolicy.shouldCompact(files, 1)) { List l1Files = CompactionPolicy.filesAtLevel(files, 1); long compactEpoch = epochCounter.getAndIncrement(); compactor.compact(l1Files, 1, 2, compactEpoch, catalog); @@ -366,6 +335,30 @@ private void runCompactionIfNeeded() { catalog.save(objectStore, jsonMapper, epoch); } + private boolean hasPersistence() { + return objectStore != null; + } + + /** + * Queries quads using the best available source (merged Parquet + MemTable, or MemTable only). + */ + private Iterator queryQuads(long s, long p, long o, long c, boolean explicit) { + return hasPersistence() + ? createMergedIterator(s, p, o, c, explicit) + : memTable.scan(s, p, o, c, explicit); + } + + /** + * Resolves a Value to its stored ID. Returns UNKNOWN_ID if the value is null, or the stored ID (which may be + * UNKNOWN_ID if the value is not in the store). + */ + private long resolveValueId(Value value) { + if (value == null) { + return S3ValueStore.UNKNOWN_ID; + } + return valueStore.getId(value); + } + /** * Creates a statement iterator for the given pattern using stats-based pruning. */ @@ -376,28 +369,19 @@ CloseableIteration createStatementIterator( return new EmptyIteration<>(); } - long subjID = S3ValueStore.UNKNOWN_ID; - if (subj != null) { - subjID = valueStore.getId(subj); - if (subjID == S3ValueStore.UNKNOWN_ID) { - return new EmptyIteration<>(); - } + long subjID = resolveValueId(subj); + if (subj != null && subjID == S3ValueStore.UNKNOWN_ID) { + return new EmptyIteration<>(); } - long predID = S3ValueStore.UNKNOWN_ID; - if (pred != null) { - predID = valueStore.getId(pred); - if (predID == S3ValueStore.UNKNOWN_ID) { - return new EmptyIteration<>(); - } + long predID = resolveValueId(pred); + if (pred != null && predID == S3ValueStore.UNKNOWN_ID) { + return new EmptyIteration<>(); } - long objID = S3ValueStore.UNKNOWN_ID; - if (obj != null) { - objID = valueStore.getId(obj); - if (objID == S3ValueStore.UNKNOWN_ID) { - return new EmptyIteration<>(); - } + long objID = resolveValueId(obj); + if (obj != null && objID == S3ValueStore.UNKNOWN_ID) { + return new EmptyIteration<>(); } List contextIDList = new ArrayList<>(contexts.length == 0 ? 
1 : contexts.length); @@ -420,17 +404,10 @@ CloseableIteration createStatementIterator( return new EmptyIteration<>(); } - boolean hasPersistence = objectStore != null && catalog != null; - ArrayList> perContextIterList = new ArrayList<>(contextIDList.size()); for (long contextID : contextIDList) { - Iterator quads; - if (hasPersistence) { - quads = createMergedIterator(subjID, predID, objID, contextID, explicit); - } else { - quads = memTable.scan(subjID, predID, objID, contextID, explicit); - } + Iterator quads = queryQuads(subjID, predID, objID, contextID, explicit); perContextIterList.add(new QuadToStatementIteration(quads, valueStore)); } @@ -665,34 +642,19 @@ private long removeStatements(Resource subj, IRI pred, Value obj, boolean explic sinkStoreAccessLock.lock(); try { - final long subjID; - if (subj != null) { - subjID = valueStore.getId(subj); - if (subjID == S3ValueStore.UNKNOWN_ID) { - return 0; - } - } else { - subjID = S3ValueStore.UNKNOWN_ID; + long subjID = resolveValueId(subj); + if (subj != null && subjID == S3ValueStore.UNKNOWN_ID) { + return 0; } - final long predID; - if (pred != null) { - predID = valueStore.getId(pred); - if (predID == S3ValueStore.UNKNOWN_ID) { - return 0; - } - } else { - predID = S3ValueStore.UNKNOWN_ID; + long predID = resolveValueId(pred); + if (pred != null && predID == S3ValueStore.UNKNOWN_ID) { + return 0; } - final long objID; - if (obj != null) { - objID = valueStore.getId(obj); - if (objID == S3ValueStore.UNKNOWN_ID) { - return 0; - } - } else { - objID = S3ValueStore.UNKNOWN_ID; + long objID = resolveValueId(obj); + if (obj != null && objID == S3ValueStore.UNKNOWN_ID) { + return 0; } final long[] contextIds; @@ -713,21 +675,16 @@ private long removeStatements(Resource subj, IRI pred, Value obj, boolean explic long removeCount = 0; for (long contextId : contextIds) { - boolean hasPersistence = objectStore != null && catalog != null; - - Iterator iter; - if (hasPersistence) { - iter = createMergedIterator(subjID, predID, objID, contextId, explicit); - } else { - iter = memTable.scan(subjID, predID, objID, contextId, explicit); - } + Iterator iter = queryQuads(subjID, predID, objID, contextId, explicit); + // Buffer results before removing to avoid ConcurrentModificationException + // when the iterator is backed by the MemTable's own map List toRemove = new ArrayList<>(); while (iter.hasNext()) { toRemove.add(iter.next()); } for (long[] quad : toRemove) { - memTable.remove(quad[0], quad[1], quad[2], quad[3], explicit); + memTable.remove(quad[0], quad[1], quad[2], quad[3]); removeCount++; } } @@ -764,15 +721,7 @@ public CloseableIteration getNamespaces() { @Override public CloseableIteration getContextIDs() throws SailException { - // Scan all quads and collect distinct non-null contexts - boolean hasPersistence = objectStore != null && catalog != null; - - Iterator allQuads; - if (hasPersistence) { - allQuads = createMergedIterator(-1, -1, -1, -1, explicit); - } else { - allQuads = memTable.scan(-1, -1, -1, -1, explicit); - } + Iterator allQuads = queryQuads(-1, -1, -1, -1, explicit); return new FilterIteration( new ConvertingIteration( diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java index d7c84ad0d22..7de2ac603d1 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java @@ -35,11 +35,12 @@ import org.slf4j.LoggerFactory; /** - * A SAIL implementation 
that stores RDF data on S3-compatible object storage using an LSM-tree architecture. + * A SAIL implementation that stores RDF data on S3-compatible object storage using an LSM-tree architecture with + * Parquet files, stats-based pruning, and multi-tier caching (Caffeine heap + disk LRU + S3). * *

- * Phase 1b: In-memory only. Data is stored in sorted MemTables and is not yet persisted to S3. This enables passing the
- * SAIL compliance tests with the core storage engine.
+ * Supports three modes: S3 persistence (bucket configured), local filesystem persistence (dataDir configured), or pure
+ * in-memory (neither configured).
 *

    * * @implNote the S3 store is in an experimental state: its existence, signature or behavior may change without warning @@ -154,7 +155,7 @@ public SailSource getInferredSailSource() { } } }; - } catch (Throwable e) { + } catch (Exception e) { throw new SailException(e); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java index afccacbf27d..78a7b55c0a7 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java @@ -89,6 +89,8 @@ public class S3StoreConfig extends BaseSailConfig { private Boolean s3ForcePathStyle; + private String dataDir; + /*--------------* * Constructors * *--------------*/ @@ -290,6 +292,18 @@ public boolean isS3Configured() { return getS3Bucket() != null && !getS3Bucket().isEmpty(); } + public String getDataDir() { + if (dataDir != null) { + return dataDir; + } + return resolveEnv("RDF4J_S3_DATA_DIR", "rdf4j.s3.dataDir"); + } + + public S3StoreConfig setDataDir(String dataDir) { + this.dataDir = dataDir; + return this; + } + @Override public Resource export(Model m) { Resource implNode = super.export(m); @@ -341,6 +355,9 @@ public Resource export(Model m) { if (s3ForcePathStyle != null) { m.add(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, vf.createLiteral(s3ForcePathStyle)); } + if (dataDir != null) { + m.add(implNode, S3StoreSchema.DATA_DIR, vf.createLiteral(dataDir)); + } return implNode; } @@ -440,6 +457,9 @@ public void parse(Model m, Resource implNode) throws SailConfigException { Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, null)) .ifPresent(lit -> setS3ForcePathStyle(lit.booleanValue())); + + Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.DATA_DIR, null)) + .ifPresent(lit -> setDataDir(lit.getLabel())); } catch (ModelException e) { throw new SailConfigException(e.getMessage(), e); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java index cc0d2f7730f..876271a1f9d 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java @@ -78,6 +78,11 @@ public class S3StoreSchema { public final static IRI S3_FORCE_PATH_STYLE; + /** + * http://rdf4j.org/config/sail/s3#dataDir + */ + public final static IRI DATA_DIR; + static { ValueFactory factory = SimpleValueFactory.getInstance(); QUAD_INDEXES = factory.createIRI(NAMESPACE, "quadIndexes"); @@ -95,5 +100,6 @@ public class S3StoreSchema { S3_ACCESS_KEY = factory.createIRI(NAMESPACE, "s3AccessKey"); S3_SECRET_KEY = factory.createIRI(NAMESPACE, "s3SecretKey"); S3_FORCE_PATH_STYLE = factory.createIRI(NAMESPACE, "s3ForcePathStyle"); + DATA_DIR = factory.createIRI(NAMESPACE, "dataDir"); } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java index 55b60b898c7..5036adc99d8 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java @@ -230,6 +230,13 @@ public static class ParquetFileInfo { public ParquetFileInfo() { } + public ParquetFileInfo(String s3Key, int level, String sortOrder, long 
rowCount, + long epoch, long sizeBytes, QuadStats stats) { + this(s3Key, level, sortOrder, rowCount, epoch, sizeBytes, + stats.minSubject, stats.maxSubject, stats.minPredicate, stats.maxPredicate, + stats.minObject, stats.maxObject, stats.minContext, stats.maxContext); + } + public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, long epoch, long sizeBytes, long minSubject, long maxSubject, diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java index bd749b3904b..c2006bd4609 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java @@ -39,23 +39,15 @@ public CompactionPolicy(int l0Threshold, int l1Threshold) { } /** - * Checks if L0→L1 compaction should run for the given predicate partition files. + * Checks if compaction should run at the given level. * - * @param files all files in the predicate partition - * @return true if the number of distinct L0 epochs >= l0Threshold + * @param files all catalog files + * @param level the source level (0 or 1) + * @return true if the number of distinct epochs at that level >= threshold */ - public boolean shouldCompactL0(List files) { - return countEpochsAtLevel(files, 0) >= l0Threshold; - } - - /** - * Checks if L1→L2 compaction should run for the given predicate partition files. - * - * @param files all files in the predicate partition - * @return true if the number of distinct L1 epochs >= l1Threshold - */ - public boolean shouldCompactL1(List files) { - return countEpochsAtLevel(files, 1) >= l1Threshold; + public boolean shouldCompact(List files, int level) { + int threshold = level == 0 ? 
l0Threshold : l1Threshold; + return countEpochsAtLevel(files, level) >= threshold; } private static int countEpochsAtLevel(List files, int level) { diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java index b69054c8895..2e901ff1c20 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java @@ -116,26 +116,9 @@ public CompactionResult compact(List sourceFiles, cache.writeThrough(s3Key, parquetData); } - // Compute stats from merged entries - long minSubject = Long.MAX_VALUE, maxSubject = Long.MIN_VALUE; - long minPredicate = Long.MAX_VALUE, maxPredicate = Long.MIN_VALUE; - long minObject = Long.MAX_VALUE, maxObject = Long.MIN_VALUE; - long minContext = Long.MAX_VALUE, maxContext = Long.MIN_VALUE; - for (ParquetFileBuilder.QuadEntry e : merged) { - minSubject = Math.min(minSubject, e.subject); - maxSubject = Math.max(maxSubject, e.subject); - minPredicate = Math.min(minPredicate, e.predicate); - maxPredicate = Math.max(maxPredicate, e.predicate); - minObject = Math.min(minObject, e.object); - maxObject = Math.max(maxObject, e.object); - minContext = Math.min(minContext, e.context); - maxContext = Math.max(maxContext, e.context); - } - + QuadStats stats = QuadStats.fromEntries(merged); newFiles.add(new Catalog.ParquetFileInfo(s3Key, targetLevel, sortOrder, merged.size(), - epoch, parquetData.length, - minSubject, maxSubject, minPredicate, maxPredicate, - minObject, maxObject, minContext, maxContext)); + epoch, parquetData.length, stats)); } // Update catalog: remove old files, add new ones @@ -160,18 +143,14 @@ public CompactionResult compact(List sourceFiles, private List mergeEntries(List sources, QuadIndex quadIndex, boolean suppressTombstones) { - List result = new ArrayList<>(); - // Sources are ordered newest-first, so for dedup, first occurrence wins java.util.TreeMap deduped = new java.util.TreeMap<>(); for (RawEntrySource source : sources) { while (source.hasNext()) { byte[] key = source.peekKey(); byte flag = source.peekFlag(); - // Only insert if not already present (first = newest wins) CompactKey ck = new CompactKey(key); if (!deduped.containsKey(ck)) { - // Decode 4-varint key to quad values long[] quad = new long[4]; quadIndex.keyToQuad(key, quad); if (!suppressTombstones || flag != MemTable.FLAG_TOMBSTONE) { @@ -184,17 +163,7 @@ private List mergeEntries(List sou } } - if (suppressTombstones) { - for (ParquetFileBuilder.QuadEntry e : deduped.values()) { - if (e.flag != MemTable.FLAG_TOMBSTONE) { - result.add(e); - } - } - } else { - result.addAll(deduped.values()); - } - - return result; + return new ArrayList<>(deduped.values()); } private static class CompactKey implements Comparable { diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java similarity index 78% rename from core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java rename to core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java index b434757cc2c..4fdc54ab375 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java @@ -11,16 +11,17 @@ package 
org.eclipse.rdf4j.sail.s3.storage; import java.io.IOException; +import java.io.RandomAccessFile; import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.stream.Stream; /** - * Test double for {@link ObjectStore} backed by the local filesystem. + * {@link ObjectStore} implementation backed by the local filesystem. Stores each key as a file under the configured + * root directory, creating subdirectories as needed. */ public class FileSystemObjectStore implements ObjectStore { @@ -60,13 +61,24 @@ public byte[] get(String key) { @Override public byte[] getRange(String key, long offset, long length) { - byte[] full = get(key); - if (full == null) { + Path target = resolve(key); + if (!Files.exists(target)) { return null; } - int start = (int) offset; - int end = (int) Math.min(start + length, full.length); - return Arrays.copyOfRange(full, start, end); + try (RandomAccessFile raf = new RandomAccessFile(target.toFile(), "r")) { + long fileLen = raf.length(); + int start = (int) Math.min(offset, fileLen); + int readLen = (int) Math.min(length, fileLen - start); + if (readLen <= 0) { + return new byte[0]; + } + raf.seek(start); + byte[] buf = new byte[readLen]; + raf.readFully(buf); + return buf; + } catch (IOException e) { + throw new UncheckedIOException(e); + } } @Override diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java index 56b844da6c0..6b456924d4d 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java @@ -87,14 +87,13 @@ public void put(long s, long p, long o, long c, boolean explicit) { /** * Removes a quad by writing a tombstone. 
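[The ranged read above clamps rather than throws: an offset past end-of-file yields an empty array, a partially out-of-range request returns the available suffix, and a missing key still returns null. A usage sketch; the root path and key are illustrative, and the Path-based constructor is an assumption:

    // For a 100-byte object stored under "catalog/current":
    FileSystemObjectStore store = new FileSystemObjectStore(Path.of("/tmp/rdf4j-s3"));
    byte[] head = store.getRange("catalog/current", 0, 16);  // bytes 0..15
    byte[] tail = store.getRange("catalog/current", 90, 64); // clamped to the last 10 bytes
    byte[] past = store.getRange("catalog/current", 200, 8); // empty array, not null
    byte[] gone = store.getRange("no/such/key", 0, 8);       // null: key absent
]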
* - * @param s subject ID - * @param p predicate ID - * @param o object ID - * @param c context ID - * @param explicit true for explicit, false for inferred (currently unused; tombstone applies to either) + * @param s subject ID + * @param p predicate ID + * @param o object ID + * @param c context ID * @throws IllegalStateException if the table is frozen */ - public void remove(long s, long p, long o, long c, boolean explicit) { + public void remove(long s, long p, long o, long c) { checkNotFrozen(); byte[] key = index.toKeyBytes(s, p, o, c); data.put(key, VALUE_TOMBSTONE); @@ -238,11 +237,7 @@ public RawEntrySource asRawSource(QuadIndex targetIndex, long s, long p, long o, long[] quad = new long[4]; for (Map.Entry entry : range.entrySet()) { index.keyToQuad(entry.getKey(), quad); - // Apply additional filters (range scan may include extra entries) - if ((s >= 0 && quad[QuadIndex.SUBJ_IDX] != s) - || (p >= 0 && quad[QuadIndex.PRED_IDX] != p) - || (o >= 0 && quad[QuadIndex.OBJ_IDX] != o) - || (c >= 0 && quad[QuadIndex.CONTEXT_IDX] != c)) { + if (!QuadIndex.matches(quad, s, p, o, c)) { continue; } byte[] newKey = targetIndex.toKeyBytes( @@ -370,10 +365,7 @@ private void advance() { } long[] quad = new long[4]; quadIndex.keyToQuad(entry.getKey(), quad); - if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS) - || (patternP >= 0 && quad[QuadIndex.PRED_IDX] != patternP) - || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO) - || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) { + if (!QuadIndex.matches(quad, patternS, patternP, patternO, patternC)) { continue; } next = quad; diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java index f94893f7834..c0844bafdae 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java @@ -94,10 +94,7 @@ private void advance() { long[] quad = new long[4]; quadIndex.keyToQuad(winningKey, quad); - if ((patternS >= 0 && quad[QuadIndex.SUBJ_IDX] != patternS) - || (patternP >= 0 && quad[QuadIndex.PRED_IDX] != patternP) - || (patternO >= 0 && quad[QuadIndex.OBJ_IDX] != patternO) - || (patternC >= 0 && quad[QuadIndex.CONTEXT_IDX] != patternC)) { + if (!QuadIndex.matches(quad, patternS, patternP, patternO, patternC)) { continue; } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java deleted file mode 100644 index 5f32dfe01c7..00000000000 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFilterBuilder.java +++ /dev/null @@ -1,82 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. 
- * - * SPDX-License-Identifier: BSD-3-Clause - *******************************************************************************/ -package org.eclipse.rdf4j.sail.s3.storage; - -import static org.apache.parquet.filter2.predicate.FilterApi.and; -import static org.apache.parquet.filter2.predicate.FilterApi.eq; -import static org.apache.parquet.filter2.predicate.FilterApi.longColumn; - -import org.apache.parquet.filter2.compat.FilterCompat; -import org.apache.parquet.filter2.predicate.FilterPredicate; - -/** - * Builds Parquet {@link FilterPredicate}s from quad query patterns. Bound components (>= 0) become equality filters; - * unbound components (-1) are omitted. - */ -public class ParquetFilterBuilder { - - /** - * Builds a Parquet filter for a within-partition query (predicate is implicit). - * - * @param subject subject ID, or -1 for wildcard - * @param object object ID, or -1 for wildcard - * @param context context ID, or -1 for wildcard - * @return a FilterCompat.Filter, or FilterCompat.NOOP if no filters apply - */ - public static FilterCompat.Filter buildPartitionedFilter(long subject, long object, long context) { - FilterPredicate predicate = null; - - if (subject >= 0) { - predicate = eq(longColumn(ParquetSchemas.COL_SUBJECT), subject); - } - if (object >= 0) { - FilterPredicate objFilter = eq(longColumn(ParquetSchemas.COL_OBJECT), object); - predicate = predicate != null ? and(predicate, objFilter) : objFilter; - } - if (context >= 0) { - FilterPredicate ctxFilter = eq(longColumn(ParquetSchemas.COL_CONTEXT), context); - predicate = predicate != null ? and(predicate, ctxFilter) : ctxFilter; - } - - return predicate != null ? FilterCompat.get(predicate) : FilterCompat.NOOP; - } - - /** - * Builds a Parquet filter for an unpartitioned file query (all 4 components). - * - * @param subject subject ID, or -1 for wildcard - * @param predId predicate ID, or -1 for wildcard - * @param object object ID, or -1 for wildcard - * @param context context ID, or -1 for wildcard - * @return a FilterCompat.Filter, or FilterCompat.NOOP if no filters apply - */ - public static FilterCompat.Filter buildUnpartitionedFilter(long subject, long predId, long object, long context) { - FilterPredicate predicate = null; - - if (subject >= 0) { - predicate = eq(longColumn(ParquetSchemas.COL_SUBJECT), subject); - } - if (predId >= 0) { - FilterPredicate pFilter = eq(longColumn(ParquetSchemas.COL_PREDICATE), predId); - predicate = predicate != null ? and(predicate, pFilter) : pFilter; - } - if (object >= 0) { - FilterPredicate objFilter = eq(longColumn(ParquetSchemas.COL_OBJECT), object); - predicate = predicate != null ? and(predicate, objFilter) : objFilter; - } - if (context >= 0) { - FilterPredicate ctxFilter = eq(longColumn(ParquetSchemas.COL_CONTEXT), context); - predicate = predicate != null ? and(predicate, ctxFilter) : ctxFilter; - } - - return predicate != null ? 
FilterCompat.get(predicate) : FilterCompat.NOOP; - } -} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java index 934691984bc..07f4a9a4c39 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java @@ -47,8 +47,7 @@ public class ParquetQuadSource implements RawEntrySource { * @param quadIndex the quad index defining the key encoding order */ public ParquetQuadSource(byte[] parquetData, QuadIndex quadIndex) { - this.entries = readAllEntries(parquetData, quadIndex, -1, -1, -1, -1); - this.pos = 0; + this(parquetData, quadIndex, -1, -1, -1, -1); } /** @@ -113,11 +112,8 @@ private static List readAllEntries(byte[] parquetData, QuadIndex quadInde long context = group.getLong(ParquetSchemas.COL_CONTEXT, 0); int flag = group.getInteger(ParquetSchemas.COL_FLAG, 0); - // Apply filters - if ((filterS >= 0 && subject != filterS) - || (filterP >= 0 && predicate != filterP) - || (filterO >= 0 && object != filterO) - || (filterC >= 0 && context != filterC)) { + long[] quad = { subject, predicate, object, context }; + if (!QuadIndex.matches(quad, filterS, filterP, filterO, filterC)) { continue; } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java index 181db168173..0a46d764040 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java @@ -35,6 +35,7 @@ public class QuadIndex { static final int MAX_KEY_LENGTH = 4 * 9; // 4 varints, max 9 bytes each private final char[] fieldSeq; + private final String fieldSeqString; private final int[] indexMap; /** @@ -48,6 +49,7 @@ public QuadIndex(String fieldSeq) { throw new IllegalArgumentException("Field sequence must be exactly 4 characters: " + fieldSeq); } this.fieldSeq = fieldSeq.toCharArray(); + this.fieldSeqString = fieldSeq; this.indexMap = buildIndexMap(this.fieldSeq); } @@ -62,7 +64,7 @@ public char[] getFieldSeq() { * Returns the field sequence as a String. */ public String getFieldSeqString() { - return new String(fieldSeq); + return fieldSeqString; } /** @@ -227,7 +229,7 @@ public void getMaxKey(ByteBuffer bb, long subj, long pred, long obj, long contex subj <= 0 ? Long.MAX_VALUE : subj, pred <= 0 ? Long.MAX_VALUE : pred, obj <= 0 ? Long.MAX_VALUE : obj, - context < 0 ? Long.MAX_VALUE : context); + context <= 0 ? Long.MAX_VALUE : context); } /** @@ -238,7 +240,7 @@ public byte[] getMaxKeyBytes(long subj, long pred, long obj, long context) { subj <= 0 ? Long.MAX_VALUE : subj, pred <= 0 ? Long.MAX_VALUE : pred, obj <= 0 ? Long.MAX_VALUE : obj, - context < 0 ? Long.MAX_VALUE : context); + context <= 0 ? Long.MAX_VALUE : context); } /** @@ -296,9 +298,44 @@ public static Set parseIndexSpecList(String indexSpecStr) { return indexes; } + /** + * Tests whether a decoded quad matches the given pattern. Unbound components (< 0) are treated as wildcards. 
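[In matches(), context 0 (the default graph) is a bound value; only strictly negative components are wildcards. A few concrete calls against a long[4] in SPOC order:

    long[] quad = { 7L, 2L, 9L, 0L };        // s=7, p=2, o=9, default graph
    QuadIndex.matches(quad, 7, -1, -1, -1);  // true: only subject bound
    QuadIndex.matches(quad, 7, 2, 9, 0);     // true: context 0 matches exactly
    QuadIndex.matches(quad, -1, 5, -1, -1);  // false: predicate differs
    QuadIndex.matches(quad, -1, -1, -1, -1); // true: fully wildcarded pattern
]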
+ * + * @param quad a long[4] array in SPOC order + * @param s subject pattern, or -1 for wildcard + * @param p predicate pattern, or -1 for wildcard + * @param o object pattern, or -1 for wildcard + * @param c context pattern, or -1 for wildcard + * @return true if all bound components match + */ + public static boolean matches(long[] quad, long s, long p, long o, long c) { + return (s < 0 || quad[SUBJ_IDX] == s) + && (p < 0 || quad[PRED_IDX] == p) + && (o < 0 || quad[OBJ_IDX] == o) + && (c < 0 || quad[CONTEXT_IDX] == c); + } + + /** + * Maps a field character ('s', 'p', 'o', 'c') to the corresponding array index (0-3). + */ + public static int fieldCharToIdx(char c) { + switch (c) { + case 's': + return SUBJ_IDX; + case 'p': + return PRED_IDX; + case 'o': + return OBJ_IDX; + case 'c': + return CONTEXT_IDX; + default: + throw new IllegalArgumentException("Invalid field: " + c); + } + } + @Override public String toString() { - return new String(fieldSeq); + return fieldSeqString; } private static int[] buildIndexMap(char[] fieldSeq) { diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java new file mode 100644 index 00000000000..5f147b8cfe3 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java @@ -0,0 +1,80 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.util.List; + +/** + * Min/max statistics for all four quad components (subject, predicate, object, context). + */ +public final class QuadStats { + + public final long minSubject, maxSubject; + public final long minPredicate, maxPredicate; + public final long minObject, maxObject; + public final long minContext, maxContext; + + public QuadStats(long minSubject, long maxSubject, + long minPredicate, long maxPredicate, + long minObject, long maxObject, + long minContext, long maxContext) { + this.minSubject = minSubject; + this.maxSubject = maxSubject; + this.minPredicate = minPredicate; + this.maxPredicate = maxPredicate; + this.minObject = minObject; + this.maxObject = maxObject; + this.minContext = minContext; + this.maxContext = maxContext; + } + + /** + * Computes min/max stats from a list of long[5] arrays (s, p, o, c, flag). + */ + public static QuadStats fromQuads(List quads) { + long minS = Long.MAX_VALUE, maxS = Long.MIN_VALUE; + long minP = Long.MAX_VALUE, maxP = Long.MIN_VALUE; + long minO = Long.MAX_VALUE, maxO = Long.MIN_VALUE; + long minC = Long.MAX_VALUE, maxC = Long.MIN_VALUE; + for (long[] q : quads) { + minS = Math.min(minS, q[0]); + maxS = Math.max(maxS, q[0]); + minP = Math.min(minP, q[1]); + maxP = Math.max(maxP, q[1]); + minO = Math.min(minO, q[2]); + maxO = Math.max(maxO, q[2]); + minC = Math.min(minC, q[3]); + maxC = Math.max(maxC, q[3]); + } + return new QuadStats(minS, maxS, minP, maxP, minO, maxO, minC, maxC); + } + + /** + * Computes min/max stats from a list of QuadEntry objects. 
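[These per-file min/max bounds are what make catalog-level pruning possible: a reader can skip any Parquet file whose recorded range excludes a bound query component. A sketch of such a check; the getter names on ParquetFileInfo are assumptions for illustration:

    // Skip files that cannot contain the queried subject; s < 0 means unbound.
    static boolean mayContainSubject(Catalog.ParquetFileInfo file, long s) {
        return s < 0 || (s >= file.getMinSubject() && s <= file.getMaxSubject());
    }
]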
+ */ + public static QuadStats fromEntries(List entries) { + long minS = Long.MAX_VALUE, maxS = Long.MIN_VALUE; + long minP = Long.MAX_VALUE, maxP = Long.MIN_VALUE; + long minO = Long.MAX_VALUE, maxO = Long.MIN_VALUE; + long minC = Long.MAX_VALUE, maxC = Long.MIN_VALUE; + for (ParquetFileBuilder.QuadEntry e : entries) { + minS = Math.min(minS, e.subject); + maxS = Math.max(maxS, e.subject); + minP = Math.min(minP, e.predicate); + maxP = Math.max(maxP, e.predicate); + minO = Math.min(minO, e.object); + maxO = Math.max(maxO, e.object); + minC = Math.min(minC, e.context); + maxC = Math.max(maxC, e.context); + } + return new QuadStats(minS, maxS, minP, maxP, minO, maxO, minC, maxC); + } +} diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java index f7af42af634..41c883553ef 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java @@ -124,7 +124,7 @@ void reorderedSource_appliesPredicateFilter() { void reorderedSource_includesAliveAndTombstones() { MemTable mt = new MemTable(spoc); mt.put(1, 2, 3, 0, true); - mt.remove(5, 6, 7, 0, true); + mt.remove(5, 6, 7, 0); RawEntrySource source = mt.asRawSource(opsc, -1, -1, -1, -1); diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java index b5fa1076a1d..bd4eaee8507 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java @@ -45,7 +45,7 @@ void newerSourceWins() { @Test void tombstoneSuppression() { MemTable newer = new MemTable(spoc); - newer.remove(1, 2, 3, 0, true); // tombstone + newer.remove(1, 2, 3, 0); // tombstone MemTable older = new MemTable(spoc); older.put(1, 2, 3, 0, true); // explicit @@ -152,7 +152,7 @@ void tombstoneInNewerShadowsOlder() { olderData.put(4, 5, 6, 0, true); MemTable memTable = new MemTable(spoc); - memTable.remove(1, 2, 3, 0, true); // tombstone shadows older entry + memTable.remove(1, 2, 3, 0); // tombstone shadows older entry List sources = Arrays.asList( memTable.asRawSource(-1, -1, -1, -1), diff --git a/tools/workbench/src/main/webapp/transformations/create-s3.xsl b/tools/workbench/src/main/webapp/transformations/create-s3.xsl index 911e25193b1..a83c5e32ec1 100644 --- a/tools/workbench/src/main/webapp/transformations/create-s3.xsl +++ b/tools/workbench/src/main/webapp/transformations/create-s3.xsl @@ -55,6 +55,13 @@ + + Data Directory + + + + + From fde0b71528d04ac626a52f73273f8b28e5712b74 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sat, 28 Feb 2026 12:54:27 -0500 Subject: [PATCH 06/10] fix: thread safety, crash safety, and code cleanup for S3 SAIL - Catalog: volatile copy-on-write for thread-safe concurrent reads - S3SailStore: save catalog before values for crash-safe ordering; persist namespaces/values even when memTable is empty; delete old compaction files only after catalog is saved - QuadIndex: fix context=0 treated as wildcard in range scans - QuadStats: filter tombstones from stats computation - FileSystemObjectStore: atomic writes via temp-file-then-rename - L2DiskCache: volatile lastAccessNanos, synchronized eviction - Remove dead config fields (quadIndexes, blockSize, valueCacheSize, 
valueIdCacheSize) from S3StoreConfig and S3StoreSchema - Unify ALL_INDEXES with SortOrder.values() single source of truth - Fix inline FQNs and wildcard imports across main and test sources - Fix stale javadocs in CompactionPolicy, MemTable, S3SailDataset --- .../eclipse/rdf4j/sail/s3/S3SailStore.java | 134 ++++---- .../org/eclipse/rdf4j/sail/s3/S3Store.java | 3 +- .../eclipse/rdf4j/sail/s3/S3ValueStore.java | 67 ++-- .../rdf4j/sail/s3/cache/L2DiskCache.java | 4 +- .../rdf4j/sail/s3/cache/TieredCache.java | 17 +- .../rdf4j/sail/s3/config/S3StoreConfig.java | 293 ++++-------------- .../rdf4j/sail/s3/config/S3StoreFactory.java | 9 +- .../rdf4j/sail/s3/config/S3StoreSchema.java | 24 -- .../sail/s3/storage/ByteArrayOutputFile.java | 7 +- .../rdf4j/sail/s3/storage/Catalog.java | 31 +- .../sail/s3/storage/CompactionPolicy.java | 6 +- .../rdf4j/sail/s3/storage/Compactor.java | 44 ++- .../s3/storage/FileSystemObjectStore.java | 7 +- .../rdf4j/sail/s3/storage/MemTable.java | 12 +- .../sail/s3/storage/ParquetFileBuilder.java | 35 +-- .../rdf4j/sail/s3/storage/QuadEntry.java | 30 ++ .../rdf4j/sail/s3/storage/QuadIndex.java | 27 +- .../rdf4j/sail/s3/storage/QuadStats.java | 10 +- .../rdf4j/sail/s3/storage/S3ObjectStore.java | 40 +-- .../rdf4j/sail/s3/S3PersistenceMinioIT.java | 9 +- .../rdf4j/sail/s3/S3PersistenceTest.java | 31 +- .../rdf4j/sail/s3/S3SparqlOrderByTest.java | 2 +- .../rdf4j/sail/s3/S3StoreConnectionTest.java | 2 +- .../sail/s3/S3StoreIsolationLevelTest.java | 2 +- .../rdf4j/sail/s3/S3StoreRepositoryTest.java | 2 +- .../eclipse/rdf4j/sail/s3/S3StoreTest.java | 2 +- .../s3/S3ValueStoreSerializationTest.java | 4 +- .../rdf4j/sail/s3/storage/CatalogTest.java | 3 +- .../sail/s3/storage/MemTableReorderTest.java | 5 +- .../sail/s3/storage/MergeIteratorTest.java | 4 +- .../sail/s3/storage/ParquetRoundTripTest.java | 64 ++-- .../s3/storage/QuadIndexSelectionTest.java | 2 +- 32 files changed, 379 insertions(+), 553 deletions(-) create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadEntry.java diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index 565fce11ca5..1b0c9c0d28d 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -14,6 +14,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Comparator; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -55,6 +56,7 @@ import org.eclipse.rdf4j.sail.s3.storage.ParquetFileBuilder; import org.eclipse.rdf4j.sail.s3.storage.ParquetQuadSource; import org.eclipse.rdf4j.sail.s3.storage.ParquetSchemas; +import org.eclipse.rdf4j.sail.s3.storage.QuadEntry; import org.eclipse.rdf4j.sail.s3.storage.QuadIndex; import org.eclipse.rdf4j.sail.s3.storage.QuadStats; import org.eclipse.rdf4j.sail.s3.storage.RawEntrySource; @@ -79,12 +81,19 @@ */ class S3SailStore implements SailStore { - final Logger logger = LoggerFactory.getLogger(S3SailStore.class); + private static final Logger logger = LoggerFactory.getLogger(S3SailStore.class); private static final QuadIndex SPOC_INDEX = new QuadIndex("spoc"); - private static final QuadIndex OPSC_INDEX = new QuadIndex("opsc"); - private static final QuadIndex CSPO_INDEX = new QuadIndex("cspo"); - private static final List ALL_INDEXES = List.of(SPOC_INDEX, OPSC_INDEX, CSPO_INDEX); + private static final List ALL_INDEXES; 
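[The static initializer that follows derives ALL_INDEXES from ParquetSchemas.SortOrder.values(), so adding a sort order in one place propagates everywhere. Under the three orders used throughout this series, the derived set is equivalent to:

    // Each sort order's suffix doubles as a QuadIndex field sequence.
    QuadIndex spoc = new QuadIndex("spoc"); // subject-led scans (s, sp, spo bound)
    QuadIndex opsc = new QuadIndex("opsc"); // object-led scans
    QuadIndex cspo = new QuadIndex("cspo"); // context-led (named graph) scans
]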
+ + static { + ParquetSchemas.SortOrder[] orders = ParquetSchemas.SortOrder.values(); + List indexes = new ArrayList<>(orders.length); + for (ParquetSchemas.SortOrder order : orders) { + indexes.add(new QuadIndex(order.suffix())); + } + ALL_INDEXES = List.copyOf(indexes); + } private static final int DEFAULT_ROW_GROUP_SIZE = 8 * 1024 * 1024; // 8 MiB private static final int DEFAULT_PAGE_SIZE = 64 * 1024; // 64 KiB @@ -218,18 +227,24 @@ private void flushToObjectStore() { return; } + // Always persist namespaces and values (they may have changed without any quad writes) + valueStore.serialize(objectStore); + namespaceStore.serialize(objectStore, jsonMapper); + + if (memTable.size() == 0) { + return; // no quads to flush — avoid wasting epoch numbers and S3 writes + } + long epoch = epochCounter.getAndIncrement(); - if (memTable.size() > 0) { - // Freeze active MemTable and swap in fresh one - MemTable frozen = memTable; - frozen.freeze(); - memTable = new MemTable(SPOC_INDEX); + // Freeze active MemTable and swap in fresh one + MemTable frozen = memTable; + frozen.freeze(); + memTable = new MemTable(SPOC_INDEX); - List allQuads = collectQuads(frozen); - QuadStats stats = QuadStats.fromQuads(allQuads); - writeParquetFiles(epoch, allQuads, stats); - } + List allQuads = collectQuads(frozen); + QuadStats stats = QuadStats.fromQuads(allQuads); + writeParquetFiles(epoch, allQuads, stats); persistMetadata(epoch); runCompactionIfNeeded(); @@ -254,7 +269,7 @@ private static List collectQuads(MemTable frozen) { private void writeParquetFiles(long epoch, List allQuads, QuadStats stats) { for (QuadIndex sortIndex : ALL_INDEXES) { String sortSuffix = sortIndex.getFieldSeqString(); - List sorted = sortQuadEntries(allQuads, sortIndex); + List sorted = sortQuadEntries(allQuads, sortIndex); ParquetSchemas.SortOrder sortOrder = ParquetSchemas.SortOrder.fromSuffix(sortSuffix); byte[] parquetData = ParquetFileBuilder.build(sorted, ParquetSchemas.QUAD_SCHEMA, @@ -273,17 +288,20 @@ private void writeParquetFiles(long epoch, List allQuads, QuadStats stat } private void persistMetadata(long epoch) { - valueStore.serialize(objectStore); - namespaceStore.serialize(objectStore, jsonMapper); + // Save catalog first: if we crash after catalog but before values, + // on restart we have new nextValueId but old values — IDs are gaps (safe). + // The reverse (values first, catalog second) risks ID reuse on crash (corruption). catalog.setNextValueId(valueStore.getNextId()); catalog.setEpoch(epoch); catalog.save(objectStore, jsonMapper, epoch); + valueStore.serialize(objectStore); + namespaceStore.serialize(objectStore, jsonMapper); } /** * Sorts quad entries according to the given sort index. 
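[The comparator walks the index's field sequence and compares the corresponding quad component at each position, falling through on ties. A self-contained version of the same idea, reusing the fieldCharToIdx mapping added earlier in this series:

    import java.util.List;

    // Sort long[5] quads (s, p, o, c, flag) by a field sequence such as "opsc".
    static void sortBy(List<long[]> quads, String fieldSeq) {
        quads.sort((a, b) -> {
            for (char f : fieldSeq.toCharArray()) {
                int idx = QuadIndex.fieldCharToIdx(f); // 's'/'p'/'o'/'c' -> 0..3
                int cmp = Long.compare(a[idx], b[idx]);
                if (cmp != 0) {
                    return cmp;
                }
            }
            return 0; // equal on all four components
        });
    }
]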
*/ - private static List sortQuadEntries(List quads, QuadIndex sortIndex) { + private static List sortQuadEntries(List quads, QuadIndex sortIndex) { List sorted = new ArrayList<>(quads); String seq = sortIndex.getFieldSeqString(); sorted.sort((a, b) -> { @@ -297,9 +315,9 @@ private static List sortQuadEntries(List q return 0; }); - List result = new ArrayList<>(sorted.size()); + List result = new ArrayList<>(sorted.size()); for (long[] q : sorted) { - result.add(new ParquetFileBuilder.QuadEntry(q[0], q[1], q[2], q[3], (byte) q[4])); + result.add(new QuadEntry(q[0], q[1], q[2], q[3], (byte) q[4])); } return result; } @@ -312,13 +330,14 @@ private void runCompactionIfNeeded() { return; } + List results = new ArrayList<>(); List files = catalog.getFiles(); // L0→L1 compaction if (compactionPolicy.shouldCompact(files, 0)) { List l0Files = CompactionPolicy.filesAtLevel(files, 0); long compactEpoch = epochCounter.getAndIncrement(); - compactor.compact(l0Files, 0, 1, compactEpoch, catalog); + results.add(compactor.compact(l0Files, 0, 1, compactEpoch, catalog)); files = catalog.getFiles(); } @@ -326,13 +345,25 @@ private void runCompactionIfNeeded() { if (compactionPolicy.shouldCompact(files, 1)) { List l1Files = CompactionPolicy.filesAtLevel(files, 1); long compactEpoch = epochCounter.getAndIncrement(); - compactor.compact(l1Files, 1, 2, compactEpoch, catalog); + results.add(compactor.compact(l1Files, 1, 2, compactEpoch, catalog)); } - // Save catalog after compaction - long epoch = epochCounter.getAndIncrement(); - catalog.setEpoch(epoch); - catalog.save(objectStore, jsonMapper, epoch); + if (!results.isEmpty()) { + // Save catalog BEFORE deleting old files — crash-safe ordering + long epoch = epochCounter.getAndIncrement(); + catalog.setEpoch(epoch); + catalog.save(objectStore, jsonMapper, epoch); + + // Now safe to delete old files + for (Compactor.CompactionResult result : results) { + for (String key : result.getDeletedKeys()) { + objectStore.delete(key); + if (cache != null) { + cache.invalidate(key); + } + } + } + } } private boolean hasPersistence() { @@ -575,24 +606,9 @@ public void approve(Resource subj, IRI pred, Value obj, Resource ctx) throws Sai public void approveAll(Set approved, Set approvedContexts) { sinkStoreAccessLock.lock(); try { - for (Statement statement : approved) { - Resource subj = statement.getSubject(); - IRI pred = statement.getPredicate(); - Value obj = statement.getObject(); - Resource context = statement.getContext(); - - long s = valueStore.storeValue(subj); - long p = valueStore.storeValue(pred); - long o = valueStore.storeValue(obj); - long c = context == null ? 0 : valueStore.storeValue(context); - - if (!explicit) { - mayHaveInferred = true; - } - - memTable.put(s, p, o, c, explicit); + for (Statement st : approved) { + storeQuad(st.getSubject(), st.getPredicate(), st.getObject(), explicit, st.getContext()); } - // Size-triggered flush if (objectStore != null && memTable.approximateSizeInBytes() >= memTableFlushSize) { flushToObjectStore(); @@ -621,21 +637,23 @@ public boolean supportsDeprecateByQuery() { private void addStatement(Resource subj, IRI pred, Value obj, boolean explicit, Resource context) { sinkStoreAccessLock.lock(); try { - long s = valueStore.storeValue(subj); - long p = valueStore.storeValue(pred); - long o = valueStore.storeValue(obj); - long c = context == null ? 
0 : valueStore.storeValue(context); - - if (!explicit) { - mayHaveInferred = true; - } - - memTable.put(s, p, o, c, explicit); + storeQuad(subj, pred, obj, explicit, context); } finally { sinkStoreAccessLock.unlock(); } } + private void storeQuad(Resource subj, IRI pred, Value obj, boolean explicit, Resource context) { + long s = valueStore.storeValue(subj); + long p = valueStore.storeValue(pred); + long o = valueStore.storeValue(obj); + long c = context == null ? 0 : valueStore.storeValue(context); + if (!explicit) { + mayHaveInferred = true; + } + memTable.put(s, p, o, c, explicit); + } + private long removeStatements(Resource subj, IRI pred, Value obj, boolean explicit, Resource... contexts) { Objects.requireNonNull(contexts, "contexts argument may not be null; either the value should be cast to Resource or an empty array should be supplied"); @@ -735,7 +753,7 @@ protected Resource convert(long[] quad) { return val instanceof Resource ? (Resource) val : null; } }) { - private final java.util.Set seen = new java.util.HashSet<>(); + private final Set seen = new HashSet<>(); @Override protected boolean accept(Resource ctx) { @@ -755,10 +773,13 @@ public CloseableIteration getStatements(Resource subj, IRI return createStatementIterator(subj, pred, obj, explicit, contexts); } + /** + * @throws UnsupportedOperationException always — ordered iteration is not supported + */ @Override public CloseableIteration getStatements(StatementOrder statementOrder, Resource subj, IRI pred, Value obj, Resource... contexts) throws SailException { - throw new UnsupportedOperationException("Not implemented yet"); + throw new UnsupportedOperationException("Ordered iteration is not supported by S3Store"); } @Override @@ -796,6 +817,11 @@ public Statement next() { Resource subj = (Resource) valueStore.getValue(quad[0]); IRI pred = (IRI) valueStore.getValue(quad[1]); Value obj = valueStore.getValue(quad[2]); + if (subj == null || pred == null || obj == null) { + // Value ID exists in Parquet but not in value store — can happen after + // crash recovery when catalog was saved but value file was not. + return null; + } Resource ctx = quad[3] == 0 ? 
null : (Resource) valueStore.getValue(quad[3]); return valueStore.createStatement(subj, pred, obj, ctx); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java index 7de2ac603d1..3e4089d5844 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3Store.java @@ -18,6 +18,7 @@ import org.eclipse.rdf4j.common.transaction.IsolationLevel; import org.eclipse.rdf4j.common.transaction.IsolationLevels; import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.LinkedHashModel; import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategyFactory; import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolver; import org.eclipse.rdf4j.query.algebra.evaluation.federation.FederatedServiceResolverClient; @@ -135,7 +136,7 @@ protected void initializeInternal() throws SailException { try { backingStore = new S3SailStore(config); - this.store = new SnapshotSailStore(backingStore, () -> new org.eclipse.rdf4j.model.impl.LinkedHashModel()) { + this.store = new SnapshotSailStore(backingStore, LinkedHashModel::new) { @Override public SailSource getExplicitSailSource() { diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java index 324b3f379f7..f89b420a624 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java @@ -106,60 +106,26 @@ void serialize(ObjectStore objectStore) { try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream out = new DataOutputStream(baos); + ByteBuffer buf = ByteBuffer.allocate(9); // reusable scratch for varints - int count = idToValue.size(); - // Write count as varint - ByteBuffer countBuf = ByteBuffer.allocate(9); - Varint.writeUnsigned(countBuf, count); - out.write(countBuf.array(), 0, countBuf.position()); + writeVarint(out, buf, idToValue.size()); for (Map.Entry entry : idToValue.entrySet()) { - long id = entry.getKey(); + writeVarint(out, buf, entry.getKey()); Value val = entry.getValue(); - // Write id as varint - ByteBuffer idBuf = ByteBuffer.allocate(9); - Varint.writeUnsigned(idBuf, id); - out.write(idBuf.array(), 0, idBuf.position()); - if (val instanceof IRI) { - out.writeByte(0); // type = IRI - byte[] payload = val.stringValue().getBytes(StandardCharsets.UTF_8); - ByteBuffer lenBuf = ByteBuffer.allocate(9); - Varint.writeUnsigned(lenBuf, payload.length); - out.write(lenBuf.array(), 0, lenBuf.position()); - out.write(payload); + out.writeByte(0); + writeBytes(out, buf, val.stringValue().getBytes(StandardCharsets.UTF_8)); } else if (val instanceof Literal) { - out.writeByte(1); // type = Literal + out.writeByte(1); Literal lit = (Literal) val; - byte[] label = lit.getLabel().getBytes(StandardCharsets.UTF_8); - byte[] dt = lit.getDatatype().stringValue().getBytes(StandardCharsets.UTF_8); - String langStr = lit.getLanguage().orElse(""); - byte[] lang = langStr.getBytes(StandardCharsets.UTF_8); - - ByteBuffer buf = ByteBuffer.allocate(9); - - buf.clear(); - Varint.writeUnsigned(buf, label.length); - out.write(buf.array(), 0, buf.position()); - out.write(label); - - buf.clear(); - Varint.writeUnsigned(buf, dt.length); - out.write(buf.array(), 0, buf.position()); - out.write(dt); - - buf.clear(); - Varint.writeUnsigned(buf, 
lang.length); - out.write(buf.array(), 0, buf.position()); - out.write(lang); + writeBytes(out, buf, lit.getLabel().getBytes(StandardCharsets.UTF_8)); + writeBytes(out, buf, lit.getDatatype().stringValue().getBytes(StandardCharsets.UTF_8)); + writeBytes(out, buf, lit.getLanguage().orElse("").getBytes(StandardCharsets.UTF_8)); } else if (val instanceof BNode) { - out.writeByte(2); // type = BNode - byte[] payload = ((BNode) val).getID().getBytes(StandardCharsets.UTF_8); - ByteBuffer lenBuf = ByteBuffer.allocate(9); - Varint.writeUnsigned(lenBuf, payload.length); - out.write(lenBuf.array(), 0, lenBuf.position()); - out.write(payload); + out.writeByte(2); + writeBytes(out, buf, ((BNode) val).getID().getBytes(StandardCharsets.UTF_8)); } else { throw new IllegalStateException("Unsupported value type: " + val.getClass()); } @@ -249,4 +215,15 @@ void deserialize(ObjectStore objectStore, long nextValueId) { public void close() { clear(); } + + private static void writeVarint(DataOutputStream out, ByteBuffer buf, long value) throws IOException { + buf.clear(); + Varint.writeUnsigned(buf, value); + out.write(buf.array(), 0, buf.position()); + } + + private static void writeBytes(DataOutputStream out, ByteBuffer buf, byte[] data) throws IOException { + writeVarint(out, buf, data.length); + out.write(data); + } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java index 52d138de8a0..a6ed768def9 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/L2DiskCache.java @@ -108,7 +108,7 @@ public void remove(String s3Key) { } } - private void evictIfNeeded(long incomingSize) { + private synchronized void evictIfNeeded(long incomingSize) { while (currentSizeBytes.get() + incomingSize > maxSizeBytes && !index.isEmpty()) { // Find LRU entry String lruKey = null; @@ -196,7 +196,7 @@ static class CacheEntry { public long sizeBytes; @JsonProperty("lastAccessNanos") - public long lastAccessNanos; + public volatile long lastAccessNanos; public CacheEntry() { // for Jackson deserialization diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java index e78424b8c1e..85b1754aa96 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/cache/TieredCache.java @@ -15,8 +15,6 @@ import java.nio.file.Path; import org.eclipse.rdf4j.sail.s3.storage.ObjectStore; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Unified three-tier cache facade: L1 (heap) -> L2 (disk) -> L3 (S3 / ObjectStore). On a cache miss at a given tier, @@ -24,8 +22,6 @@ */ public class TieredCache implements Closeable { - private static final Logger logger = LoggerFactory.getLogger(TieredCache.class); - private final L1HeapCache l1; private final L2DiskCache l2; // nullable when no disk cache path is configured private final ObjectStore objectStore; @@ -59,10 +55,7 @@ public byte[] get(String s3Key) { // L3 (S3) data = objectStore.get(s3Key); if (data != null) { - l1.put(s3Key, data); // populate L1 - if (l2 != null) { - l2.put(s3Key, data); // populate L2 - } + promoteToUpperTiers(s3Key, data); } return data; } @@ -72,9 +65,13 @@ public byte[] get(String s3Key) { * separately. 
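[writeThrough populates only the cache tiers; the caller remains responsible for the durable ObjectStore write. A usage sketch, assuming ObjectStore exposes a put(key, bytes)-style method (not shown in this patch):

    // After building a new Parquet file:
    objectStore.put(s3Key, parquetData);    // durable write first (assumed API)
    cache.writeThrough(s3Key, parquetData); // then seed L1 (heap) and L2 (disk)
    byte[] again = cache.get(s3Key);        // L1 hit; no S3 round-trip
]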
*/ public void writeThrough(String s3Key, byte[] data) { - l1.put(s3Key, data); + promoteToUpperTiers(s3Key, data); + } + + private void promoteToUpperTiers(String key, byte[] data) { + l1.put(key, data); if (l2 != null) { - l2.put(s3Key, data); + l2.put(key, data); } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java index 78a7b55c0a7..397838e9256 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreConfig.java @@ -10,6 +10,9 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3.config; +import java.util.function.Consumer; + +import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Model; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.ValueFactory; @@ -24,21 +27,11 @@ */ public class S3StoreConfig extends BaseSailConfig { - /** - * The default quad indexes. - */ - public static final String DEFAULT_QUAD_INDEXES = "spoc,posc"; - /** * The default memtable size (64 MiB). */ public static final long DEFAULT_MEM_TABLE_SIZE = 67_108_864; - /** - * The default block size (4 MiB). - */ - public static final int DEFAULT_BLOCK_SIZE = 4_194_304; - /** * The default memory cache size (256 MiB). */ @@ -49,32 +42,14 @@ public class S3StoreConfig extends BaseSailConfig { */ public static final long DEFAULT_DISK_CACHE_SIZE = 10_737_418_240L; - /** - * The default value cache size. - */ - public static final int DEFAULT_VALUE_CACHE_SIZE = 512; - - /** - * The default value id cache size. - */ - public static final int DEFAULT_VALUE_ID_CACHE_SIZE = 128; - - private String quadIndexes; - private long memTableSize = -1; - private int blockSize = -1; - private long memoryCacheSize = -1; private long diskCacheSize = -1; private String diskCachePath; - private int valueCacheSize = -1; - - private int valueIdCacheSize = -1; - private String s3Bucket; private String s3Endpoint; @@ -99,11 +74,6 @@ public S3StoreConfig() { super(S3StoreFactory.SAIL_TYPE); } - public S3StoreConfig(String quadIndexes) { - this(); - setQuadIndexes(quadIndexes); - } - /*---------* * Methods * *---------*/ @@ -133,13 +103,8 @@ private static String resolveEnv(String envVar, String sysProp) { return null; } - public String getQuadIndexes() { - return quadIndexes != null ? quadIndexes : DEFAULT_QUAD_INDEXES; - } - - public S3StoreConfig setQuadIndexes(String quadIndexes) { - this.quadIndexes = quadIndexes; - return this; + private static String resolveField(String field, String envVar, String sysProp) { + return field != null ? field : resolveEnv(envVar, sysProp); } public long getMemTableSize() { @@ -151,15 +116,6 @@ public S3StoreConfig setMemTableSize(long memTableSize) { return this; } - public int getBlockSize() { - return blockSize >= 0 ? blockSize : DEFAULT_BLOCK_SIZE; - } - - public S3StoreConfig setBlockSize(int blockSize) { - this.blockSize = blockSize; - return this; - } - public long getMemoryCacheSize() { return memoryCacheSize >= 0 ? memoryCacheSize : DEFAULT_MEMORY_CACHE_SIZE; } @@ -187,29 +143,8 @@ public S3StoreConfig setDiskCachePath(String diskCachePath) { return this; } - public int getValueCacheSize() { - return valueCacheSize >= 0 ? 
valueCacheSize : DEFAULT_VALUE_CACHE_SIZE; - } - - public S3StoreConfig setValueCacheSize(int valueCacheSize) { - this.valueCacheSize = valueCacheSize; - return this; - } - - public int getValueIdCacheSize() { - return valueIdCacheSize >= 0 ? valueIdCacheSize : DEFAULT_VALUE_ID_CACHE_SIZE; - } - - public S3StoreConfig setValueIdCacheSize(int valueIdCacheSize) { - this.valueIdCacheSize = valueIdCacheSize; - return this; - } - public String getS3Bucket() { - if (s3Bucket != null) { - return s3Bucket; - } - return resolveEnv("RDF4J_S3_BUCKET", "rdf4j.s3.bucket"); + return resolveField(s3Bucket, "RDF4J_S3_BUCKET", "rdf4j.s3.bucket"); } public S3StoreConfig setS3Bucket(String s3Bucket) { @@ -218,10 +153,7 @@ public S3StoreConfig setS3Bucket(String s3Bucket) { } public String getS3Endpoint() { - if (s3Endpoint != null) { - return s3Endpoint; - } - return resolveEnv("RDF4J_S3_ENDPOINT", "rdf4j.s3.endpoint"); + return resolveField(s3Endpoint, "RDF4J_S3_ENDPOINT", "rdf4j.s3.endpoint"); } public S3StoreConfig setS3Endpoint(String s3Endpoint) { @@ -230,11 +162,8 @@ public S3StoreConfig setS3Endpoint(String s3Endpoint) { } public String getS3Region() { - if (s3Region != null) { - return s3Region; - } - String env = resolveEnv("RDF4J_S3_REGION", "rdf4j.s3.region"); - return env != null ? env : "us-east-1"; + String resolved = resolveField(s3Region, "RDF4J_S3_REGION", "rdf4j.s3.region"); + return resolved != null ? resolved : "us-east-1"; } public S3StoreConfig setS3Region(String s3Region) { @@ -252,10 +181,7 @@ public S3StoreConfig setS3Prefix(String s3Prefix) { } public String getS3AccessKey() { - if (s3AccessKey != null) { - return s3AccessKey; - } - return resolveEnv("RDF4J_S3_ACCESS_KEY", "rdf4j.s3.accessKey"); + return resolveField(s3AccessKey, "RDF4J_S3_ACCESS_KEY", "rdf4j.s3.accessKey"); } public S3StoreConfig setS3AccessKey(String s3AccessKey) { @@ -264,10 +190,7 @@ public S3StoreConfig setS3AccessKey(String s3AccessKey) { } public String getS3SecretKey() { - if (s3SecretKey != null) { - return s3SecretKey; - } - return resolveEnv("RDF4J_S3_SECRET_KEY", "rdf4j.s3.secretKey"); + return resolveField(s3SecretKey, "RDF4J_S3_SECRET_KEY", "rdf4j.s3.secretKey"); } public S3StoreConfig setS3SecretKey(String s3SecretKey) { @@ -293,10 +216,7 @@ public boolean isS3Configured() { } public String getDataDir() { - if (dataDir != null) { - return dataDir; - } - return resolveEnv("RDF4J_S3_DATA_DIR", "rdf4j.s3.dataDir"); + return resolveField(dataDir, "RDF4J_S3_DATA_DIR", "rdf4j.s3.dataDir"); } public S3StoreConfig setDataDir(String dataDir) { @@ -310,158 +230,73 @@ public Resource export(Model m) { ValueFactory vf = SimpleValueFactory.getInstance(); m.setNamespace("s3", S3StoreSchema.NAMESPACE); - if (quadIndexes != null) { - m.add(implNode, S3StoreSchema.QUAD_INDEXES, vf.createLiteral(quadIndexes)); - } - if (memTableSize >= 0) { - m.add(implNode, S3StoreSchema.MEM_TABLE_SIZE, vf.createLiteral(memTableSize)); - } - if (blockSize >= 0) { - m.add(implNode, S3StoreSchema.BLOCK_SIZE, vf.createLiteral(blockSize)); - } - if (memoryCacheSize >= 0) { - m.add(implNode, S3StoreSchema.MEMORY_CACHE_SIZE, vf.createLiteral(memoryCacheSize)); - } - if (diskCacheSize >= 0) { - m.add(implNode, S3StoreSchema.DISK_CACHE_SIZE, vf.createLiteral(diskCacheSize)); - } - if (diskCachePath != null) { - m.add(implNode, S3StoreSchema.DISK_CACHE_PATH, vf.createLiteral(diskCachePath)); - } - if (valueCacheSize >= 0) { - m.add(implNode, S3StoreSchema.VALUE_CACHE_SIZE, vf.createLiteral(valueCacheSize)); - } - if (valueIdCacheSize >= 0) { - 
m.add(implNode, S3StoreSchema.VALUE_ID_CACHE_SIZE, vf.createLiteral(valueIdCacheSize)); - } - if (s3Bucket != null) { - m.add(implNode, S3StoreSchema.S3_BUCKET, vf.createLiteral(s3Bucket)); - } - if (s3Endpoint != null) { - m.add(implNode, S3StoreSchema.S3_ENDPOINT, vf.createLiteral(s3Endpoint)); - } - if (s3Region != null) { - m.add(implNode, S3StoreSchema.S3_REGION, vf.createLiteral(s3Region)); - } - if (s3Prefix != null) { - m.add(implNode, S3StoreSchema.S3_PREFIX, vf.createLiteral(s3Prefix)); - } - if (s3AccessKey != null) { - m.add(implNode, S3StoreSchema.S3_ACCESS_KEY, vf.createLiteral(s3AccessKey)); - } - if (s3SecretKey != null) { - m.add(implNode, S3StoreSchema.S3_SECRET_KEY, vf.createLiteral(s3SecretKey)); - } + exportLong(m, implNode, vf, S3StoreSchema.MEM_TABLE_SIZE, memTableSize); + exportLong(m, implNode, vf, S3StoreSchema.MEMORY_CACHE_SIZE, memoryCacheSize); + exportLong(m, implNode, vf, S3StoreSchema.DISK_CACHE_SIZE, diskCacheSize); + exportString(m, implNode, vf, S3StoreSchema.DISK_CACHE_PATH, diskCachePath); + exportString(m, implNode, vf, S3StoreSchema.S3_BUCKET, s3Bucket); + exportString(m, implNode, vf, S3StoreSchema.S3_ENDPOINT, s3Endpoint); + exportString(m, implNode, vf, S3StoreSchema.S3_REGION, s3Region); + exportString(m, implNode, vf, S3StoreSchema.S3_PREFIX, s3Prefix); + exportString(m, implNode, vf, S3StoreSchema.S3_ACCESS_KEY, s3AccessKey); + exportString(m, implNode, vf, S3StoreSchema.S3_SECRET_KEY, s3SecretKey); if (s3ForcePathStyle != null) { m.add(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, vf.createLiteral(s3ForcePathStyle)); } - if (dataDir != null) { - m.add(implNode, S3StoreSchema.DATA_DIR, vf.createLiteral(dataDir)); - } + exportString(m, implNode, vf, S3StoreSchema.DATA_DIR, dataDir); return implNode; } + private static void exportString(Model m, Resource node, ValueFactory vf, IRI prop, String value) { + if (value != null) { + m.add(node, prop, vf.createLiteral(value)); + } + } + + private static void exportLong(Model m, Resource node, ValueFactory vf, IRI prop, long value) { + if (value >= 0) { + m.add(node, prop, vf.createLiteral(value)); + } + } + @Override public void parse(Model m, Resource implNode) throws SailConfigException { super.parse(m, implNode); try { - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.QUAD_INDEXES, null)) - .ifPresent(lit -> setQuadIndexes(lit.getLabel())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.MEM_TABLE_SIZE, null)) - .ifPresent(lit -> { - try { - setMemTableSize(lit.longValue()); - } catch (NumberFormatException e) { - throw new SailConfigException( - "Long value required for " + S3StoreSchema.MEM_TABLE_SIZE - + " property, found " + lit); - } - }); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.BLOCK_SIZE, null)) - .ifPresent(lit -> { - try { - setBlockSize(lit.intValue()); - } catch (NumberFormatException e) { - throw new SailConfigException( - "Integer value required for " + S3StoreSchema.BLOCK_SIZE - + " property, found " + lit); - } - }); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.MEMORY_CACHE_SIZE, null)) - .ifPresent(lit -> { - try { - setMemoryCacheSize(lit.longValue()); - } catch (NumberFormatException e) { - throw new SailConfigException( - "Long value required for " + S3StoreSchema.MEMORY_CACHE_SIZE - + " property, found " + lit); - } - }); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.DISK_CACHE_SIZE, null)) - .ifPresent(lit -> { - try { - setDiskCacheSize(lit.longValue()); - } catch (NumberFormatException 
e) { - throw new SailConfigException( - "Long value required for " + S3StoreSchema.DISK_CACHE_SIZE - + " property, found " + lit); - } - }); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.DISK_CACHE_PATH, null)) - .ifPresent(lit -> setDiskCachePath(lit.getLabel())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.VALUE_CACHE_SIZE, null)) - .ifPresent(lit -> { - try { - setValueCacheSize(lit.intValue()); - } catch (NumberFormatException e) { - throw new SailConfigException( - "Integer value required for " + S3StoreSchema.VALUE_CACHE_SIZE - + " property, found " + lit); - } - }); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.VALUE_ID_CACHE_SIZE, null)) - .ifPresent(lit -> { - try { - setValueIdCacheSize(lit.intValue()); - } catch (NumberFormatException e) { - throw new SailConfigException( - "Integer value required for " + S3StoreSchema.VALUE_ID_CACHE_SIZE - + " property, found " + lit); - } - }); - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_BUCKET, null)) - .ifPresent(lit -> setS3Bucket(lit.getLabel())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_ENDPOINT, null)) - .ifPresent(lit -> setS3Endpoint(lit.getLabel())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_REGION, null)) - .ifPresent(lit -> setS3Region(lit.getLabel())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_PREFIX, null)) - .ifPresent(lit -> setS3Prefix(lit.getLabel())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_ACCESS_KEY, null)) - .ifPresent(lit -> setS3AccessKey(lit.getLabel())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_SECRET_KEY, null)) - .ifPresent(lit -> setS3SecretKey(lit.getLabel())); - + parseLong(m, implNode, S3StoreSchema.MEM_TABLE_SIZE, this::setMemTableSize); + parseLong(m, implNode, S3StoreSchema.MEMORY_CACHE_SIZE, this::setMemoryCacheSize); + parseLong(m, implNode, S3StoreSchema.DISK_CACHE_SIZE, this::setDiskCacheSize); + parseString(m, implNode, S3StoreSchema.DISK_CACHE_PATH, this::setDiskCachePath); + parseString(m, implNode, S3StoreSchema.S3_BUCKET, this::setS3Bucket); + parseString(m, implNode, S3StoreSchema.S3_ENDPOINT, this::setS3Endpoint); + parseString(m, implNode, S3StoreSchema.S3_REGION, this::setS3Region); + parseString(m, implNode, S3StoreSchema.S3_PREFIX, this::setS3Prefix); + parseString(m, implNode, S3StoreSchema.S3_ACCESS_KEY, this::setS3AccessKey); + parseString(m, implNode, S3StoreSchema.S3_SECRET_KEY, this::setS3SecretKey); Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.S3_FORCE_PATH_STYLE, null)) .ifPresent(lit -> setS3ForcePathStyle(lit.booleanValue())); - - Models.objectLiteral(m.getStatements(implNode, S3StoreSchema.DATA_DIR, null)) - .ifPresent(lit -> setDataDir(lit.getLabel())); + parseString(m, implNode, S3StoreSchema.DATA_DIR, this::setDataDir); } catch (ModelException e) { throw new SailConfigException(e.getMessage(), e); } } + + private static void parseString(Model m, Resource node, IRI prop, Consumer setter) { + Models.objectLiteral(m.getStatements(node, prop, null)) + .ifPresent(lit -> setter.accept(lit.getLabel())); + } + + private static void parseLong(Model m, Resource node, IRI prop, Consumer setter) { + Models.objectLiteral(m.getStatements(node, prop, null)) + .ifPresent(lit -> { + try { + setter.accept(lit.longValue()); + } catch (NumberFormatException e) { + throw new SailConfigException( + "Long value required for " + prop + " property, found " + lit); + } + }); + } + 
} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java index 3b1544f8e35..ab043b9fd61 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreFactory.java @@ -15,16 +15,12 @@ import org.eclipse.rdf4j.sail.config.SailFactory; import org.eclipse.rdf4j.sail.config.SailImplConfig; import org.eclipse.rdf4j.sail.s3.S3Store; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * A {@link SailFactory} that creates {@link S3Store}s based on RDF configuration data. */ public class S3StoreFactory implements SailFactory { - private static final Logger logger = LoggerFactory.getLogger(S3StoreFactory.class); - /** * The type of repositories that are created by this factory. * @@ -53,9 +49,8 @@ public Sail getSail(SailImplConfig config) throws SailConfigException { if (config instanceof S3StoreConfig) { return new S3Store((S3StoreConfig) config); - } else { - logger.warn("Config is instance of {} is not S3StoreConfig.", config.getClass().getName()); - return new S3Store(); } + throw new SailConfigException( + "Expected S3StoreConfig but got " + config.getClass().getName()); } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java index 876271a1f9d..27420f9dad0 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/config/S3StoreSchema.java @@ -24,21 +24,11 @@ public class S3StoreSchema { */ public static final String NAMESPACE = "http://rdf4j.org/config/sail/s3#"; - /** - * http://rdf4j.org/config/sail/s3#quadIndexes - */ - public final static IRI QUAD_INDEXES; - /** * http://rdf4j.org/config/sail/s3#memTableSize */ public final static IRI MEM_TABLE_SIZE; - /** - * http://rdf4j.org/config/sail/s3#blockSize - */ - public final static IRI BLOCK_SIZE; - /** * http://rdf4j.org/config/sail/s3#memoryCacheSize */ @@ -54,16 +44,6 @@ public class S3StoreSchema { */ public final static IRI DISK_CACHE_PATH; - /** - * http://rdf4j.org/config/sail/s3#valueCacheSize - */ - public final static IRI VALUE_CACHE_SIZE; - - /** - * http://rdf4j.org/config/sail/s3#valueIdCacheSize - */ - public final static IRI VALUE_ID_CACHE_SIZE; - public final static IRI S3_BUCKET; public final static IRI S3_ENDPOINT; @@ -85,14 +65,10 @@ public class S3StoreSchema { static { ValueFactory factory = SimpleValueFactory.getInstance(); - QUAD_INDEXES = factory.createIRI(NAMESPACE, "quadIndexes"); MEM_TABLE_SIZE = factory.createIRI(NAMESPACE, "memTableSize"); - BLOCK_SIZE = factory.createIRI(NAMESPACE, "blockSize"); MEMORY_CACHE_SIZE = factory.createIRI(NAMESPACE, "memoryCacheSize"); DISK_CACHE_SIZE = factory.createIRI(NAMESPACE, "diskCacheSize"); DISK_CACHE_PATH = factory.createIRI(NAMESPACE, "diskCachePath"); - VALUE_CACHE_SIZE = factory.createIRI(NAMESPACE, "valueCacheSize"); - VALUE_ID_CACHE_SIZE = factory.createIRI(NAMESPACE, "valueIdCacheSize"); S3_BUCKET = factory.createIRI(NAMESPACE, "s3Bucket"); S3_ENDPOINT = factory.createIRI(NAMESPACE, "s3Endpoint"); S3_REGION = factory.createIRI(NAMESPACE, "s3Region"); diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java 
b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java index 2b84ed7f230..f2fca2287fc 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ByteArrayOutputFile.java @@ -67,34 +67,29 @@ public byte[] toByteArray() { private static class ByteArrayPositionOutputStream extends PositionOutputStream { private final ByteArrayOutputStream baos; - private long pos; ByteArrayPositionOutputStream(ByteArrayOutputStream baos) { this.baos = baos; - this.pos = 0; } @Override public long getPos() { - return pos; + return baos.size(); } @Override public void write(int b) throws IOException { baos.write(b); - pos++; } @Override public void write(byte[] b) throws IOException { baos.write(b); - pos += b.length; } @Override public void write(byte[] b, int off, int len) throws IOException { baos.write(b, off, len); - pos += len; } @Override diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java index 5036adc99d8..9a3abdc1cea 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java @@ -16,7 +16,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; @@ -60,7 +59,7 @@ public class Catalog { private long nextValueId; @JsonProperty("files") - private List files = new ArrayList<>(); + private volatile List files = new ArrayList<>(); public Catalog() { } @@ -94,7 +93,7 @@ public List getFiles() { } public void setFiles(List files) { - this.files = files; + this.files = new ArrayList<>(files); } /** @@ -149,33 +148,43 @@ public void save(ObjectStore store, ObjectMapper mapper, long epoch) { } /** - * Adds a Parquet file to the catalog. + * Adds a Parquet file to the catalog. Copy-on-write for thread safety. * * @param info the file info to add */ public void addFile(ParquetFileInfo info) { - files.add(info); + List updated = new ArrayList<>(files); + updated.add(info); + files = updated; } /** - * Removes Parquet files by their S3 keys. + * Removes Parquet files by their S3 keys. Copy-on-write for thread safety. * * @param s3Keys the set of S3 keys to remove */ public void removeFiles(Set s3Keys) { - files.removeIf(f -> s3Keys.contains(f.getS3Key())); + List updated = new ArrayList<>(files); + updated.removeIf(f -> s3Keys.contains(f.getS3Key())); + files = updated; } /** - * Returns all files for the given sort order. + * Returns all files for the given sort order. Reads from a volatile snapshot so it is safe to call without external + * synchronization. * * @param sortOrder the sort order suffix (e.g. 
"spoc", "opsc", "cspo") * @return list of files matching the sort order */ public List getFilesForSortOrder(String sortOrder) { - return files.stream() - .filter(f -> sortOrder.equals(f.getSortOrder())) - .collect(Collectors.toList()); + List snapshot = files; + List result = new ArrayList<>(); + for (ParquetFileInfo f : snapshot) { + if (sortOrder.equals(f.getSortOrder())) { + result.add(f); + } + } + return result; } /** diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java index c2006bd4609..6b300a13174 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/CompactionPolicy.java @@ -15,7 +15,7 @@ import java.util.Set; /** - * Determines when compaction should be triggered for a predicate partition. Counts distinct epochs at each level and + * Determines when compaction should be triggered. Counts distinct epochs at each level in the flat file catalog and * compares against configurable thresholds. */ public class CompactionPolicy { @@ -61,9 +61,9 @@ private static int countEpochsAtLevel(List files, int l } /** - * Returns the files at the given level for a predicate partition. + * Returns the files at the given level. * - * @param files all files in the partition + * @param files all catalog files * @param level the target level (0, 1, or 2) * @return files at that level */ diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java index 2e901ff1c20..d5e16518f9b 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java @@ -11,10 +11,12 @@ package org.eclipse.rdf4j.sail.s3.storage; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.TreeMap; import org.eclipse.rdf4j.sail.s3.cache.TieredCache; import org.slf4j.Logger; @@ -32,7 +34,7 @@ public class Compactor { private static final Logger logger = LoggerFactory.getLogger(Compactor.class); - private static final String[] SORT_ORDERS = { "spoc", "opsc", "cspo" }; + private static final ParquetSchemas.SortOrder[] SORT_ORDERS = ParquetSchemas.SortOrder.values(); private final ObjectStore objectStore; private final TieredCache cache; @@ -63,12 +65,13 @@ public CompactionResult compact(List sourceFiles, List newFiles = new ArrayList<>(); Set oldKeys = new HashSet<>(); - for (String sortOrder : SORT_ORDERS) { - QuadIndex quadIndex = new QuadIndex(sortOrder); + for (ParquetSchemas.SortOrder sortOrder : SORT_ORDERS) { + String suffix = sortOrder.suffix(); + QuadIndex quadIndex = new QuadIndex(suffix); // Collect source files for this sort order, ordered newest-first (highest epoch first) List sortOrderFiles = sourceFiles.stream() - .filter(f -> sortOrder.equals(f.getSortOrder())) + .filter(f -> suffix.equals(f.getSortOrder())) .sorted(Comparator.comparingLong(Catalog.ParquetFileInfo::getEpoch).reversed()) .toList(); @@ -97,19 +100,18 @@ public CompactionResult compact(List sourceFiles, } // Merge and collect entries - List merged = mergeEntries(sources, quadIndex, suppressTombstones); + List merged = mergeEntries(sources, quadIndex, suppressTombstones); if (merged.isEmpty()) { 
continue; } // Write merged Parquet file - ParquetSchemas.SortOrder parsedSortOrder = ParquetSchemas.SortOrder.fromSuffix(sortOrder); String s3Key = "data/L" + targetLevel + "-" - + String.format("%05d", epoch) + "-" + sortOrder + ".parquet"; + + String.format("%05d", epoch) + "-" + suffix + ".parquet"; byte[] parquetData = ParquetFileBuilder.build(merged, ParquetSchemas.QUAD_SCHEMA, - parsedSortOrder, rowGroupSize, pageSize); + sortOrder, rowGroupSize, pageSize); objectStore.put(s3Key, parquetData); if (cache != null) { @@ -117,34 +119,28 @@ public CompactionResult compact(List sourceFiles, } QuadStats stats = QuadStats.fromEntries(merged); - newFiles.add(new Catalog.ParquetFileInfo(s3Key, targetLevel, sortOrder, merged.size(), + newFiles.add(new Catalog.ParquetFileInfo(s3Key, targetLevel, suffix, merged.size(), epoch, parquetData.length, stats)); } - // Update catalog: remove old files, add new ones + // Update catalog in memory: remove old files, add new ones. + // Physical deletion of old files is deferred to the caller, after the catalog is saved, + // to prevent data loss if the process crashes between deletion and catalog save. catalog.removeFiles(oldKeys); for (Catalog.ParquetFileInfo newFile : newFiles) { catalog.addFile(newFile); } - // Delete old S3 files and invalidate cache - for (String key : oldKeys) { - objectStore.delete(key); - if (cache != null) { - cache.invalidate(key); - } - } - logger.info("Compacted L{}→L{}: {} files merged into {} files", sourceLevel, targetLevel, oldKeys.size(), newFiles.size()); return new CompactionResult(newFiles, oldKeys); } - private List mergeEntries(List sources, QuadIndex quadIndex, + private List mergeEntries(List sources, QuadIndex quadIndex, boolean suppressTombstones) { // Sources are ordered newest-first, so for dedup, first occurrence wins - java.util.TreeMap deduped = new java.util.TreeMap<>(); + TreeMap deduped = new TreeMap<>(); for (RawEntrySource source : sources) { while (source.hasNext()) { byte[] key = source.peekKey(); @@ -154,7 +150,7 @@ private List mergeEntries(List sou long[] quad = new long[4]; quadIndex.keyToQuad(key, quad); if (!suppressTombstones || flag != MemTable.FLAG_TOMBSTONE) { - deduped.put(ck, new ParquetFileBuilder.QuadEntry( + deduped.put(ck, new QuadEntry( quad[QuadIndex.SUBJ_IDX], quad[QuadIndex.PRED_IDX], quad[QuadIndex.OBJ_IDX], quad[QuadIndex.CONTEXT_IDX], flag)); } @@ -175,7 +171,7 @@ private static class CompactKey implements Comparable { @Override public int compareTo(CompactKey other) { - return java.util.Arrays.compareUnsigned(this.key, other.key); + return Arrays.compareUnsigned(this.key, other.key); } @Override @@ -186,12 +182,12 @@ public boolean equals(Object o) { if (!(o instanceof CompactKey)) { return false; } - return java.util.Arrays.equals(key, ((CompactKey) o).key); + return Arrays.equals(key, ((CompactKey) o).key); } @Override public int hashCode() { - return java.util.Arrays.hashCode(key); + return Arrays.hashCode(key); } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java index 4fdc54ab375..fa4cb4e96c2 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/FileSystemObjectStore.java @@ -15,6 +15,8 @@ import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardCopyOption; 
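Stepping back to Compactor.mergeEntries above: because the sources are ordered newest-first, inserting a key only when it is absent makes the newest version of each quad win across epochs. A self-contained sketch of that rule, without the RawEntrySource plumbing (the runs stand in for decoded source files):

    // Runs are listed newest-first; a newer entry shadows any older entry with
    // the same key. Keys compare unsigned, matching CompactKey.compareTo.
    static TreeMap<byte[], byte[]> mergeNewestFirst(List<TreeMap<byte[], byte[]>> runs) {
        TreeMap<byte[], byte[]> out = new TreeMap<>(Arrays::compareUnsigned);
        for (TreeMap<byte[], byte[]> run : runs) {
            run.forEach(out::putIfAbsent); // first (newest) occurrence wins
        }
        return out;
    }

The real method additionally drops tombstone entries when suppressTombstones is set.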
+import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.List; import java.util.stream.Stream; @@ -40,7 +42,10 @@ public void put(String key, byte[] data) { try { Path target = resolve(key); Files.createDirectories(target.getParent()); - Files.write(target, data); + // Atomic write via temp file + rename to prevent corrupt files on crash + Path tmp = target.resolveSibling(target.getFileName() + ".tmp"); + Files.write(tmp, data, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); + Files.move(tmp, target, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); } catch (IOException e) { throw new UncheckedIOException(e); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java index 6b456924d4d..413d5d9d213 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java @@ -59,15 +59,6 @@ public MemTable(QuadIndex index) { this.data = new ConcurrentSkipListMap<>(Arrays::compareUnsigned); } - /** - * Creates a frozen (immutable) MemTable from an existing data map. Used internally by {@link #freeze()}. - */ - private MemTable(QuadIndex index, ConcurrentSkipListMap data, boolean frozen) { - this.index = index; - this.data = data; - this.frozen.set(frozen); - } - /** * Stores a quad in the table. * @@ -160,8 +151,7 @@ public long approximateSizeInBytes() { } /** - * Returns a frozen (immutable) snapshot of this table. After freezing, no further writes are accepted on this - * instance. + * Freezes this table in place, preventing further writes. Does not create a copy. * * @return this MemTable, now frozen */ diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java index 97b34aceda5..330c441c908 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetFileBuilder.java @@ -15,6 +15,7 @@ import java.util.HashMap; import java.util.List; +import org.apache.hadoop.conf.Configuration; import org.apache.parquet.conf.ParquetConfiguration; import org.apache.parquet.conf.PlainParquetConfiguration; import org.apache.parquet.hadoop.ParquetWriter; @@ -51,35 +52,6 @@ private ParquetFileBuilder() { // utility class } - /** - * A quad entry to be written to a Parquet file. All 5 fields (subject, predicate, object, context, flag) are - * stored. - */ - public static class QuadEntry { - public final long subject; - public final long predicate; - public final long object; - public final long context; - public final byte flag; - - /** - * Creates a quad entry with all components. - * - * @param subject the subject value ID - * @param predicate the predicate value ID - * @param object the object value ID - * @param context the context value ID - * @param flag the entry flag (e.g. insert vs tombstone) - */ - public QuadEntry(long subject, long predicate, long object, long context, byte flag) { - this.subject = subject; - this.predicate = predicate; - this.object = object; - this.context = context; - this.flag = flag; - } - } - /** * Builds a Parquet file from the given entries using default settings. 
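The freeze-in-place semantics above give the usual LSM write path without copying. A sketch using names from this patch series; the swap and drain orchestration is simplified:

    MemTable active = new MemTable(new QuadIndex("spoc"));
    active.put(1, 2, 3, 0, true);       // s, p, o, c (0 = default graph), explicit
    MemTable frozen = active.freeze();  // same instance; further put/remove now fail
    // drain frozen.getData() to Parquet while a fresh table takes new writes
    MemTable next = new MemTable(new QuadIndex("spoc"));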
* @@ -143,7 +115,7 @@ private static class QuadEntryWriteSupport extends WriteSupport { } @Override - public WriteContext init(org.apache.hadoop.conf.Configuration configuration) { + public WriteContext init(Configuration configuration) { return new WriteContext(schema, new HashMap<>()); } @@ -216,8 +188,7 @@ protected QuadEntryWriterBuilder self() { } @Override - protected WriteSupport getWriteSupport( - org.apache.hadoop.conf.Configuration conf) { + protected WriteSupport getWriteSupport(Configuration conf) { return new QuadEntryWriteSupport(schema); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadEntry.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadEntry.java new file mode 100644 index 00000000000..63ee6dd9c37 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadEntry.java @@ -0,0 +1,30 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +/** + * A quad entry with subject, predicate, object, context value IDs and a flag byte. + */ +public final class QuadEntry { + public final long subject; + public final long predicate; + public final long object; + public final long context; + public final byte flag; + + public QuadEntry(long subject, long predicate, long object, long context, byte flag) { + this.subject = subject; + this.predicate = predicate; + this.object = object; + this.context = context; + this.flag = flag; + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java index 0a46d764040..2837cb0692d 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java @@ -188,13 +188,15 @@ public void keyToQuad(byte[] key, long[] quad) { } /** - * Constructs the minimum key for a range scan. Unbound components (-1 or 0) become 0. + * Constructs the minimum key for a range scan. Unbound or zero-valued components become 0 (the lowest valid ID). + * Context ID 0 is the default/null graph sentinel and maps to 0, which is correct for both exact and wildcard + * scans. * * @param bb buffer for writing bytes * @param subj subject ID, or -1 for wildcard * @param pred predicate ID, or -1 for wildcard * @param obj object ID, or -1 for wildcard - * @param context context ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard (0 = default graph) */ public void getMinKey(ByteBuffer bb, long subj, long pred, long obj, long context) { toKey(bb, @@ -216,20 +218,21 @@ public byte[] getMinKeyBytes(long subj, long pred, long obj, long context) { } /** - * Constructs the maximum key for a range scan. Unbound components become Long.MAX_VALUE. + * Constructs the maximum key for a range scan. Unbound components (negative) become Long.MAX_VALUE. Context ID 0 + * (the default/null graph) is a valid bound value, not a wildcard. 
* * @param bb buffer for writing bytes * @param subj subject ID, or -1 for wildcard * @param pred predicate ID, or -1 for wildcard * @param obj object ID, or -1 for wildcard - * @param context context ID, or -1 for wildcard + * @param context context ID, or -1 for wildcard (0 = default graph, a valid bound value) */ public void getMaxKey(ByteBuffer bb, long subj, long pred, long obj, long context) { toKey(bb, - subj <= 0 ? Long.MAX_VALUE : subj, - pred <= 0 ? Long.MAX_VALUE : pred, - obj <= 0 ? Long.MAX_VALUE : obj, - context <= 0 ? Long.MAX_VALUE : context); + subj < 0 ? Long.MAX_VALUE : subj, + pred < 0 ? Long.MAX_VALUE : pred, + obj < 0 ? Long.MAX_VALUE : obj, + context < 0 ? Long.MAX_VALUE : context); } /** @@ -237,10 +240,10 @@ public void getMaxKey(ByteBuffer bb, long subj, long pred, long obj, long contex */ public byte[] getMaxKeyBytes(long subj, long pred, long obj, long context) { return toKeyBytes( - subj <= 0 ? Long.MAX_VALUE : subj, - pred <= 0 ? Long.MAX_VALUE : pred, - obj <= 0 ? Long.MAX_VALUE : obj, - context <= 0 ? Long.MAX_VALUE : context); + subj < 0 ? Long.MAX_VALUE : subj, + pred < 0 ? Long.MAX_VALUE : pred, + obj < 0 ? Long.MAX_VALUE : obj, + context < 0 ? Long.MAX_VALUE : context); } /** diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java index 5f147b8cfe3..66b248fc0a4 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java @@ -37,7 +37,8 @@ public QuadStats(long minSubject, long maxSubject, } /** - * Computes min/max stats from a list of long[5] arrays (s, p, o, c, flag). + * Computes min/max stats from a list of long[5] arrays (s, p, o, c, flag). Tombstones (flag == 0) are excluded so + * that deleted entries do not inflate the range statistics used for pruning. */ public static QuadStats fromQuads(List quads) { long minS = Long.MAX_VALUE, maxS = Long.MIN_VALUE; @@ -45,6 +46,9 @@ public static QuadStats fromQuads(List quads) { long minO = Long.MAX_VALUE, maxO = Long.MIN_VALUE; long minC = Long.MAX_VALUE, maxC = Long.MIN_VALUE; for (long[] q : quads) { + if (q[4] == MemTable.FLAG_TOMBSTONE) { + continue; + } minS = Math.min(minS, q[0]); maxS = Math.max(maxS, q[0]); minP = Math.min(minP, q[1]); @@ -60,12 +64,12 @@ public static QuadStats fromQuads(List quads) { /** * Computes min/max stats from a list of QuadEntry objects. 
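The change from <= 0 to < 0 above is the substantive fix: context ID 0 denotes the default graph and is a bound value, but the old bound widened it to a full wildcard in the max key. A worked example against the context-first index:

    QuadIndex cspo = new QuadIndex("cspo");
    // Match all quads in the default graph (context bound to 0, rest wildcard).
    byte[] lo = cspo.getMinKeyBytes(-1, -1, -1, 0); // context component -> 0
    byte[] hi = cspo.getMaxKeyBytes(-1, -1, -1, 0); // now 0; previously Long.MAX_VALUE

With the old upper bound, a cspo range scan for the default graph swept every named graph as well; with the fix it stops at context 0.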
*/ - public static QuadStats fromEntries(List entries) { + public static QuadStats fromEntries(List entries) { long minS = Long.MAX_VALUE, maxS = Long.MIN_VALUE; long minP = Long.MAX_VALUE, maxP = Long.MIN_VALUE; long minO = Long.MAX_VALUE, maxO = Long.MIN_VALUE; long minC = Long.MAX_VALUE, maxC = Long.MIN_VALUE; - for (ParquetFileBuilder.QuadEntry e : entries) { + for (QuadEntry e : entries) { minS = Math.min(minS, e.subject); maxS = Math.max(maxS, e.subject); minP = Math.min(minP, e.predicate); diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java index 3e9bff4591b..cf3ca3812b5 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/S3ObjectStore.java @@ -38,14 +38,19 @@ public class S3ObjectStore implements ObjectStore { public S3ObjectStore(String bucket, String endpoint, String region, String prefix, String accessKey, String secretKey, boolean forcePathStyle) { this.bucket = bucket; - this.prefix = (prefix != null && !prefix.isEmpty() && !prefix.endsWith("/")) ? prefix + "/" - : (prefix != null ? prefix : ""); + if (prefix == null || prefix.isEmpty()) { + this.prefix = ""; + } else if (prefix.endsWith("/")) { + this.prefix = prefix; + } else { + this.prefix = prefix + "/"; + } - MinioClient.Builder builder = MinioClient.builder() + this.client = MinioClient.builder() .endpoint(endpoint) .credentials(accessKey, secretKey) - .region(region); - this.client = builder.build(); + .region(region) + .build(); } private String resolve(String key) { @@ -68,37 +73,32 @@ public void put(String key, byte[] data) { @Override public byte[] get(String key) { - try (InputStream is = client.getObject(GetObjectArgs.builder() + return executeGet(GetObjectArgs.builder() .bucket(bucket) .object(resolve(key)) - .build())) { - return is.readAllBytes(); - } catch (ErrorResponseException e) { - if ("NoSuchKey".equals(e.errorResponse().code())) { - return null; - } - throw new UncheckedIOException(new IOException("Failed to get " + key, e)); - } catch (Exception e) { - throw new UncheckedIOException(new IOException("Failed to get " + key, e)); - } + .build(), key); } @Override public byte[] getRange(String key, long offset, long length) { - try (InputStream is = client.getObject(GetObjectArgs.builder() + return executeGet(GetObjectArgs.builder() .bucket(bucket) .object(resolve(key)) .offset(offset) .length(length) - .build())) { + .build(), key); + } + + private byte[] executeGet(GetObjectArgs args, String key) { + try (InputStream is = client.getObject(args)) { return is.readAllBytes(); } catch (ErrorResponseException e) { if ("NoSuchKey".equals(e.errorResponse().code())) { return null; } - throw new UncheckedIOException(new IOException("Failed to getRange " + key, e)); + throw new UncheckedIOException(new IOException("Failed to get " + key, e)); } catch (Exception e) { - throw new UncheckedIOException(new IOException("Failed to getRange " + key, e)); + throw new UncheckedIOException(new IOException("Failed to get " + key, e)); } } diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java index 5b312053362..62c7ecaaaf2 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java +++ 
b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceMinioIT.java @@ -10,9 +10,12 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.transaction.IsolationLevels; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.ValueFactory; @@ -81,7 +84,7 @@ void writeFlushShutdownRestart() throws Exception { S3SailStore sailStore = new S3SailStore(config, objectStore); var source = sailStore.getExplicitSailSource(); - var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); sink.approve(s, p, o, null); sink.flush(); sailStore.close(); @@ -94,7 +97,7 @@ void writeFlushShutdownRestart() throws Exception { S3SailStore sailStore = new S3SailStore(config, objectStore); var source = sailStore.getExplicitSailSource(); - var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var dataset = source.dataset(IsolationLevels.NONE); CloseableIteration iter = dataset.getStatements(null, null, null); assertTrue(iter.hasNext()); diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java index 4a8374ac5fc..768da6278d0 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3PersistenceTest.java @@ -10,7 +10,9 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.file.Path; import java.util.ArrayList; @@ -18,6 +20,7 @@ import java.util.stream.Collectors; import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.transaction.IsolationLevels; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.ValueFactory; @@ -51,7 +54,7 @@ void writeFlushShutdownRestart_quadsReadable() throws Exception { // Add statements using sink var source = sailStore.getExplicitSailSource(); - var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); // We need to resolve values through the sail's value factory S3ValueStore vs = (S3ValueStore) svf; @@ -71,7 +74,7 @@ void writeFlushShutdownRestart_quadsReadable() throws Exception { ValueFactory svf = sailStore.getValueFactory(); var source = sailStore.getExplicitSailSource(); - var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var dataset = source.dataset(IsolationLevels.NONE); CloseableIteration iter = dataset.getStatements(null, null, null); assertTrue(iter.hasNext(), "Should have at least one statement after restart"); @@ -104,7 +107,7 @@ void multipleFlushes_allDataReadable() throws Exception { S3SailStore sailStore = 
new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); sink.approve(s1, p, o, null); sink.flush(); @@ -120,7 +123,7 @@ void multipleFlushes_allDataReadable() throws Exception { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var dataset = source.dataset(IsolationLevels.NONE); CloseableIteration iter = dataset.getStatements(null, null, null); int count = 0; @@ -151,7 +154,7 @@ void deleteAndRestart_deletedQuadsGone() throws Exception { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); sink.approve(s1, p, o, null); sink.approve(s2, p, o, null); @@ -170,7 +173,7 @@ void deleteAndRestart_deletedQuadsGone() throws Exception { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var dataset = source.dataset(IsolationLevels.NONE); CloseableIteration iter = dataset.getStatements(null, null, null); assertTrue(iter.hasNext()); @@ -200,7 +203,7 @@ void multiplePredicates_allQueriesWork() throws Exception { { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); sink.approve(s1, p1, o1, null); sink.approve(s1, p2, o2, null); @@ -213,7 +216,7 @@ void multiplePredicates_allQueriesWork() throws Exception { { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var dataset = source.dataset(IsolationLevels.NONE); // All statements List all = drain(dataset.getStatements(null, null, null)); @@ -260,7 +263,7 @@ void fileLayout_flatDataDirectory() throws Exception { { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); sink.approve(s, p, o, null); sink.flush(); sailStore.close(); @@ -301,7 +304,7 @@ void contextQuery_afterRestart() throws Exception { { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); sink.approve(s, p, o, g1); sink.approve(s, p, o, g2); sink.flush(); @@ -311,7 +314,7 @@ void contextQuery_afterRestart() throws Exception { { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var dataset = source.dataset(IsolationLevels.NONE); // Query by context g1 List byG1 = drain( @@ -347,7 +350,7 @@ void namespacePersistence() throws Exception { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var 
sink = source.sink(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var sink = source.sink(IsolationLevels.NONE); sink.setNamespace("ex", "http://example.org/"); sink.flush(); sailStore.close(); @@ -358,7 +361,7 @@ void namespacePersistence() throws Exception { S3SailStore sailStore = new S3SailStore(config, store); var source = sailStore.getExplicitSailSource(); - var dataset = source.dataset(org.eclipse.rdf4j.common.transaction.IsolationLevels.NONE); + var dataset = source.dataset(IsolationLevels.NONE); assertEquals("http://example.org/", dataset.getNamespace("ex")); diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java index bcea8fb485a..3f3736c072d 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3SparqlOrderByTest.java @@ -19,6 +19,6 @@ public class S3SparqlOrderByTest extends SparqlOrderByTest { @Override protected Repository newRepository() { - return new SailRepository(new S3Store(new S3StoreConfig("spoc"))); + return new SailRepository(new S3Store(new S3StoreConfig())); } } diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java index 256131ec4f4..ec7a292aa5d 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreConnectionTest.java @@ -21,6 +21,6 @@ public class S3StoreConnectionTest extends RepositoryConnectionTest { @Override protected Repository createRepository(File dataDir) { - return new SailRepository(new S3Store(new S3StoreConfig("spoc"))); + return new SailRepository(new S3Store(new S3StoreConfig())); } } diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java index 20715c3b7cf..a37fa581dec 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreIsolationLevelTest.java @@ -19,6 +19,6 @@ public class S3StoreIsolationLevelTest extends SailIsolationLevelTest { @Override protected NotifyingSail createSail() throws SailException { - return new S3Store(new S3StoreConfig("spoc,posc")); + return new S3Store(new S3StoreConfig()); } } diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java index 716520425e6..33c138afef5 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreRepositoryTest.java @@ -19,6 +19,6 @@ public class S3StoreRepositoryTest extends RepositoryTest { @Override protected Repository createRepository() { - return new SailRepository(new S3Store(new S3StoreConfig("spoc"))); + return new SailRepository(new S3Store(new S3StoreConfig())); } } diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java index 921fd67bb19..4e624b1ec50 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java +++ 
b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3StoreTest.java @@ -19,7 +19,7 @@ public class S3StoreTest extends RDFNotifyingStoreTest { @Override protected NotifyingSail createSail() throws SailException { - NotifyingSail sail = new S3Store(new S3StoreConfig("spoc,posc")); + NotifyingSail sail = new S3Store(new S3StoreConfig()); sail.init(); return sail; } diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java index 5e69022d14c..6158ca991f8 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/S3ValueStoreSerializationTest.java @@ -10,7 +10,9 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.file.Path; diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java index 4e7b41700ba..8945c04e406 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/CatalogTest.java @@ -10,7 +10,8 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3.storage; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.file.Path; import java.util.List; diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java index 41c883553ef..86e060f6c99 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MemTableReorderTest.java @@ -10,7 +10,10 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3.storage; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.List; diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java index bd4eaee8507..6702130c0af 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/MergeIteratorTest.java @@ -10,7 +10,9 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3.storage; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; 
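The switch from new S3StoreConfig("spoc") to the no-arg constructor across these tests reflects that index layout is no longer user-configurable: the store always maintains the three fixed sort orders (spoc, opsc, cspo) and picks one per lookup pattern. A sketch of a plausible selection rule; this is illustrative only, the actual rules are whatever QuadIndexSelectionTest asserts:

    // -1 means the component is unbound in the lookup pattern.
    static String chooseSortOrder(long s, long p, long o, long c) {
        if (s >= 0) return "spoc"; // subject bound: subject-first order
        if (o >= 0) return "opsc"; // object bound: object-first order
        if (c >= 0) return "cspo"; // only context bound: context-first order
        return "spoc";             // full scan: any order serves
    }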
+import static org.junit.jupiter.api.Assertions.assertFalse; import java.util.ArrayList; import java.util.Arrays; diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java index 0f74d9fea4e..3b6f24e5741 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/ParquetRoundTripTest.java @@ -10,7 +10,9 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3.storage; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import java.util.ArrayList; import java.util.List; @@ -26,10 +28,10 @@ class ParquetRoundTripTest { @Test void roundTrip_spocOrder_allFieldsPreserved() { QuadIndex spoc = new QuadIndex("spoc"); - List entries = List.of( - new ParquetFileBuilder.QuadEntry(1, 2, 3, 4, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(5, 6, 7, 8, MemTable.FLAG_INFERRED), - new ParquetFileBuilder.QuadEntry(9, 10, 11, 0, MemTable.FLAG_TOMBSTONE)); + List entries = List.of( + new QuadEntry(1, 2, 3, 4, MemTable.FLAG_EXPLICIT), + new QuadEntry(5, 6, 7, 8, MemTable.FLAG_INFERRED), + new QuadEntry(9, 10, 11, 0, MemTable.FLAG_TOMBSTONE)); byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc); @@ -49,10 +51,10 @@ void roundTrip_spocOrder_allFieldsPreserved() { void roundTrip_opscOrder_keysSortedByObject() { QuadIndex opsc = new QuadIndex("opsc"); // Written sorted in OPSC order (by object: 10, 20, 30) - List entries = List.of( - new ParquetFileBuilder.QuadEntry(100, 200, 10, 0, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(300, 400, 20, 0, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(500, 600, 30, 0, MemTable.FLAG_EXPLICIT)); + List entries = List.of( + new QuadEntry(100, 200, 10, 0, MemTable.FLAG_EXPLICIT), + new QuadEntry(300, 400, 20, 0, MemTable.FLAG_EXPLICIT), + new QuadEntry(500, 600, 30, 0, MemTable.FLAG_EXPLICIT)); byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.OPSC); ParquetQuadSource source = new ParquetQuadSource(parquetData, opsc); @@ -69,10 +71,10 @@ void roundTrip_opscOrder_keysSortedByObject() { void roundTrip_cspoOrder_keysSortedByContext() { QuadIndex cspo = new QuadIndex("cspo"); // Written sorted in CSPO order (by context: 5, 10, 15) - List entries = List.of( - new ParquetFileBuilder.QuadEntry(1, 2, 3, 5, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(4, 5, 6, 10, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(7, 8, 9, 15, MemTable.FLAG_EXPLICIT)); + List entries = List.of( + new QuadEntry(1, 2, 3, 5, MemTable.FLAG_EXPLICIT), + new QuadEntry(4, 5, 6, 10, MemTable.FLAG_EXPLICIT), + new QuadEntry(7, 8, 9, 15, MemTable.FLAG_EXPLICIT)); byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.CSPO); ParquetQuadSource source = new ParquetQuadSource(parquetData, cspo); @@ -87,10 +89,10 @@ void roundTrip_cspoOrder_keysSortedByContext() { @Test void roundTrip_filterBySubject() { QuadIndex spoc = new QuadIndex("spoc"); - List entries = List.of( - new ParquetFileBuilder.QuadEntry(1, 2, 3, 0, 
MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(5, 6, 7, 0, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(10, 11, 12, 0, MemTable.FLAG_EXPLICIT)); + List entries = List.of( + new QuadEntry(1, 2, 3, 0, MemTable.FLAG_EXPLICIT), + new QuadEntry(5, 6, 7, 0, MemTable.FLAG_EXPLICIT), + new QuadEntry(10, 11, 12, 0, MemTable.FLAG_EXPLICIT)); byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc, 5, -1, -1, -1); @@ -103,10 +105,10 @@ void roundTrip_filterBySubject() { @Test void roundTrip_filterByPredicate() { QuadIndex spoc = new QuadIndex("spoc"); - List entries = List.of( - new ParquetFileBuilder.QuadEntry(1, 10, 3, 0, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(2, 20, 4, 0, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(3, 10, 5, 0, MemTable.FLAG_EXPLICIT)); + List entries = List.of( + new QuadEntry(1, 10, 3, 0, MemTable.FLAG_EXPLICIT), + new QuadEntry(2, 20, 4, 0, MemTable.FLAG_EXPLICIT), + new QuadEntry(3, 10, 5, 0, MemTable.FLAG_EXPLICIT)); byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc, -1, 10, -1, -1); @@ -121,10 +123,10 @@ void roundTrip_filterByPredicate() { @Test void roundTrip_filterByMultipleComponents() { QuadIndex spoc = new QuadIndex("spoc"); - List entries = List.of( - new ParquetFileBuilder.QuadEntry(1, 2, 3, 4, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(1, 2, 99, 4, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(1, 99, 3, 4, MemTable.FLAG_EXPLICIT)); + List entries = List.of( + new QuadEntry(1, 2, 3, 4, MemTable.FLAG_EXPLICIT), + new QuadEntry(1, 2, 99, 4, MemTable.FLAG_EXPLICIT), + new QuadEntry(1, 99, 3, 4, MemTable.FLAG_EXPLICIT)); byte[] parquetData = ParquetFileBuilder.build(entries, ParquetSchemas.SortOrder.SPOC); ParquetQuadSource source = new ParquetQuadSource(parquetData, spoc, 1, 2, 3, 4); @@ -147,15 +149,15 @@ void mergeIterator_acrossParquetSources() { QuadIndex spoc = new QuadIndex("spoc"); // File 1: newer epoch - List file1 = List.of( - new ParquetFileBuilder.QuadEntry(1, 2, 3, 0, MemTable.FLAG_EXPLICIT), - new ParquetFileBuilder.QuadEntry(5, 6, 7, 0, MemTable.FLAG_EXPLICIT)); + List file1 = List.of( + new QuadEntry(1, 2, 3, 0, MemTable.FLAG_EXPLICIT), + new QuadEntry(5, 6, 7, 0, MemTable.FLAG_EXPLICIT)); byte[] data1 = ParquetFileBuilder.build(file1, ParquetSchemas.SortOrder.SPOC); // File 2: older epoch, overlaps on (1,2,3,0) - List file2 = List.of( - new ParquetFileBuilder.QuadEntry(1, 2, 3, 0, MemTable.FLAG_INFERRED), - new ParquetFileBuilder.QuadEntry(10, 11, 12, 0, MemTable.FLAG_EXPLICIT)); + List file2 = List.of( + new QuadEntry(1, 2, 3, 0, MemTable.FLAG_INFERRED), + new QuadEntry(10, 11, 12, 0, MemTable.FLAG_EXPLICIT)); byte[] data2 = ParquetFileBuilder.build(file2, ParquetSchemas.SortOrder.SPOC); List sources = List.of( diff --git a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java index 8fa8b7082b0..eb25cff98fd 100644 --- a/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java +++ b/core/sail/s3/src/test/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndexSelectionTest.java @@ -10,7 +10,7 @@ *******************************************************************************/ package 
org.eclipse.rdf4j.sail.s3.storage; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.util.List; From 4da224163356870964dbeb2a9d818fbd17265b2a Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sun, 1 Mar 2026 01:02:47 -0500 Subject: [PATCH 07/10] refactor: simplify and optimize S3 SAIL internals - Eliminate double serialization of values/namespaces on flush - Make MemTable.approximateSizeInBytes() O(1) via AtomicLong counter - Precompute field indices in sort comparator to avoid hot-loop switch - Remove duplicate rowGroupSize/pageSize fields from S3SailStore/Compactor - Centralize storage key literals into named constants - Add named type discriminator constants in S3ValueStore - Unify QuadStats accumulation with shared Accumulator inner class - Centralize data key generation in Catalog.dataKey() - Restrict ParquetFileInfo 14-param constructor to package-private --- .../rdf4j/sail/s3/S3NamespaceStore.java | 9 ++- .../eclipse/rdf4j/sail/s3/S3SailStore.java | 62 +++++++++---------- .../eclipse/rdf4j/sail/s3/S3ValueStore.java | 21 ++++--- .../rdf4j/sail/s3/storage/Catalog.java | 20 ++++-- .../rdf4j/sail/s3/storage/Compactor.java | 12 +--- .../rdf4j/sail/s3/storage/MemTable.java | 26 +++++--- .../rdf4j/sail/s3/storage/QuadStats.java | 51 +++++++-------- 7 files changed, 109 insertions(+), 92 deletions(-) diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java index 02e85883117..fbf52a039c4 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3NamespaceStore.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -27,6 +28,8 @@ */ class S3NamespaceStore implements Iterable { + static final String NAMESPACES_KEY = "namespaces/current"; + private final Map namespacesMap = new LinkedHashMap<>(16); public synchronized String getNamespace(String prefix) { @@ -61,7 +64,7 @@ public synchronized void clear() { @SuppressWarnings("unchecked") synchronized void deserialize(ObjectStore objectStore, ObjectMapper mapper) { - byte[] data = objectStore.get("namespaces/current"); + byte[] data = objectStore.get(NAMESPACES_KEY); if (data == null) { return; } @@ -79,14 +82,14 @@ synchronized void deserialize(ObjectStore objectStore, ObjectMapper mapper) { synchronized void serialize(ObjectStore objectStore, ObjectMapper mapper) { try { - List> entries = new java.util.ArrayList<>(); + List> entries = new ArrayList<>(); for (SimpleNamespace ns : namespacesMap.values()) { Map entry = new LinkedHashMap<>(); entry.put("prefix", ns.getPrefix()); entry.put("name", ns.getName()); entries.add(entry); } - objectStore.put("namespaces/current", mapper.writeValueAsBytes(entries)); + objectStore.put(NAMESPACES_KEY, mapper.writeValueAsBytes(entries)); } catch (IOException e) { throw new UncheckedIOException("Failed to serialize namespaces", e); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index 1b0c9c0d28d..ce03080c5a9 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -95,9 +95,6 @@ class 
S3SailStore implements SailStore { ALL_INDEXES = List.copyOf(indexes); } - private static final int DEFAULT_ROW_GROUP_SIZE = 8 * 1024 * 1024; // 8 MiB - private static final int DEFAULT_PAGE_SIZE = 64 * 1024; // 64 KiB - private final S3ValueStore valueStore; private final S3NamespaceStore namespaceStore; @@ -114,8 +111,6 @@ class S3SailStore implements SailStore { private final TieredCache cache; private final CompactionPolicy compactionPolicy; private final Compactor compactor; - private final int rowGroupSize; - private final int pageSize; /** * A lock to control concurrent access by {@link S3SailSink} to the stores. @@ -147,8 +142,6 @@ private static ObjectStore createObjectStore(S3StoreConfig config) { this.namespaceStore = new S3NamespaceStore(); this.objectStore = objectStore; this.memTableFlushSize = config.getMemTableSize(); - this.rowGroupSize = DEFAULT_ROW_GROUP_SIZE; - this.pageSize = DEFAULT_PAGE_SIZE; // Single SPOC index for the MemTable this.memTable = new MemTable(SPOC_INDEX); @@ -165,7 +158,7 @@ private static ObjectStore createObjectStore(S3StoreConfig config) { config.getDiskCacheSize(), objectStore); this.compactionPolicy = new CompactionPolicy(); - this.compactor = new Compactor(objectStore, cache, rowGroupSize, pageSize); + this.compactor = new Compactor(objectStore, cache); // Deserialize value store and namespaces if (catalog.getNextValueId() > 0) { @@ -227,12 +220,11 @@ private void flushToObjectStore() { return; } - // Always persist namespaces and values (they may have changed without any quad writes) - valueStore.serialize(objectStore); - namespaceStore.serialize(objectStore, jsonMapper); - if (memTable.size() == 0) { - return; // no quads to flush — avoid wasting epoch numbers and S3 writes + // No quads to flush — still persist namespaces/values (they may have changed) + valueStore.serialize(objectStore); + namespaceStore.serialize(objectStore, jsonMapper); + return; } long epoch = epochCounter.getAndIncrement(); @@ -252,16 +244,14 @@ private void flushToObjectStore() { private static List collectQuads(MemTable frozen) { List allQuads = new ArrayList<>(frozen.size()); - long[] quad = new long[4]; + long[] scratch = new long[4]; for (Map.Entry entry : frozen.getData().entrySet()) { - long[] q = new long[5]; // s, p, o, c, flag - frozen.getIndex().keyToQuad(entry.getKey(), quad); - q[0] = quad[QuadIndex.SUBJ_IDX]; - q[1] = quad[QuadIndex.PRED_IDX]; - q[2] = quad[QuadIndex.OBJ_IDX]; - q[3] = quad[QuadIndex.CONTEXT_IDX]; - q[4] = entry.getValue()[0]; - allQuads.add(q); + frozen.getIndex().keyToQuad(entry.getKey(), scratch); + allQuads.add(new long[] { + scratch[QuadIndex.SUBJ_IDX], scratch[QuadIndex.PRED_IDX], + scratch[QuadIndex.OBJ_IDX], scratch[QuadIndex.CONTEXT_IDX], + entry.getValue()[0] + }); } return allQuads; } @@ -272,10 +262,9 @@ private void writeParquetFiles(long epoch, List allQuads, QuadStats stat List sorted = sortQuadEntries(allQuads, sortIndex); ParquetSchemas.SortOrder sortOrder = ParquetSchemas.SortOrder.fromSuffix(sortSuffix); - byte[] parquetData = ParquetFileBuilder.build(sorted, ParquetSchemas.QUAD_SCHEMA, - sortOrder, rowGroupSize, pageSize); + byte[] parquetData = ParquetFileBuilder.build(sorted, sortOrder); - String s3Key = "data/L0-" + String.format("%05d", epoch) + "-" + sortSuffix + ".parquet"; + String s3Key = Catalog.dataKey(0, epoch, sortSuffix); objectStore.put(s3Key, parquetData); if (cache != null) { @@ -304,15 +293,24 @@ private void persistMetadata(long epoch) { private static List sortQuadEntries(List quads, QuadIndex 
sortIndex) { List sorted = new ArrayList<>(quads); String seq = sortIndex.getFieldSeqString(); + int i0 = QuadIndex.fieldCharToIdx(seq.charAt(0)); + int i1 = QuadIndex.fieldCharToIdx(seq.charAt(1)); + int i2 = QuadIndex.fieldCharToIdx(seq.charAt(2)); + int i3 = QuadIndex.fieldCharToIdx(seq.charAt(3)); sorted.sort((a, b) -> { - for (int i = 0; i < 4; i++) { - int idx = QuadIndex.fieldCharToIdx(seq.charAt(i)); - int cmp = Long.compare(a[idx], b[idx]); - if (cmp != 0) { - return cmp; - } + int cmp = Long.compare(a[i0], b[i0]); + if (cmp != 0) { + return cmp; + } + cmp = Long.compare(a[i1], b[i1]); + if (cmp != 0) { + return cmp; + } + cmp = Long.compare(a[i2], b[i2]); + if (cmp != 0) { + return cmp; } - return 0; + return Long.compare(a[i3], b[i3]); }); List result = new ArrayList<>(sorted.size()); diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java index f89b420a624..5d56e69af62 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3ValueStore.java @@ -35,6 +35,11 @@ class S3ValueStore extends AbstractValueFactory { static final long UNKNOWN_ID = -1; + static final String VALUES_KEY = "values/current"; + + private static final byte TYPE_IRI = 0; + private static final byte TYPE_LITERAL = 1; + private static final byte TYPE_BNODE = 2; private final ConcurrentHashMap valueToId = new ConcurrentHashMap<>(); private final ConcurrentHashMap idToValue = new ConcurrentHashMap<>(); @@ -115,16 +120,16 @@ void serialize(ObjectStore objectStore) { Value val = entry.getValue(); if (val instanceof IRI) { - out.writeByte(0); + out.writeByte(TYPE_IRI); writeBytes(out, buf, val.stringValue().getBytes(StandardCharsets.UTF_8)); } else if (val instanceof Literal) { - out.writeByte(1); + out.writeByte(TYPE_LITERAL); Literal lit = (Literal) val; writeBytes(out, buf, lit.getLabel().getBytes(StandardCharsets.UTF_8)); writeBytes(out, buf, lit.getDatatype().stringValue().getBytes(StandardCharsets.UTF_8)); writeBytes(out, buf, lit.getLanguage().orElse("").getBytes(StandardCharsets.UTF_8)); } else if (val instanceof BNode) { - out.writeByte(2); + out.writeByte(TYPE_BNODE); writeBytes(out, buf, ((BNode) val).getID().getBytes(StandardCharsets.UTF_8)); } else { throw new IllegalStateException("Unsupported value type: " + val.getClass()); @@ -132,7 +137,7 @@ void serialize(ObjectStore objectStore) { } out.flush(); - objectStore.put("values/current", baos.toByteArray()); + objectStore.put(VALUES_KEY, baos.toByteArray()); } catch (IOException e) { throw new UncheckedIOException(e); } @@ -142,7 +147,7 @@ void serialize(ObjectStore objectStore) { * Deserializes the value store from the object store. 
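For reference, the per-value record layout implied by serialize() and deserialize() above: a one-byte type tag, then length-prefixed UTF-8 fields, where each length is an unsigned varint (written by writeBytes, read back via Varint.readUnsigned). Summarized from the code, not separately specified:

    TYPE_IRI (0)     : len + iri
    TYPE_LITERAL (1) : len + label, len + datatype IRI, len + language (empty string if none)
    TYPE_BNODE (2)   : len + bnode id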
 	 */
 	void deserialize(ObjectStore objectStore, long nextValueId) {
-		byte[] data = objectStore.get("values/current");
+		byte[] data = objectStore.get(VALUES_KEY);
 		if (data == null) {
 			return;
 		}
@@ -157,14 +162,14 @@ void deserialize(ObjectStore objectStore, long nextValueId) {
 				Value val;
 				switch (type) {
-				case 0: { // IRI
+				case TYPE_IRI: {
 					int len = (int) Varint.readUnsigned(bb);
 					byte[] payload = new byte[len];
 					bb.get(payload);
 					val = createIRI(new String(payload, StandardCharsets.UTF_8));
 					break;
 				}
-				case 1: { // Literal
+				case TYPE_LITERAL: {
 					int labelLen = (int) Varint.readUnsigned(bb);
 					byte[] labelBytes = new byte[labelLen];
 					bb.get(labelBytes);
@@ -188,7 +193,7 @@ void deserialize(ObjectStore objectStore, long nextValueId) {
 					}
 					break;
 				}
-				case 2: { // BNode
+				case TYPE_BNODE: {
 					int len = (int) Varint.readUnsigned(bb);
 					byte[] payload = new byte[len];
 					bb.get(payload);
diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java
index 9a3abdc1cea..356525f38ec 100644
--- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java
+++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java
@@ -49,6 +49,9 @@
 @JsonIgnoreProperties(ignoreUnknown = true)
 public class Catalog {
 
+	static final String CATALOG_POINTER_KEY = "catalog/current";
+	private static final String CATALOG_DIR = "catalog/";
+
 	@JsonProperty("version")
 	private int version = 3;
 
@@ -108,11 +111,11 @@ public void setFiles(List<ParquetFileInfo> files) {
 	 * @return the loaded catalog, or an empty catalog if none exists
 	 */
 	public static Catalog load(ObjectStore store, ObjectMapper mapper) {
-		byte[] pointer = store.get("catalog/current");
+		byte[] pointer = store.get(CATALOG_POINTER_KEY);
 		if (pointer == null) {
 			return new Catalog();
 		}
-		String catalogKey = "catalog/" + new String(pointer, StandardCharsets.UTF_8).trim();
+		String catalogKey = CATALOG_DIR + new String(pointer, StandardCharsets.UTF_8).trim();
 		byte[] json = store.get(catalogKey);
 		if (json == null) {
 			return new Catalog();
@@ -140,8 +143,8 @@ public void save(ObjectStore store, ObjectMapper mapper, long epoch) {
 		try {
 			String versionedKey = "v" + epoch + ".json";
 			byte[] json = mapper.writerWithDefaultPrettyPrinter().writeValueAsBytes(this);
-			store.put("catalog/" + versionedKey, json);
-			store.put("catalog/current", versionedKey.getBytes(StandardCharsets.UTF_8));
+			store.put(CATALOG_DIR + versionedKey, json);
+			store.put(CATALOG_POINTER_KEY, versionedKey.getBytes(StandardCharsets.UTF_8));
 		} catch (IOException e) {
 			throw new UncheckedIOException("Failed to save catalog", e);
 		}
@@ -187,6 +190,13 @@ public List<ParquetFileInfo> getFilesForSortOrder(String sortOrder) {
 		return result;
 	}
 
+	/**
+	 * Generates the S3 key for a data file at the given level, epoch, and sort suffix.
+	 */
+	public static String dataKey(int level, long epoch, String sortSuffix) {
+		return "data/L" + level + "-" + String.format("%05d", epoch) + "-" + sortSuffix + ".parquet";
+	}
+
 	/**
 	 * Metadata about a single Parquet file in the catalog, including its location, sort order, size, and min/max
 	 * statistics for subject, predicate, object, and context columns.
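Concretely, the key scheme produced by dataKey() and resolved by load()/save():

    Catalog.dataKey(0, 42, "spoc")  // -> "data/L0-00042-spoc.parquet"
    Catalog.dataKey(2, 7, "cspo")   // -> "data/L2-00007-cspo.parquet"
    // save(store, mapper, 42) writes catalog/v42.json and points
    // catalog/current at "v42.json"; load() follows that pointer.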
@@ -246,7 +256,7 @@ public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, stats.minObject, stats.maxObject, stats.minContext, stats.maxContext); } - public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, + ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, long epoch, long sizeBytes, long minSubject, long maxSubject, long minPredicate, long maxPredicate, diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java index d5e16518f9b..049c325e13d 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java @@ -38,14 +38,10 @@ public class Compactor { private final ObjectStore objectStore; private final TieredCache cache; - private final int rowGroupSize; - private final int pageSize; - public Compactor(ObjectStore objectStore, TieredCache cache, int rowGroupSize, int pageSize) { + public Compactor(ObjectStore objectStore, TieredCache cache) { this.objectStore = objectStore; this.cache = cache; - this.rowGroupSize = rowGroupSize; - this.pageSize = pageSize; } /** @@ -107,11 +103,9 @@ public CompactionResult compact(List sourceFiles, } // Write merged Parquet file - String s3Key = "data/L" + targetLevel + "-" - + String.format("%05d", epoch) + "-" + suffix + ".parquet"; + String s3Key = Catalog.dataKey(targetLevel, epoch, suffix); - byte[] parquetData = ParquetFileBuilder.build(merged, ParquetSchemas.QUAD_SCHEMA, - sortOrder, rowGroupSize, pageSize); + byte[] parquetData = ParquetFileBuilder.build(merged, sortOrder); objectStore.put(s3Key, parquetData); if (cache != null) { diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java index 413d5d9d213..3fc87d3ab5b 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MemTable.java @@ -20,6 +20,7 @@ import java.util.concurrent.ConcurrentNavigableMap; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; /** * In-memory sorted store for quads using a {@link ConcurrentSkipListMap}. Stores quads as varint-encoded byte[] keys @@ -45,9 +46,13 @@ public class MemTable { private static final byte[] VALUE_INFERRED = new byte[] { FLAG_INFERRED }; private static final byte[] VALUE_TOMBSTONE = new byte[] { FLAG_TOMBSTONE }; + /** Estimated overhead per skip-list entry: key array header + value array header + node overhead. */ + private static final int ENTRY_OVERHEAD = 16 + 16 + 64; + private final QuadIndex index; private final ConcurrentSkipListMap data; private final AtomicBoolean frozen = new AtomicBoolean(false); + private final AtomicLong estimatedBytes = new AtomicLong(); /** * Creates a new MemTable backed by the given index for key encoding. @@ -72,7 +77,10 @@ public MemTable(QuadIndex index) { public void put(long s, long p, long o, long c, boolean explicit) { checkNotFrozen(); byte[] key = index.toKeyBytes(s, p, o, c); - data.put(key, explicit ? VALUE_EXPLICIT : VALUE_INFERRED); + byte[] prev = data.put(key, explicit ? 
VALUE_EXPLICIT : VALUE_INFERRED); + if (prev == null) { + estimatedBytes.addAndGet(key.length + 1L + ENTRY_OVERHEAD); + } } /** @@ -87,7 +95,10 @@ public void put(long s, long p, long o, long c, boolean explicit) { public void remove(long s, long p, long o, long c) { checkNotFrozen(); byte[] key = index.toKeyBytes(s, p, o, c); - data.put(key, VALUE_TOMBSTONE); + byte[] prev = data.put(key, VALUE_TOMBSTONE); + if (prev == null) { + estimatedBytes.addAndGet(key.length + 1L + ENTRY_OVERHEAD); + } } /** @@ -138,16 +149,10 @@ public int size() { } /** - * Returns a rough estimate of memory consumption in bytes. + * Returns a rough estimate of memory consumption in bytes. O(1) — maintained incrementally on put/remove. */ public long approximateSizeInBytes() { - long size = 0; - for (Map.Entry entry : data.entrySet()) { - // key array overhead (16 bytes) + key data + value array overhead (16 bytes) + value data - // + map entry overhead (~64 bytes for skip list node) - size += 16 + entry.getKey().length + 16 + entry.getValue().length + 64; - } - return size; + return estimatedBytes.get(); } /** @@ -175,6 +180,7 @@ public boolean isFrozen() { public void clear() { checkNotFrozen(); data.clear(); + estimatedBytes.set(0); } /** diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java index 66b248fc0a4..0b65e60850b 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java @@ -41,44 +41,45 @@ public QuadStats(long minSubject, long maxSubject, * that deleted entries do not inflate the range statistics used for pruning. */ public static QuadStats fromQuads(List quads) { - long minS = Long.MAX_VALUE, maxS = Long.MIN_VALUE; - long minP = Long.MAX_VALUE, maxP = Long.MIN_VALUE; - long minO = Long.MAX_VALUE, maxO = Long.MIN_VALUE; - long minC = Long.MAX_VALUE, maxC = Long.MIN_VALUE; + Accumulator acc = new Accumulator(); for (long[] q : quads) { - if (q[4] == MemTable.FLAG_TOMBSTONE) { - continue; + if (q[4] != MemTable.FLAG_TOMBSTONE) { + acc.add(q[0], q[1], q[2], q[3]); } - minS = Math.min(minS, q[0]); - maxS = Math.max(maxS, q[0]); - minP = Math.min(minP, q[1]); - maxP = Math.max(maxP, q[1]); - minO = Math.min(minO, q[2]); - maxO = Math.max(maxO, q[2]); - minC = Math.min(minC, q[3]); - maxC = Math.max(maxC, q[3]); } - return new QuadStats(minS, maxS, minP, maxP, minO, maxO, minC, maxC); + return acc.build(); } /** * Computes min/max stats from a list of QuadEntry objects. 
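
Note that the MemTable accounting above only grows when put() or remove() inserts a previously absent key (prev == null), so overwrites and repeated tombstones cannot inflate the estimate. A minimal sketch of the invariant, assuming an spoc index:

    MemTable mt = new MemTable(new QuadIndex("spoc"));
    mt.put(1, 2, 3, 0, true);
    mt.put(1, 2, 3, 0, false);                  // same key: prev != null, estimate unchanged
    mt.remove(1, 2, 3, 0);                      // tombstone on same key: still unchanged
    long approx = mt.approximateSizeInBytes();  // key bytes + 1 value byte + entry overhead, counted once
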
*/ public static QuadStats fromEntries(List entries) { + Accumulator acc = new Accumulator(); + for (QuadEntry e : entries) { + acc.add(e.subject, e.predicate, e.object, e.context); + } + return acc.build(); + } + + private static class Accumulator { long minS = Long.MAX_VALUE, maxS = Long.MIN_VALUE; long minP = Long.MAX_VALUE, maxP = Long.MIN_VALUE; long minO = Long.MAX_VALUE, maxO = Long.MIN_VALUE; long minC = Long.MAX_VALUE, maxC = Long.MIN_VALUE; - for (QuadEntry e : entries) { - minS = Math.min(minS, e.subject); - maxS = Math.max(maxS, e.subject); - minP = Math.min(minP, e.predicate); - maxP = Math.max(maxP, e.predicate); - minO = Math.min(minO, e.object); - maxO = Math.max(maxO, e.object); - minC = Math.min(minC, e.context); - maxC = Math.max(maxC, e.context); + + void add(long s, long p, long o, long c) { + minS = Math.min(minS, s); + maxS = Math.max(maxS, s); + minP = Math.min(minP, p); + maxP = Math.max(maxP, p); + minO = Math.min(minO, o); + maxO = Math.max(maxO, o); + minC = Math.min(minC, c); + maxC = Math.max(maxC, c); + } + + QuadStats build() { + return new QuadStats(minS, maxS, minP, maxP, minO, maxO, minC, maxC); } - return new QuadStats(minS, maxS, minP, maxP, minO, maxO, minC, maxC); } } From 80fd7f7f187aa32ea21d0b4a4bd4792df33adc82 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Wed, 4 Mar 2026 17:14:40 -0500 Subject: [PATCH 08/10] feat: parallel writes, streaming reads, bloom filters, and cardinality estimates - Parallelize Parquet writes with CompletableFuture (3 files written concurrently) - Optimize getContextIDs() to use CSPO index with last-seen dedup - Move compaction to background single-thread executor - Implement catalog-based cardinality estimation in S3EvaluationStatistics - Refactor ParquetQuadSource to stream rows lazily instead of loading all into memory - Add row group filtering using Parquet column statistics (min/max) - Add BloomFilter class for leading-component filtering per Parquet file - Add close() to RawEntrySource; MergeIterator closes sources when exhausted --- .../rdf4j/sail/s3/S3EvaluationStatistics.java | 95 ++++++- .../eclipse/rdf4j/sail/s3/S3SailStore.java | 259 +++++++++++------- .../rdf4j/sail/s3/storage/BloomFilter.java | 157 +++++++++++ .../rdf4j/sail/s3/storage/Catalog.java | 76 +++++ .../rdf4j/sail/s3/storage/Compactor.java | 31 ++- .../rdf4j/sail/s3/storage/MergeIterator.java | 11 + .../sail/s3/storage/ParquetQuadSource.java | 188 +++++++++---- .../rdf4j/sail/s3/storage/QuadIndex.java | 226 ++++----------- .../rdf4j/sail/s3/storage/QuadStats.java | 21 +- .../rdf4j/sail/s3/storage/RawEntrySource.java | 3 + 10 files changed, 727 insertions(+), 340 deletions(-) create mode 100644 core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java index 7660bb5de72..aa6255a0d1b 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3EvaluationStatistics.java @@ -10,21 +10,108 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3; +import java.util.List; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.query.algebra.StatementPattern; +import org.eclipse.rdf4j.query.algebra.Var; import 
org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; +import org.eclipse.rdf4j.sail.s3.storage.Catalog; /** - * Evaluation statistics for the S3 sail. Currently uses the base class's default cardinality estimation. This can be - * enhanced later to query the actual storage for more accurate estimates. + * Evaluation statistics for the S3 SAIL. Uses catalog-level file statistics (row counts and min/max ranges) to estimate + * statement pattern cardinality. */ class S3EvaluationStatistics extends EvaluationStatistics { + private final S3ValueStore valueStore; + private final Catalog catalog; + + S3EvaluationStatistics(S3ValueStore valueStore, Catalog catalog) { + this.valueStore = valueStore; + this.catalog = catalog; + } + @Override protected CardinalityCalculator createCardinalityCalculator() { return new S3CardinalityCalculator(); } protected class S3CardinalityCalculator extends CardinalityCalculator { - // Uses the default cardinality estimation from the base class. - // Can be enhanced to consult S3ValueStore and storage for accurate estimates. + + @Override + protected double getCardinality(StatementPattern sp) { + Value subj = getConstantValue(sp.getSubjectVar()); + if (subj != null && !(subj instanceof Resource)) { + subj = null; + } + Value pred = getConstantValue(sp.getPredicateVar()); + if (pred != null && !(pred instanceof IRI)) { + pred = null; + } + Value obj = getConstantValue(sp.getObjectVar()); + Value context = getConstantValue(sp.getContextVar()); + if (context != null && !(context instanceof Resource)) { + context = null; + } + return estimateCardinality((Resource) subj, (IRI) pred, obj, (Resource) context); + } + + private Value getConstantValue(Var var) { + return (var != null) ? var.getValue() : null; + } + } + + private double estimateCardinality(Resource subj, IRI pred, Value obj, Resource context) { + long subjID = S3ValueStore.UNKNOWN_ID; + if (subj != null) { + subjID = valueStore.getId(subj); + if (subjID == S3ValueStore.UNKNOWN_ID) { + return 0; + } + } + + long predID = S3ValueStore.UNKNOWN_ID; + if (pred != null) { + predID = valueStore.getId(pred); + if (predID == S3ValueStore.UNKNOWN_ID) { + return 0; + } + } + + long objID = S3ValueStore.UNKNOWN_ID; + if (obj != null) { + objID = valueStore.getId(obj); + if (objID == S3ValueStore.UNKNOWN_ID) { + return 0; + } + } + + long contextID = S3ValueStore.UNKNOWN_ID; + if (context != null) { + contextID = valueStore.getId(context); + if (contextID == S3ValueStore.UNKNOWN_ID) { + return 0; + } + } + + if (catalog == null) { + return 1000; + } + + // Sum row counts from files whose stats allow matching the pattern, + // then divide by number of sort orders since each triple is stored 3 times + List files = catalog.getFiles(); + long totalMatchingRows = 0; + for (Catalog.ParquetFileInfo file : files) { + if (file.mayContain(subjID, predID, objID, contextID)) { + totalMatchingRows += file.getRowCount(); + } + } + + int numSortOrders = 3; + return (double) totalMatchingRows / numSortOrders; } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index ce03080c5a9..4aca6cd3832 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -14,20 +14,20 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Comparator; -import java.util.HashSet; import java.util.Iterator; import 
java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; -import org.eclipse.rdf4j.common.iteration.ConvertingIteration; import org.eclipse.rdf4j.common.iteration.EmptyIteration; -import org.eclipse.rdf4j.common.iteration.FilterIteration; import org.eclipse.rdf4j.common.iteration.UnionIteration; import org.eclipse.rdf4j.common.order.StatementOrder; import org.eclipse.rdf4j.common.transaction.IsolationLevel; @@ -46,6 +46,7 @@ import org.eclipse.rdf4j.sail.base.SailStore; import org.eclipse.rdf4j.sail.s3.cache.TieredCache; import org.eclipse.rdf4j.sail.s3.config.S3StoreConfig; +import org.eclipse.rdf4j.sail.s3.storage.BloomFilter; import org.eclipse.rdf4j.sail.s3.storage.Catalog; import org.eclipse.rdf4j.sail.s3.storage.CompactionPolicy; import org.eclipse.rdf4j.sail.s3.storage.Compactor; @@ -84,6 +85,7 @@ class S3SailStore implements SailStore { private static final Logger logger = LoggerFactory.getLogger(S3SailStore.class); private static final QuadIndex SPOC_INDEX = new QuadIndex("spoc"); + private static final QuadIndex CSPO_INDEX = new QuadIndex("cspo"); private static final List ALL_INDEXES; static { @@ -111,6 +113,9 @@ class S3SailStore implements SailStore { private final TieredCache cache; private final CompactionPolicy compactionPolicy; private final Compactor compactor; + private final ExecutorService writeExecutor; + private final ExecutorService compactionExecutor; + private volatile CompletableFuture pendingCompaction; /** * A lock to control concurrent access by {@link S3SailSink} to the stores. 
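
To make the catalog-based estimate in the estimateCardinality() hunk above concrete: every quad is written once per sort order, so summing row counts across matching files over-counts by that factor. A worked example with illustrative row counts:

    // Three files pass mayContain() with row counts 12_000, 6_000 and 600;
    // dividing the sum by the 3 sort orders gives the estimate:
    double estimate = (12_000 + 6_000 + 600) / 3.0;   // = 6_200.0
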
@@ -159,6 +164,9 @@ private static ObjectStore createObjectStore(S3StoreConfig config) { this.compactionPolicy = new CompactionPolicy(); this.compactor = new Compactor(objectStore, cache); + this.writeExecutor = Executors.newFixedThreadPool( + Math.min(ALL_INDEXES.size(), Runtime.getRuntime().availableProcessors())); + this.compactionExecutor = Executors.newSingleThreadExecutor(); // Deserialize value store and namespaces if (catalog.getNextValueId() > 0) { @@ -172,6 +180,8 @@ private static ObjectStore createObjectStore(S3StoreConfig config) { this.cache = null; this.compactionPolicy = null; this.compactor = null; + this.writeExecutor = null; + this.compactionExecutor = null; } } @@ -182,7 +192,7 @@ public ValueFactory getValueFactory() { @Override public EvaluationStatistics getEvaluationStatistics() { - return new S3EvaluationStatistics(); + return new S3EvaluationStatistics(valueStore, catalog); } @Override @@ -199,7 +209,18 @@ public SailSource getInferredSailSource() { public void close() throws SailException { try { if (objectStore != null) { + // Await any pending compaction before flushing + CompletableFuture compaction = pendingCompaction; + if (compaction != null) { + compaction.join(); + } flushToObjectStore(); + if (writeExecutor != null) { + writeExecutor.shutdown(); + } + if (compactionExecutor != null) { + compactionExecutor.shutdown(); + } if (cache != null) { cache.close(); } @@ -234,45 +255,77 @@ private void flushToObjectStore() { frozen.freeze(); memTable = new MemTable(SPOC_INDEX); - List allQuads = collectQuads(frozen); - QuadStats stats = QuadStats.fromQuads(allQuads); - writeParquetFiles(epoch, allQuads, stats); + List allEntries = collectEntries(frozen); + QuadStats stats = QuadStats.fromEntries(allEntries); + writeParquetFiles(epoch, allEntries, stats); persistMetadata(epoch); runCompactionIfNeeded(); } - private static List collectQuads(MemTable frozen) { - List allQuads = new ArrayList<>(frozen.size()); + private static List collectEntries(MemTable frozen) { + List entries = new ArrayList<>(frozen.size()); long[] scratch = new long[4]; for (Map.Entry entry : frozen.getData().entrySet()) { frozen.getIndex().keyToQuad(entry.getKey(), scratch); - allQuads.add(new long[] { + entries.add(new QuadEntry( scratch[QuadIndex.SUBJ_IDX], scratch[QuadIndex.PRED_IDX], scratch[QuadIndex.OBJ_IDX], scratch[QuadIndex.CONTEXT_IDX], - entry.getValue()[0] - }); + entry.getValue()[0])); } - return allQuads; + return entries; } - private void writeParquetFiles(long epoch, List allQuads, QuadStats stats) { + private void writeParquetFiles(long epoch, List allEntries, QuadStats stats) { + List> futures = new ArrayList<>(ALL_INDEXES.size()); for (QuadIndex sortIndex : ALL_INDEXES) { - String sortSuffix = sortIndex.getFieldSeqString(); - List sorted = sortQuadEntries(allQuads, sortIndex); + futures.add(CompletableFuture.runAsync(() -> { + String sortSuffix = sortIndex.getFieldSeqString(); + List sorted = new ArrayList<>(allEntries); + sorted.sort(sortIndex.entryComparator()); - ParquetSchemas.SortOrder sortOrder = ParquetSchemas.SortOrder.fromSuffix(sortSuffix); - byte[] parquetData = ParquetFileBuilder.build(sorted, sortOrder); + ParquetSchemas.SortOrder sortOrder = ParquetSchemas.SortOrder.fromSuffix(sortSuffix); + byte[] parquetData = ParquetFileBuilder.build(sorted, sortOrder); - String s3Key = Catalog.dataKey(0, epoch, sortSuffix); - objectStore.put(s3Key, parquetData); + BloomFilter bloom = buildBloomFilter(sorted, sortSuffix); - if (cache != null) { - cache.writeThrough(s3Key, 
parquetData); - } + String s3Key = Catalog.dataKey(0, epoch, sortSuffix); + objectStore.put(s3Key, parquetData); + + if (cache != null) { + cache.writeThrough(s3Key, parquetData); + } + + catalog.addFile(new Catalog.ParquetFileInfo( + s3Key, 0, sortSuffix, sorted.size(), epoch, parquetData.length, stats, bloom)); + }, writeExecutor)); + } + CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); + } + + /** + * Builds a bloom filter for the leading component of the given sort order. + */ + static BloomFilter buildBloomFilter(List entries, String sortSuffix) { + BloomFilter bloom = new BloomFilter(Math.max(1, entries.size()), 0.01); + for (QuadEntry entry : entries) { + bloom.add(leadingComponent(entry, sortSuffix)); + } + return bloom; + } - catalog.addFile(new Catalog.ParquetFileInfo( - s3Key, 0, sortSuffix, sorted.size(), epoch, parquetData.length, stats)); + private static long leadingComponent(QuadEntry entry, String sortSuffix) { + switch (sortSuffix.charAt(0)) { + case 's': + return entry.subject; + case 'o': + return entry.object; + case 'c': + return entry.context; + case 'p': + return entry.predicate; + default: + return entry.subject; } } @@ -288,55 +341,54 @@ private void persistMetadata(long epoch) { } /** - * Sorts quad entries according to the given sort index. + * Checks compaction triggers and submits compaction to the background executor if needed. If a compaction is + * already running, skips to avoid queuing multiple compactions. */ - private static List sortQuadEntries(List quads, QuadIndex sortIndex) { - List sorted = new ArrayList<>(quads); - String seq = sortIndex.getFieldSeqString(); - int i0 = QuadIndex.fieldCharToIdx(seq.charAt(0)); - int i1 = QuadIndex.fieldCharToIdx(seq.charAt(1)); - int i2 = QuadIndex.fieldCharToIdx(seq.charAt(2)); - int i3 = QuadIndex.fieldCharToIdx(seq.charAt(3)); - sorted.sort((a, b) -> { - int cmp = Long.compare(a[i0], b[i0]); - if (cmp != 0) { - return cmp; - } - cmp = Long.compare(a[i1], b[i1]); - if (cmp != 0) { - return cmp; - } - cmp = Long.compare(a[i2], b[i2]); - if (cmp != 0) { - return cmp; - } - return Long.compare(a[i3], b[i3]); - }); + private void runCompactionIfNeeded() { + if (compactionPolicy == null || compactor == null) { + return; + } - List result = new ArrayList<>(sorted.size()); - for (long[] q : sorted) { - result.add(new QuadEntry(q[0], q[1], q[2], q[3], (byte) q[4])); + // Skip if a compaction is already running + CompletableFuture current = pendingCompaction; + if (current != null && !current.isDone()) { + return; } - return result; - } - /** - * Checks compaction triggers and runs compaction if needed. 
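
The parallel write path in writeParquetFiles() above is a plain fan-out/join: one task per sort order on the bounded writeExecutor, then a blocking join before metadata is persisted. Reduced to its skeleton (indexes and writeOne() are stand-ins for ALL_INDEXES and the per-index body):

    List<CompletableFuture<Void>> futures = new ArrayList<>(indexes.size());
    for (QuadIndex sortIndex : indexes) {
        futures.add(CompletableFuture.runAsync(() -> writeOne(sortIndex), writeExecutor));
    }
    CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join();
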
- */ - private void runCompactionIfNeeded() { - if (compactionPolicy == null || compactor == null) { + List filesSnapshot = catalog.getFiles(); + boolean needsL0 = compactionPolicy.shouldCompact(filesSnapshot, 0); + boolean needsL1 = compactionPolicy.shouldCompact(filesSnapshot, 1); + + if (!needsL0 && !needsL1) { return; } + pendingCompaction = CompletableFuture.runAsync(() -> { + try { + doCompaction(); + } catch (Exception e) { + logger.error("Background compaction failed", e); + } + }, compactionExecutor); + } + + private void doCompaction() { List results = new ArrayList<>(); - List files = catalog.getFiles(); + + // Snapshot file list under synchronization + List files; + synchronized (catalog) { + files = catalog.getFiles(); + } // L0→L1 compaction if (compactionPolicy.shouldCompact(files, 0)) { List l0Files = CompactionPolicy.filesAtLevel(files, 0); long compactEpoch = epochCounter.getAndIncrement(); results.add(compactor.compact(l0Files, 0, 1, compactEpoch, catalog)); - files = catalog.getFiles(); + synchronized (catalog) { + files = catalog.getFiles(); + } } // L1→L2 compaction @@ -348,9 +400,11 @@ private void runCompactionIfNeeded() { if (!results.isEmpty()) { // Save catalog BEFORE deleting old files — crash-safe ordering - long epoch = epochCounter.getAndIncrement(); - catalog.setEpoch(epoch); - catalog.save(objectStore, jsonMapper, epoch); + synchronized (catalog) { + long epoch = epochCounter.getAndIncrement(); + catalog.setEpoch(epoch); + catalog.save(objectStore, jsonMapper, epoch); + } // Now safe to delete old files for (Compactor.CompactionResult result : results) { @@ -373,7 +427,17 @@ private boolean hasPersistence() { */ private Iterator queryQuads(long s, long p, long o, long c, boolean explicit) { return hasPersistence() - ? createMergedIterator(s, p, o, c, explicit) + ? createMergedIterator(s, p, o, c, explicit, null) + : memTable.scan(s, p, o, c, explicit); + } + + /** + * Queries quads with a preferred index hint. + */ + private Iterator queryQuads(long s, long p, long o, long c, boolean explicit, + QuadIndex preferredIndex) { + return hasPersistence() + ? createMergedIterator(s, p, o, c, explicit, preferredIndex) : memTable.scan(s, p, o, c, explicit); } @@ -452,12 +516,13 @@ CloseableIteration createStatementIterator( * prunes files using catalog stats, and merges all sources. */ private Iterator createMergedIterator(long subjID, long predID, long objID, long contextID, - boolean explicit) { + boolean explicit, QuadIndex preferredIndex) { byte expectedFlag = explicit ? MemTable.FLAG_EXPLICIT : MemTable.FLAG_INFERRED; - // Select best index for the query pattern - QuadIndex bestIndex = QuadIndex.getBestIndex(ALL_INDEXES, subjID, predID, objID, contextID); + // Select best index for the query pattern, or use the preferred index if provided + QuadIndex bestIndex = preferredIndex != null ? 
preferredIndex + : QuadIndex.getBestIndex(ALL_INDEXES, subjID, predID, objID, contextID); String sortSuffix = bestIndex.getFieldSeqString(); // Build sources: MemTable (newest) + Parquet files (newest epoch first) @@ -473,17 +538,7 @@ private Iterator createMergedIterator(long subjID, long predID, long obj .toList(); for (Catalog.ParquetFileInfo fileInfo : sortOrderFiles) { - // Catalog-level pruning using per-file stats - if (subjID >= 0 && (subjID < fileInfo.getMinSubject() || subjID > fileInfo.getMaxSubject())) { - continue; - } - if (predID >= 0 && (predID < fileInfo.getMinPredicate() || predID > fileInfo.getMaxPredicate())) { - continue; - } - if (objID >= 0 && (objID < fileInfo.getMinObject() || objID > fileInfo.getMaxObject())) { - continue; - } - if (contextID >= 0 && (contextID < fileInfo.getMinContext() || contextID > fileInfo.getMaxContext())) { + if (!fileInfo.mayContain(subjID, predID, objID, contextID)) { continue; } @@ -737,29 +792,45 @@ public CloseableIteration getNamespaces() { @Override public CloseableIteration getContextIDs() throws SailException { - Iterator allQuads = queryQuads(-1, -1, -1, -1, explicit); - - return new FilterIteration( - new ConvertingIteration( - new CloseableIteratorIteration<>(allQuads)) { - @Override - protected Resource convert(long[] quad) { - if (quad[3] == 0) { - return null; + // Use CSPO index where context is the leading field, so context values are grouped + Iterator allQuads = queryQuads(-1, -1, -1, -1, explicit, CSPO_INDEX); + + return new CloseableIteration<>() { + private long lastContextId = Long.MIN_VALUE; + private Resource nextCtx = advance(); + + private Resource advance() { + while (allQuads.hasNext()) { + long[] quad = allQuads.next(); + long ctxId = quad[3]; + if (ctxId != 0 && ctxId != lastContextId) { + lastContextId = ctxId; + Value val = valueStore.getValue(ctxId); + if (val instanceof Resource) { + return (Resource) val; } - Value val = valueStore.getValue(quad[3]); - return val instanceof Resource ? (Resource) val : null; } - }) { - private final Set seen = new HashSet<>(); + } + return null; + } @Override - protected boolean accept(Resource ctx) { - return ctx != null && seen.add(ctx); + public boolean hasNext() { + return nextCtx != null; + } + + @Override + public Resource next() { + if (nextCtx == null) { + throw new java.util.NoSuchElementException(); + } + Resource result = nextCtx; + nextCtx = advance(); + return result; } @Override - protected void handleClose() { + public void close() { // no-op } }; diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java new file mode 100644 index 00000000000..2c3ad2dc154 --- /dev/null +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java @@ -0,0 +1,157 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.s3.storage; + +import java.util.Base64; + +/** + * A simple bit-array bloom filter for long values. 
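
The getContextIDs() rewrite above leans on the CSPO ordering: context is the leading key component, so duplicate contexts arrive adjacent and a single last-seen variable replaces the old HashSet. The core of the trick, assuming a context-sorted stream of quad id arrays:

    long last = Long.MIN_VALUE;
    while (quads.hasNext()) {
        long ctx = quads.next()[3];
        if (ctx != 0 && ctx != last) {   // 0 is the default-graph sentinel
            last = ctx;
            // resolve ctx through the value store and emit it exactly once
        }
    }
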
Uses two independent hash functions derived from a single + * murmur3-style hash to set/test bits. + * + *
<p>
    + * Each Parquet file gets one bloom filter keyed on the leading component of the file's sort order (e.g. subject + * IDs for SPOC files, object IDs for OPSC files, context IDs for CSPO files). + *
</p>
    + */ +public final class BloomFilter { + + private static final int MIN_BITS = 64; + private final long[] bits; + private final int numBits; + private final int numHashFunctions; + + /** + * Creates a bloom filter sized for the expected number of insertions and false positive probability. + * + * @param expectedInsertions expected number of distinct elements + * @param fpp desired false positive probability (e.g. 0.01 for 1%) + */ + public BloomFilter(int expectedInsertions, double fpp) { + if (expectedInsertions <= 0) { + expectedInsertions = 1; + } + this.numBits = Math.max(MIN_BITS, optimalNumBits(expectedInsertions, fpp)); + this.numHashFunctions = optimalNumHashFunctions(expectedInsertions, numBits); + this.bits = new long[(numBits + 63) >>> 6]; + } + + private BloomFilter(long[] bits, int numBits, int numHashFunctions) { + this.bits = bits; + this.numBits = numBits; + this.numHashFunctions = numHashFunctions; + } + + /** + * Adds a value to the bloom filter. + */ + public void add(long value) { + long hash1 = murmurHash(value); + long hash2 = murmurHash(value ^ 0x9E3779B97F4A7C15L); + for (int i = 0; i < numHashFunctions; i++) { + int bit = (int) (((hash1 + (long) i * hash2) & Long.MAX_VALUE) % numBits); + bits[bit >>> 6] |= 1L << (bit & 63); + } + } + + /** + * Tests whether a value might be in the set. + * + * @return {@code true} if the value might be present; {@code false} if it is definitely not present + */ + public boolean mightContain(long value) { + long hash1 = murmurHash(value); + long hash2 = murmurHash(value ^ 0x9E3779B97F4A7C15L); + for (int i = 0; i < numHashFunctions; i++) { + int bit = (int) (((hash1 + (long) i * hash2) & Long.MAX_VALUE) % numBits); + if ((bits[bit >>> 6] & (1L << (bit & 63))) == 0) { + return false; + } + } + return true; + } + + /** + * Serializes this bloom filter to a Base64-encoded string for JSON storage. + */ + public String toBase64() { + // Format: [numBits (4 bytes)] [numHashFunctions (4 bytes)] [bits array (8 bytes each)] + byte[] data = new byte[8 + bits.length * 8]; + writeInt(data, 0, numBits); + writeInt(data, 4, numHashFunctions); + for (int i = 0; i < bits.length; i++) { + writeLong(data, 8 + i * 8, bits[i]); + } + return Base64.getEncoder().encodeToString(data); + } + + /** + * Deserializes a bloom filter from a Base64-encoded string. 
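
Plugging numbers into the two sizing formulas above: for n = 10,000 expected insertions at fpp = 0.01, m = -n * ln(0.01) / ln(2)^2 is roughly 95,850 bits (about 11.7 KiB) and k = round(m/n * ln 2) = 7 probes. A quick check:

    BloomFilter bf = new BloomFilter(10_000, 0.01);
    bf.add(42L);
    assert bf.mightContain(42L);   // bloom filters never yield false negatives
    // bf.mightContain(43L) is false roughly 99% of the time at this sizing
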
+ */ + public static BloomFilter fromBase64(String encoded) { + byte[] data = Base64.getDecoder().decode(encoded); + int numBits = readInt(data, 0); + int numHash = readInt(data, 4); + int arrayLen = (data.length - 8) / 8; + long[] bits = new long[arrayLen]; + for (int i = 0; i < arrayLen; i++) { + bits[i] = readLong(data, 8 + i * 8); + } + return new BloomFilter(bits, numBits, numHash); + } + + private static long murmurHash(long value) { + long h = value; + h ^= h >>> 33; + h *= 0xFF51AFD7ED558CCDL; + h ^= h >>> 33; + h *= 0xC4CEB9FE1A85EC53L; + h ^= h >>> 33; + return h; + } + + private static int optimalNumBits(int n, double fpp) { + return (int) (-n * Math.log(fpp) / (Math.log(2) * Math.log(2))); + } + + private static int optimalNumHashFunctions(int n, int m) { + return Math.max(1, (int) Math.round((double) m / n * Math.log(2))); + } + + private static void writeInt(byte[] buf, int offset, int value) { + buf[offset] = (byte) (value >>> 24); + buf[offset + 1] = (byte) (value >>> 16); + buf[offset + 2] = (byte) (value >>> 8); + buf[offset + 3] = (byte) value; + } + + private static void writeLong(byte[] buf, int offset, long value) { + buf[offset] = (byte) (value >>> 56); + buf[offset + 1] = (byte) (value >>> 48); + buf[offset + 2] = (byte) (value >>> 40); + buf[offset + 3] = (byte) (value >>> 32); + buf[offset + 4] = (byte) (value >>> 24); + buf[offset + 5] = (byte) (value >>> 16); + buf[offset + 6] = (byte) (value >>> 8); + buf[offset + 7] = (byte) value; + } + + private static int readInt(byte[] buf, int offset) { + return ((buf[offset] & 0xFF) << 24) | ((buf[offset + 1] & 0xFF) << 16) + | ((buf[offset + 2] & 0xFF) << 8) | (buf[offset + 3] & 0xFF); + } + + private static long readLong(byte[] buf, int offset) { + return ((long) (buf[offset] & 0xFF) << 56) | ((long) (buf[offset + 1] & 0xFF) << 48) + | ((long) (buf[offset + 2] & 0xFF) << 40) | ((long) (buf[offset + 3] & 0xFF) << 32) + | ((long) (buf[offset + 4] & 0xFF) << 24) | ((long) (buf[offset + 5] & 0xFF) << 16) + | ((long) (buf[offset + 6] & 0xFF) << 8) | (long) (buf[offset + 7] & 0xFF); + } +} diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java index 356525f38ec..c7a8951b86b 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java @@ -17,6 +17,7 @@ import java.util.List; import java.util.Set; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; @@ -246,6 +247,11 @@ public static class ParquetFileInfo { @JsonProperty("maxContext") private long maxContext; + @JsonProperty("bloomFilter") + private String bloomFilterBase64; + + private transient BloomFilter bloomFilter; + public ParquetFileInfo() { } @@ -256,6 +262,12 @@ public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, stats.minObject, stats.maxObject, stats.minContext, stats.maxContext); } + public ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, + long epoch, long sizeBytes, QuadStats stats, BloomFilter bloomFilter) { + this(s3Key, level, sortOrder, rowCount, epoch, sizeBytes, stats); + setBloomFilter(bloomFilter); + } + ParquetFileInfo(String s3Key, int level, String sortOrder, long rowCount, long epoch, long sizeBytes, long minSubject, long 
maxSubject, @@ -389,5 +401,69 @@ public long getMaxContext() { public void setMaxContext(long maxContext) { this.maxContext = maxContext; } + + @JsonIgnore + public BloomFilter getBloomFilter() { + if (bloomFilter == null && bloomFilterBase64 != null) { + bloomFilter = BloomFilter.fromBase64(bloomFilterBase64); + } + return bloomFilter; + } + + public void setBloomFilter(BloomFilter filter) { + this.bloomFilter = filter; + this.bloomFilterBase64 = filter != null ? filter.toBase64() : null; + } + + /** + * Returns the leading component's filter value for this file's sort order. SPOC → subject, OPSC → object, CSPO + * → context. + */ + private long getLeadingFilterValue(long s, long p, long o, long c) { + if (sortOrder == null) { + return -1; + } + switch (sortOrder.charAt(0)) { + case 's': + return s; + case 'o': + return o; + case 'c': + return c; + case 'p': + return p; + default: + return -1; + } + } + + /** + * Tests whether this file's statistics allow it to contain a quad matching the given pattern. Bound components + * (>= 0) are checked against the file's min/max range; unbound components (< 0) are wildcards. Also checks the + * bloom filter for the leading component if available. + */ + public boolean mayContain(long s, long p, long o, long c) { + if (s >= 0 && (s < minSubject || s > maxSubject)) { + return false; + } + if (p >= 0 && (p < minPredicate || p > maxPredicate)) { + return false; + } + if (o >= 0 && (o < minObject || o > maxObject)) { + return false; + } + if (c >= 0 && (c < minContext || c > maxContext)) { + return false; + } + // Check bloom filter for the leading component + BloomFilter bf = getBloomFilter(); + if (bf != null) { + long leadingVal = getLeadingFilterValue(s, p, o, c); + if (leadingVal >= 0 && !bf.mightContain(leadingVal)) { + return false; + } + } + return true; + } } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java index 049c325e13d..3e2ca4415f0 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java @@ -112,9 +112,12 @@ public CompactionResult compact(List sourceFiles, cache.writeThrough(s3Key, parquetData); } + // Build bloom filter for the leading component + BloomFilter bloom = buildBloomFilter(merged, suffix); + QuadStats stats = QuadStats.fromEntries(merged); newFiles.add(new Catalog.ParquetFileInfo(s3Key, targetLevel, suffix, merged.size(), - epoch, parquetData.length, stats)); + epoch, parquetData.length, stats, bloom)); } // Update catalog in memory: remove old files, add new ones. 
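
The new ParquetFileInfo.mayContain() above prunes in two stages: cheap min/max range tests for every bound component, then a bloom probe on the leading component only. Illustratively, for an SPOC-sorted file covering subjects [100, 200]:

    // fileInfo: a Catalog.ParquetFileInfo with minSubject=100, maxSubject=200 (illustrative)
    fileInfo.mayContain(150, -1, -1, -1);  // range passes; file read only if the bloom probe also passes
    fileInfo.mayContain(250, -1, -1, -1);  // false: 250 > maxSubject, file skipped outright
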
@@ -156,6 +159,32 @@ private List mergeEntries(List sources, QuadIndex qua return new ArrayList<>(deduped.values()); } + private static BloomFilter buildBloomFilter(List entries, String sortSuffix) { + BloomFilter bloom = new BloomFilter(Math.max(1, entries.size()), 0.01); + for (QuadEntry entry : entries) { + long val; + switch (sortSuffix.charAt(0)) { + case 's': + val = entry.subject; + break; + case 'o': + val = entry.object; + break; + case 'c': + val = entry.context; + break; + case 'p': + val = entry.predicate; + break; + default: + val = entry.subject; + break; + } + bloom.add(val); + } + return bloom; + } + private static class CompactKey implements Comparable { final byte[] key; diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java index c0844bafdae..7b64d529919 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/MergeIterator.java @@ -26,6 +26,7 @@ public class MergeIterator implements Iterator { private final byte expectedFlag; private final long patternS, patternP, patternO, patternC; private final PriorityQueue heap; + private final List allSources; private long[] next; /** @@ -46,6 +47,7 @@ public MergeIterator(List sources, QuadIndex quadIndex, byte exp this.patternO = o; this.patternC = c; this.heap = new PriorityQueue<>(); + this.allSources = sources; for (int i = 0; i < sources.size(); i++) { RawEntrySource src = sources.get(i); @@ -101,6 +103,15 @@ private void advance() { next = quad; return; } + + // Heap exhausted — close all sources + closeSources(); + } + + private void closeSources() { + for (RawEntrySource source : allSources) { + source.close(); + } } @Override diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java index 07f4a9a4c39..aa3881090f1 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/ParquetQuadSource.java @@ -12,23 +12,26 @@ import java.io.IOException; import java.io.UncheckedIOException; -import java.util.ArrayList; import java.util.List; import org.apache.parquet.ParquetReadOptions; import org.apache.parquet.column.page.PageReadStore; +import org.apache.parquet.column.statistics.LongStatistics; +import org.apache.parquet.column.statistics.Statistics; import org.apache.parquet.conf.PlainParquetConfiguration; import org.apache.parquet.example.data.Group; import org.apache.parquet.example.data.simple.convert.GroupRecordConverter; import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.BlockMetaData; +import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; import org.apache.parquet.io.ColumnIOFactory; import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.io.RecordReader; import org.apache.parquet.schema.MessageType; /** - * A {@link RawEntrySource} that reads entries from an in-memory Parquet file. Entries are sorted according to the - * file's sort order and emitted as 4-varint-encoded byte[] keys with 1-byte flag values. + * A {@link RawEntrySource} that streams entries from an in-memory Parquet file. Entries are read one row group at a + * time, and rows within each row group are read lazily. * *
<p>
    * The key format encodes all four quad components (s, p, o, c) as varints in the order defined by the @@ -37,74 +40,97 @@ */ public class ParquetQuadSource implements RawEntrySource { - private final List entries; - private int pos; + private final ParquetFileReader reader; + private final MessageType schema; + private final MessageColumnIO columnIO; + private final QuadIndex quadIndex; + private final long filterS, filterP, filterO, filterC; + private final List rowGroups; + private int rowGroupIndex; + + private RecordReader recordReader; + private long remainingRows; + private byte[] nextKey; + private byte nextFlag; + private boolean closed; /** - * Creates a source from Parquet file bytes. - * - * @param parquetData the complete Parquet file as byte[] - * @param quadIndex the quad index defining the key encoding order + * Creates a streaming source from Parquet file bytes. */ public ParquetQuadSource(byte[] parquetData, QuadIndex quadIndex) { this(parquetData, quadIndex, -1, -1, -1, -1); } /** - * Creates a source from Parquet file bytes with filtering. - * - * @param parquetData the complete Parquet file as byte[] - * @param quadIndex the quad index defining the key encoding order - * @param subject subject filter, or -1 for wildcard - * @param predicate predicate filter, or -1 for wildcard - * @param object object filter, or -1 for wildcard - * @param context context filter, or -1 for wildcard + * Creates a streaming source from Parquet file bytes with filtering. */ public ParquetQuadSource(byte[] parquetData, QuadIndex quadIndex, long subject, long predicate, long object, long context) { - this.entries = readAllEntries(parquetData, quadIndex, subject, predicate, object, context); - this.pos = 0; + this.quadIndex = quadIndex; + this.filterS = subject; + this.filterP = predicate; + this.filterO = object; + this.filterC = context; + + try { + ByteArrayInputFile inputFile = new ByteArrayInputFile(parquetData); + this.reader = ParquetFileReader.open(inputFile, + new ParquetReadOptions.Builder(new PlainParquetConfiguration()) + .withCodecFactory(SimpleCodecFactory.INSTANCE) + .build()); + this.schema = reader.getFooter().getFileMetaData().getSchema(); + this.columnIO = new ColumnIOFactory().getColumnIO(schema); + this.rowGroups = reader.getRowGroups(); + this.rowGroupIndex = 0; + this.remainingRows = 0; + this.recordReader = null; + + // Buffer the first matching entry + advanceToNext(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to open Parquet file for streaming", e); + } } @Override public boolean hasNext() { - return pos < entries.size(); + return nextKey != null; } @Override public byte[] peekKey() { - return entries.get(pos).key; + return nextKey; } @Override public byte peekFlag() { - return entries.get(pos).flag; + return nextFlag; } @Override public void advance() { - pos++; + advanceToNext(); + } + + @Override + public void close() { + if (!closed) { + closed = true; + try { + reader.close(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to close Parquet reader", e); + } + } } - private static List readAllEntries(byte[] parquetData, QuadIndex quadIndex, - long filterS, long filterP, long filterO, long filterC) { - List result = new ArrayList<>(); - ByteArrayInputFile inputFile = new ByteArrayInputFile(parquetData); - - try (ParquetFileReader reader = ParquetFileReader.open(inputFile, - new ParquetReadOptions.Builder(new PlainParquetConfiguration()) - .withCodecFactory(SimpleCodecFactory.INSTANCE) - .build())) { - MessageType 
schema = reader.getFooter().getFileMetaData().getSchema(); - MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema); - - PageReadStore pages; - while ((pages = reader.readNextRowGroup()) != null) { - long rows = pages.getRowCount(); - RecordReader recordReader = columnIO.getRecordReader(pages, - new GroupRecordConverter(schema)); - - for (long i = 0; i < rows; i++) { + private void advanceToNext() { + nextKey = null; + try { + while (true) { + // Read rows from current row group + while (remainingRows > 0) { + remainingRows--; Group group = recordReader.read(); long subject = group.getLong(ParquetSchemas.COL_SUBJECT, 0); long predicate = group.getLong(ParquetSchemas.COL_PREDICATE, 0); @@ -117,24 +143,82 @@ private static List readAllEntries(byte[] parquetData, QuadIndex quadInde continue; } - byte[] key = quadIndex.toKeyBytes(subject, predicate, object, context); - result.add(new Entry(key, (byte) flag)); + nextKey = quadIndex.toKeyBytes(subject, predicate, object, context); + nextFlag = (byte) flag; + return; + } + + // Move to next row group + if (!loadNextRowGroup()) { + return; } } } catch (IOException e) { - throw new UncheckedIOException("Failed to read Parquet file", e); + throw new UncheckedIOException("Failed to read Parquet row", e); } + } + + private boolean loadNextRowGroup() throws IOException { + while (rowGroupIndex < rowGroups.size()) { + BlockMetaData block = rowGroups.get(rowGroupIndex); + rowGroupIndex++; - return result; + // Row group filtering: check column statistics + if (!rowGroupMayMatch(block)) { + reader.skipNextRowGroup(); + continue; + } + + PageReadStore pages = reader.readNextRowGroup(); + if (pages == null) { + continue; + } + + remainingRows = pages.getRowCount(); + recordReader = columnIO.getRecordReader(pages, new GroupRecordConverter(schema)); + return true; + } + return false; } - private static class Entry { - final byte[] key; - final byte flag; + /** + * Checks whether a row group's column statistics allow a match against the current filter. If a bound filter value + * falls outside a column's [min, max] range, the entire row group can be skipped. 
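
As a concrete instance of the row-group pruning described above: with a bound subject of 42 and a row group whose subject column statistics report min = 500 and max = 900, the whole group is skipped without decoding a page. The statistics come straight from the Parquet footer metadata:

    // columnChunk and filterValue stand in for the loop variables in rowGroupMayMatch()
    LongStatistics st = (LongStatistics) columnChunk.getStatistics();
    boolean prunable = filterValue >= 0 && (filterValue < st.getMin() || filterValue > st.getMax());
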
+ */ + private boolean rowGroupMayMatch(BlockMetaData block) { + for (ColumnChunkMetaData col : block.getColumns()) { + Statistics stats = col.getStatistics(); + if (stats == null || stats.isEmpty() || !stats.hasNonNullValue()) { + continue; + } + if (!(stats instanceof LongStatistics)) { + continue; + } + LongStatistics longStats = (LongStatistics) stats; + long min = longStats.getMin(); + long max = longStats.getMax(); + + String colName = col.getPath().toDotString(); + long filterVal = getFilterForColumn(colName); + if (filterVal >= 0 && (filterVal < min || filterVal > max)) { + return false; + } + } + return true; + } - Entry(byte[] key, byte flag) { - this.key = key; - this.flag = flag; + private long getFilterForColumn(String colName) { + switch (colName) { + case ParquetSchemas.COL_SUBJECT: + return filterS; + case ParquetSchemas.COL_PREDICATE: + return filterP; + case ParquetSchemas.COL_OBJECT: + return filterO; + case ParquetSchemas.COL_CONTEXT: + return filterC; + default: + return -1; } } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java index 2837cb0692d..4f4662e93ca 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadIndex.java @@ -11,10 +11,9 @@ package org.eclipse.rdf4j.sail.s3.storage; import java.nio.ByteBuffer; -import java.util.HashSet; +import java.util.Comparator; import java.util.List; -import java.util.Set; -import java.util.StringTokenizer; +import java.util.function.ToLongFunction; /** * Manages index permutations for quad (S, P, O, C) storage. Each QuadIndex defines a field ordering (e.g. "spoc", @@ -53,13 +52,6 @@ public QuadIndex(String fieldSeq) { this.indexMap = buildIndexMap(this.fieldSeq); } - /** - * Returns the field sequence for this index. - */ - public char[] getFieldSeq() { - return fieldSeq; - } - /** * Returns the field sequence as a String. */ @@ -79,73 +71,18 @@ public String getFieldSeqString() { * @return pattern score (0-4) */ public int getPatternScore(long subj, long pred, long obj, long context) { + long[] values = { subj, pred, obj, context }; int score = 0; - for (char field : fieldSeq) { - switch (field) { - case 's': - if (subj >= 0) { - score++; - } else { - return score; - } - break; - case 'p': - if (pred >= 0) { - score++; - } else { - return score; - } - break; - case 'o': - if (obj >= 0) { - score++; - } else { - return score; - } - break; - case 'c': - if (context >= 0) { - score++; - } else { - return score; - } - break; - default: - throw new IllegalStateException( - "Invalid character '" + field + "' in field sequence: " + new String(fieldSeq)); + for (int idx : indexMap) { + if (values[idx] >= 0) { + score++; + } else { + return score; } } return score; } - /** - * Writes a quad as varints in index order into the given buffer. - * - * @param bb buffer for writing bytes - * @param subj subject ID - * @param pred predicate ID - * @param obj object ID - * @param context context ID - */ - public void toKey(ByteBuffer bb, long subj, long pred, long obj, long context) { - for (char field : fieldSeq) { - switch (field) { - case 's': - Varint.writeUnsigned(bb, subj); - break; - case 'p': - Varint.writeUnsigned(bb, pred); - break; - case 'o': - Varint.writeUnsigned(bb, obj); - break; - case 'c': - Varint.writeUnsigned(bb, context); - break; - } - } - } - /** * Encodes a quad as a byte array key in index order. 
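
With the table-driven rewrite above, scoring and key encoding both walk the same precomputed indexMap. For instance (ids illustrative):

    QuadIndex opsc = new QuadIndex("opsc");   // indexMap = {2, 1, 0, 3}
    opsc.getPatternScore(-1, 7, 9, -1);       // 2: o then p are bound, then s is a wildcard
    opsc.getPatternScore(5, 7, -1, -1);       // 0: the leading field o is unbound
    opsc.toKeyBytes(5, 7, 9, 0);              // varints written in the order o, p, s, c
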
* @@ -156,13 +93,14 @@ public void toKey(ByteBuffer bb, long subj, long pred, long obj, long context) { * @return encoded byte array key */ public byte[] toKeyBytes(long subj, long pred, long obj, long context) { + long[] values = { subj, pred, obj, context }; int length = Varint.calcListLengthUnsigned( - getValueForField(fieldSeq[0], subj, pred, obj, context), - getValueForField(fieldSeq[1], subj, pred, obj, context), - getValueForField(fieldSeq[2], subj, pred, obj, context), - getValueForField(fieldSeq[3], subj, pred, obj, context)); + values[indexMap[0]], values[indexMap[1]], + values[indexMap[2]], values[indexMap[3]]); ByteBuffer bb = ByteBuffer.allocate(length); - toKey(bb, subj, pred, obj, context); + for (int idx : indexMap) { + Varint.writeUnsigned(bb, values[idx]); + } return bb.array(); } @@ -188,26 +126,7 @@ public void keyToQuad(byte[] key, long[] quad) { } /** - * Constructs the minimum key for a range scan. Unbound or zero-valued components become 0 (the lowest valid ID). - * Context ID 0 is the default/null graph sentinel and maps to 0, which is correct for both exact and wildcard - * scans. - * - * @param bb buffer for writing bytes - * @param subj subject ID, or -1 for wildcard - * @param pred predicate ID, or -1 for wildcard - * @param obj object ID, or -1 for wildcard - * @param context context ID, or -1 for wildcard (0 = default graph) - */ - public void getMinKey(ByteBuffer bb, long subj, long pred, long obj, long context) { - toKey(bb, - subj <= 0 ? 0 : subj, - pred <= 0 ? 0 : pred, - obj <= 0 ? 0 : obj, - context <= 0 ? 0 : context); - } - - /** - * Constructs the minimum key as a byte array for a range scan. + * Constructs the minimum key as a byte array for a range scan. Unbound or zero-valued components become 0. */ public byte[] getMinKeyBytes(long subj, long pred, long obj, long context) { return toKeyBytes( @@ -218,25 +137,7 @@ public byte[] getMinKeyBytes(long subj, long pred, long obj, long context) { } /** - * Constructs the maximum key for a range scan. Unbound components (negative) become Long.MAX_VALUE. Context ID 0 - * (the default/null graph) is a valid bound value, not a wildcard. - * - * @param bb buffer for writing bytes - * @param subj subject ID, or -1 for wildcard - * @param pred predicate ID, or -1 for wildcard - * @param obj object ID, or -1 for wildcard - * @param context context ID, or -1 for wildcard (0 = default graph, a valid bound value) - */ - public void getMaxKey(ByteBuffer bb, long subj, long pred, long obj, long context) { - toKey(bb, - subj < 0 ? Long.MAX_VALUE : subj, - pred < 0 ? Long.MAX_VALUE : pred, - obj < 0 ? Long.MAX_VALUE : obj, - context < 0 ? Long.MAX_VALUE : context); - } - - /** - * Constructs the maximum key as a byte array for a range scan. + * Constructs the maximum key as a byte array for a range scan. Unbound components (negative) become Long.MAX_VALUE. */ public byte[] getMaxKeyBytes(long subj, long pred, long obj, long context) { return toKeyBytes( @@ -272,35 +173,6 @@ public static QuadIndex getBestIndex(List indexes, long subj, long pr return bestIndex; } - /** - * Parses a comma/whitespace-separated list of index specifications. Each spec must consist of 4 characters: 's', - * 'p', 'o' and 'c'. 
- * - * @param indexSpecStr a string like "spoc, posc, cosp" - * @return a set of parsed index specifications - * @throws IllegalArgumentException if any spec is invalid - */ - public static Set parseIndexSpecList(String indexSpecStr) { - Set indexes = new HashSet<>(); - - if (indexSpecStr != null) { - StringTokenizer tok = new StringTokenizer(indexSpecStr, ", \t"); - while (tok.hasMoreTokens()) { - String index = tok.nextToken().toLowerCase(); - - if (index.length() != 4 || index.indexOf('s') == -1 || index.indexOf('p') == -1 - || index.indexOf('o') == -1 || index.indexOf('c') == -1) { - throw new IllegalArgumentException( - "Invalid value '" + index + "' in index specification: " + indexSpecStr); - } - - indexes.add(index); - } - } - - return indexes; - } - /** * Tests whether a decoded quad matches the given pattern. Unbound components (< 0) are treated as wildcards. * @@ -319,21 +191,29 @@ public static boolean matches(long[] quad, long s, long p, long o, long c) { } /** - * Maps a field character ('s', 'p', 'o', 'c') to the corresponding array index (0-3). + * Returns a comparator that orders {@link QuadEntry} objects according to this index's field sequence. Field + * extractors are precomputed at construction time for efficient sorting. */ - public static int fieldCharToIdx(char c) { - switch (c) { - case 's': - return SUBJ_IDX; - case 'p': - return PRED_IDX; - case 'o': - return OBJ_IDX; - case 'c': - return CONTEXT_IDX; - default: - throw new IllegalArgumentException("Invalid field: " + c); - } + public Comparator entryComparator() { + ToLongFunction e0 = extractorFor(indexMap[0]); + ToLongFunction e1 = extractorFor(indexMap[1]); + ToLongFunction e2 = extractorFor(indexMap[2]); + ToLongFunction e3 = extractorFor(indexMap[3]); + return (a, b) -> { + int cmp = Long.compare(e0.applyAsLong(a), e0.applyAsLong(b)); + if (cmp != 0) { + return cmp; + } + cmp = Long.compare(e1.applyAsLong(a), e1.applyAsLong(b)); + if (cmp != 0) { + return cmp; + } + cmp = Long.compare(e2.applyAsLong(a), e2.applyAsLong(b)); + if (cmp != 0) { + return cmp; + } + return Long.compare(e3.applyAsLong(a), e3.applyAsLong(b)); + }; } @Override @@ -341,6 +221,21 @@ public String toString() { return fieldSeqString; } + private static ToLongFunction extractorFor(int componentIndex) { + switch (componentIndex) { + case SUBJ_IDX: + return e -> e.subject; + case PRED_IDX: + return e -> e.predicate; + case OBJ_IDX: + return e -> e.object; + case CONTEXT_IDX: + return e -> e.context; + default: + throw new IllegalArgumentException("Invalid component index: " + componentIndex); + } + } + private static int[] buildIndexMap(char[] fieldSeq) { int[] indexes = new int[fieldSeq.length]; for (int i = 0; i < fieldSeq.length; i++) { @@ -364,19 +259,4 @@ private static int[] buildIndexMap(char[] fieldSeq) { } return indexes; } - - private static long getValueForField(char field, long subj, long pred, long obj, long context) { - switch (field) { - case 's': - return subj; - case 'p': - return pred; - case 'o': - return obj; - case 'c': - return context; - default: - throw new IllegalArgumentException("Invalid field: " + field); - } - } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java index 0b65e60850b..3652c5595ae 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/QuadStats.java @@ -37,26 +37,15 @@ public 
QuadStats(long minSubject, long maxSubject, } /** - * Computes min/max stats from a list of long[5] arrays (s, p, o, c, flag). Tombstones (flag == 0) are excluded so - * that deleted entries do not inflate the range statistics used for pruning. - */ - public static QuadStats fromQuads(List quads) { - Accumulator acc = new Accumulator(); - for (long[] q : quads) { - if (q[4] != MemTable.FLAG_TOMBSTONE) { - acc.add(q[0], q[1], q[2], q[3]); - } - } - return acc.build(); - } - - /** - * Computes min/max stats from a list of QuadEntry objects. + * Computes min/max stats from a list of QuadEntry objects. Tombstones are excluded so that deleted entries do not + * inflate the range statistics used for pruning. */ public static QuadStats fromEntries(List entries) { Accumulator acc = new Accumulator(); for (QuadEntry e : entries) { - acc.add(e.subject, e.predicate, e.object, e.context); + if (e.flag != MemTable.FLAG_TOMBSTONE) { + acc.add(e.subject, e.predicate, e.object, e.context); + } } return acc.build(); } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java index 9095094a319..5d5346a6546 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/RawEntrySource.java @@ -23,4 +23,7 @@ public interface RawEntrySource { byte peekFlag(); void advance(); + + default void close() { + } } From 89a73e16910e5679c5ff24e53abeb1497168ec9b Mon Sep 17 00:00:00 2001 From: Chengxu bian Date: Sun, 8 Mar 2026 16:47:34 +0000 Subject: [PATCH 09/10] refactor: deduplicate bloom filter logic and simplify serialization Consolidate duplicated buildBloomFilter and leading-component switch logic from S3SailStore and Compactor into BloomFilter. Replace hand-rolled byte serialization with ByteBuffer. Merge queryQuads overloads and eliminate double shouldCompact evaluation in compaction. --- .../eclipse/rdf4j/sail/s3/S3SailStore.java | 54 ++------- .../rdf4j/sail/s3/storage/BloomFilter.java | 103 +++++++++++------- .../rdf4j/sail/s3/storage/Catalog.java | 24 +--- .../rdf4j/sail/s3/storage/Compactor.java | 29 +---- 4 files changed, 75 insertions(+), 135 deletions(-) diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java index 4aca6cd3832..162c1dcc72e 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/S3SailStore.java @@ -287,7 +287,7 @@ private void writeParquetFiles(long epoch, List allEntries, QuadStats ParquetSchemas.SortOrder sortOrder = ParquetSchemas.SortOrder.fromSuffix(sortSuffix); byte[] parquetData = ParquetFileBuilder.build(sorted, sortOrder); - BloomFilter bloom = buildBloomFilter(sorted, sortSuffix); + BloomFilter bloom = BloomFilter.buildForEntries(sorted, sortSuffix); String s3Key = Catalog.dataKey(0, epoch, sortSuffix); objectStore.put(s3Key, parquetData); @@ -303,32 +303,6 @@ private void writeParquetFiles(long epoch, List allEntries, QuadStats CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); } - /** - * Builds a bloom filter for the leading component of the given sort order. 
- */ - static BloomFilter buildBloomFilter(List entries, String sortSuffix) { - BloomFilter bloom = new BloomFilter(Math.max(1, entries.size()), 0.01); - for (QuadEntry entry : entries) { - bloom.add(leadingComponent(entry, sortSuffix)); - } - return bloom; - } - - private static long leadingComponent(QuadEntry entry, String sortSuffix) { - switch (sortSuffix.charAt(0)) { - case 's': - return entry.subject; - case 'o': - return entry.object; - case 'c': - return entry.context; - case 'p': - return entry.predicate; - default: - return entry.subject; - } - } - private void persistMetadata(long epoch) { // Save catalog first: if we crash after catalog but before values, // on restart we have new nextValueId but old values — IDs are gaps (safe). @@ -365,14 +339,14 @@ private void runCompactionIfNeeded() { pendingCompaction = CompletableFuture.runAsync(() -> { try { - doCompaction(); + doCompaction(needsL0, needsL1); } catch (Exception e) { logger.error("Background compaction failed", e); } }, compactionExecutor); } - private void doCompaction() { + private void doCompaction(boolean compactL0, boolean compactL1) { List results = new ArrayList<>(); // Snapshot file list under synchronization @@ -382,7 +356,7 @@ private void doCompaction() { } // L0→L1 compaction - if (compactionPolicy.shouldCompact(files, 0)) { + if (compactL0) { List l0Files = CompactionPolicy.filesAtLevel(files, 0); long compactEpoch = epochCounter.getAndIncrement(); results.add(compactor.compact(l0Files, 0, 1, compactEpoch, catalog)); @@ -391,8 +365,8 @@ private void doCompaction() { } } - // L1→L2 compaction - if (compactionPolicy.shouldCompact(files, 1)) { + // L1→L2 compaction (re-check after L0 compaction may have produced new L1 files) + if (compactL1 || (compactL0 && compactionPolicy.shouldCompact(files, 1))) { List l1Files = CompactionPolicy.filesAtLevel(files, 1); long compactEpoch = epochCounter.getAndIncrement(); results.add(compactor.compact(l1Files, 1, 2, compactEpoch, catalog)); @@ -423,16 +397,8 @@ private boolean hasPersistence() { } /** - * Queries quads using the best available source (merged Parquet + MemTable, or MemTable only). - */ - private Iterator queryQuads(long s, long p, long o, long c, boolean explicit) { - return hasPersistence() - ? createMergedIterator(s, p, o, c, explicit, null) - : memTable.scan(s, p, o, c, explicit); - } - - /** - * Queries quads with a preferred index hint. + * Queries quads using the best available source (merged Parquet + MemTable, or MemTable only). If + * {@code preferredIndex} is non-null, it is used instead of automatic index selection. 
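+	 * <p>
+	 * Illustrative call shape (hypothetical IDs; {@code -1} marks an unbound component, and a {@code null} hint
+	 * lets the store pick the index):
+	 *
+	 * <pre>{@code
+	 * queryQuads(subjId, -1, -1, -1, true, null); // all explicit quads with the given subject
+	 * }</pre>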
*/ private Iterator queryQuads(long s, long p, long o, long c, boolean explicit, QuadIndex preferredIndex) { @@ -500,7 +466,7 @@ CloseableIteration createStatementIterator( ArrayList> perContextIterList = new ArrayList<>(contextIDList.size()); for (long contextID : contextIDList) { - Iterator quads = queryQuads(subjID, predID, objID, contextID, explicit); + Iterator quads = queryQuads(subjID, predID, objID, contextID, explicit, null); perContextIterList.add(new QuadToStatementIteration(quads, valueStore)); } @@ -746,7 +712,7 @@ private long removeStatements(Resource subj, IRI pred, Value obj, boolean explic long removeCount = 0; for (long contextId : contextIds) { - Iterator iter = queryQuads(subjID, predID, objID, contextId, explicit); + Iterator iter = queryQuads(subjID, predID, objID, contextId, explicit, null); // Buffer results before removing to avoid ConcurrentModificationException // when the iterator is backed by the MemTable's own map diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java index 2c3ad2dc154..7a92af97964 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/BloomFilter.java @@ -10,7 +10,9 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.s3.storage; +import java.nio.ByteBuffer; import java.util.Base64; +import java.util.List; /** * A simple bit-array bloom filter for long values. Uses two independent hash functions derived from a single @@ -78,31 +80,81 @@ public boolean mightContain(long value) { return true; } + /** + * Builds a bloom filter for the leading component of the given sort order. + */ + public static BloomFilter buildForEntries(List entries, String sortSuffix) { + BloomFilter bloom = new BloomFilter(Math.max(1, entries.size()), 0.01); + for (QuadEntry entry : entries) { + bloom.add(leadingComponent(entry, sortSuffix)); + } + return bloom; + } + + /** + * Extracts the leading component value from a quad entry based on the sort order suffix. + */ + static long leadingComponent(QuadEntry entry, String sortSuffix) { + switch (sortSuffix.charAt(0)) { + case 's': + return entry.subject; + case 'o': + return entry.object; + case 'c': + return entry.context; + case 'p': + return entry.predicate; + default: + return entry.subject; + } + } + + /** + * Extracts the leading component value from raw quad IDs based on the sort order suffix. + */ + static long leadingComponent(long s, long p, long o, long c, String sortOrder) { + if (sortOrder == null) { + return -1; + } + switch (sortOrder.charAt(0)) { + case 's': + return s; + case 'p': + return p; + case 'o': + return o; + case 'c': + return c; + default: + return -1; + } + } + /** * Serializes this bloom filter to a Base64-encoded string for JSON storage. 
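+	 * <p>
+	 * Round-trip sketch (illustrative only; the sizing values are hypothetical):
+	 *
+	 * <pre>{@code
+	 * BloomFilter bloom = new BloomFilter(1000, 0.01); // ~1000 expected keys at a 1% false-positive rate
+	 * bloom.add(42L);
+	 * BloomFilter restored = BloomFilter.fromBase64(bloom.toBase64());
+	 * restored.mightContain(42L); // always true: bloom filters have no false negatives
+	 * }</pre>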
*/ public String toBase64() { // Format: [numBits (4 bytes)] [numHashFunctions (4 bytes)] [bits array (8 bytes each)] - byte[] data = new byte[8 + bits.length * 8]; - writeInt(data, 0, numBits); - writeInt(data, 4, numHashFunctions); - for (int i = 0; i < bits.length; i++) { - writeLong(data, 8 + i * 8, bits[i]); + ByteBuffer buf = ByteBuffer.allocate(8 + bits.length * 8); + buf.putInt(numBits); + buf.putInt(numHashFunctions); + for (long word : bits) { + buf.putLong(word); } - return Base64.getEncoder().encodeToString(data); + return Base64.getEncoder().encodeToString(buf.array()); } /** * Deserializes a bloom filter from a Base64-encoded string. */ public static BloomFilter fromBase64(String encoded) { - byte[] data = Base64.getDecoder().decode(encoded); - int numBits = readInt(data, 0); - int numHash = readInt(data, 4); - int arrayLen = (data.length - 8) / 8; + ByteBuffer buf = ByteBuffer.wrap(Base64.getDecoder().decode(encoded)); + int numBits = buf.getInt(); + int numHash = buf.getInt(); + int arrayLen = buf.remaining() / 8; long[] bits = new long[arrayLen]; for (int i = 0; i < arrayLen; i++) { - bits[i] = readLong(data, 8 + i * 8); + bits[i] = buf.getLong(); } return new BloomFilter(bits, numBits, numHash); } @@ -125,33 +177,4 @@ private static int optimalNumHashFunctions(int n, int m) { return Math.max(1, (int) Math.round((double) m / n * Math.log(2))); } - private static void writeInt(byte[] buf, int offset, int value) { - buf[offset] = (byte) (value >>> 24); - buf[offset + 1] = (byte) (value >>> 16); - buf[offset + 2] = (byte) (value >>> 8); - buf[offset + 3] = (byte) value; - } - - private static void writeLong(byte[] buf, int offset, long value) { - buf[offset] = (byte) (value >>> 56); - buf[offset + 1] = (byte) (value >>> 48); - buf[offset + 2] = (byte) (value >>> 40); - buf[offset + 3] = (byte) (value >>> 32); - buf[offset + 4] = (byte) (value >>> 24); - buf[offset + 5] = (byte) (value >>> 16); - buf[offset + 6] = (byte) (value >>> 8); - buf[offset + 7] = (byte) value; - } - - private static int readInt(byte[] buf, int offset) { - return ((buf[offset] & 0xFF) << 24) | ((buf[offset + 1] & 0xFF) << 16) - | ((buf[offset + 2] & 0xFF) << 8) | (buf[offset + 3] & 0xFF); - } - - private static long readLong(byte[] buf, int offset) { - return ((long) (buf[offset] & 0xFF) << 56) | ((long) (buf[offset + 1] & 0xFF) << 48) - | ((long) (buf[offset + 2] & 0xFF) << 40) | ((long) (buf[offset + 3] & 0xFF) << 32) - | ((long) (buf[offset + 4] & 0xFF) << 24) | ((long) (buf[offset + 5] & 0xFF) << 16) - | ((long) (buf[offset + 6] & 0xFF) << 8) | (long) (buf[offset + 7] & 0xFF); - } } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java index c7a8951b86b..64f29ab510a 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Catalog.java @@ -415,28 +415,6 @@ public void setBloomFilter(BloomFilter filter) { this.bloomFilterBase64 = filter != null ? filter.toBase64() : null; } - /** - * Returns the leading component's filter value for this file's sort order. SPOC → subject, OPSC → object, CSPO - * → context. 
- */ - private long getLeadingFilterValue(long s, long p, long o, long c) { - if (sortOrder == null) { - return -1; - } - switch (sortOrder.charAt(0)) { - case 's': - return s; - case 'o': - return o; - case 'c': - return c; - case 'p': - return p; - default: - return -1; - } - } - /** * Tests whether this file's statistics allow it to contain a quad matching the given pattern. Bound components * (>= 0) are checked against the file's min/max range; unbound components (< 0) are wildcards. Also checks the @@ -458,7 +436,7 @@ public boolean mayContain(long s, long p, long o, long c) { // Check bloom filter for the leading component BloomFilter bf = getBloomFilter(); if (bf != null) { - long leadingVal = getLeadingFilterValue(s, p, o, c); + long leadingVal = BloomFilter.leadingComponent(s, p, o, c, sortOrder); if (leadingVal >= 0 && !bf.mightContain(leadingVal)) { return false; } diff --git a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java index 3e2ca4415f0..81d551a36b2 100644 --- a/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java +++ b/core/sail/s3/src/main/java/org/eclipse/rdf4j/sail/s3/storage/Compactor.java @@ -112,8 +112,7 @@ public CompactionResult compact(List sourceFiles, cache.writeThrough(s3Key, parquetData); } - // Build bloom filter for the leading component - BloomFilter bloom = buildBloomFilter(merged, suffix); + BloomFilter bloom = BloomFilter.buildForEntries(merged, suffix); QuadStats stats = QuadStats.fromEntries(merged); newFiles.add(new Catalog.ParquetFileInfo(s3Key, targetLevel, suffix, merged.size(), @@ -159,32 +158,6 @@ private List mergeEntries(List sources, QuadIndex qua return new ArrayList<>(deduped.values()); } - private static BloomFilter buildBloomFilter(List entries, String sortSuffix) { - BloomFilter bloom = new BloomFilter(Math.max(1, entries.size()), 0.01); - for (QuadEntry entry : entries) { - long val; - switch (sortSuffix.charAt(0)) { - case 's': - val = entry.subject; - break; - case 'o': - val = entry.object; - break; - case 'c': - val = entry.context; - break; - case 'p': - val = entry.predicate; - break; - default: - val = entry.subject; - break; - } - bloom.add(val); - } - return bloom; - } - private static class CompactKey implements Comparable { final byte[] key; From fd9195143db6ade7b26a8b2cc1ed12037bdb05b2 Mon Sep 17 00:00:00 2001 From: Chengxu bian Date: Thu, 9 Apr 2026 05:25:46 +0000 Subject: [PATCH 10/10] Add missing Eclipse copyright headers Add the standard Eclipse Distribution License v1.0 / BSD-3-Clause header to the 14 Hadoop stub files in core/sail/s3 that were missing it, fixing the copyright-check CI job. 
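
The stubs' existing explanatory text is preserved: it is converted from
block comments to line comments beneath the new header, so the license
block is the leading comment in every file.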
--- .../org/apache/hadoop/conf/Configuration.java | 23 ++++++++++++------- .../java/org/apache/hadoop/fs/FileStatus.java | 16 +++++++++---- .../java/org/apache/hadoop/fs/FileSystem.java | 16 +++++++++---- .../main/java/org/apache/hadoop/fs/Path.java | 16 +++++++++---- .../java/org/apache/hadoop/fs/PathFilter.java | 16 +++++++++---- .../org/apache/hadoop/mapred/JobConf.java | 16 +++++++++---- .../apache/hadoop/mapreduce/InputFormat.java | 16 +++++++++---- .../apache/hadoop/mapreduce/InputSplit.java | 16 +++++++++---- .../java/org/apache/hadoop/mapreduce/Job.java | 16 +++++++++---- .../apache/hadoop/mapreduce/JobContext.java | 16 +++++++++---- .../apache/hadoop/mapreduce/RecordReader.java | 16 +++++++++---- .../hadoop/mapreduce/TaskAttemptContext.java | 16 +++++++++---- .../mapreduce/lib/input/FileInputFormat.java | 18 +++++++++++---- .../hadoop/mapreduce/lib/input/FileSplit.java | 16 +++++++++---- 14 files changed, 172 insertions(+), 61 deletions(-) diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java b/core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java index e0904545c5a..7a46f59182e 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -1,12 +1,19 @@ -/* - * Minimal stub for org.apache.hadoop.conf.Configuration. +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. * - * Parquet-hadoop references this class in abstract method signatures - * (WriteSupport.init, ParquetWriter.Builder.getWriteSupport). Our code - * overrides the ParquetConfiguration variants instead, so this class is - * never instantiated or used at runtime. It exists only to satisfy the - * JVM class loader. - */ + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub for org.apache.hadoop.conf.Configuration. +// Parquet-hadoop references this class in abstract method signatures +// (WriteSupport.init, ParquetWriter.Builder.getWriteSupport). Our code +// overrides the ParquetConfiguration variants instead, so this class is +// never instantiated or used at runtime. It exists only to satisfy the +// JVM class loader. package org.apache.hadoop.conf; public class Configuration { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java index acb583d94a2..5898ef94c1b 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileStatus.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.fs; public class FileStatus { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java index fd73ea3e2ef..518d1e0fbc4 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.fs; public abstract class FileSystem { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java index f0ceeb9ab79..fe178aab10e 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/Path.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.fs; public class Path { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java b/core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java index 25be1aaf955..3aad803c490 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/fs/PathFilter.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.fs; public interface PathFilter { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java index 71836d9a557..126ffd16689 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapred; import org.apache.hadoop.conf.Configuration; diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java index 872f9916abf..2ac8f746c4e 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputFormat.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapreduce; public abstract class InputFormat { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java index 3f106aeab5b..13521f68a59 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/InputSplit.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapreduce; public abstract class InputSplit { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java index f6ced8fc859..721f0940068 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/Job.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapreduce; public class Job { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java index e06f131fc38..909112c5c65 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/JobContext.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapreduce; public interface JobContext { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java index e9d4741a711..f582a259f44 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/RecordReader.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapreduce; public abstract class RecordReader { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java index abb3c7cc7e9..942f8ff3ee0 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/TaskAttemptContext.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapreduce; public interface TaskAttemptContext extends JobContext { diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java index 5386226e74b..0bb81f4742f 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java @@ -1,8 +1,16 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * ParquetInputFormat extends this class; loaded when ParquetReadOptions.Builder - * calls ParquetInputFormat.getFilter(). Never used at runtime. - */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// ParquetInputFormat extends this class; loaded when ParquetReadOptions.Builder +// calls ParquetInputFormat.getFilter(). Never used at runtime. package org.apache.hadoop.mapreduce.lib.input; import org.apache.hadoop.mapreduce.InputFormat; diff --git a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java index 6ae226d8383..ec406809ef1 100644 --- a/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java +++ b/core/sail/s3/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileSplit.java @@ -1,7 +1,15 @@ -/* - * Minimal stub — satisfies JVM class loading for parquet-hadoop. - * Never instantiated at runtime. 
- */ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +// Minimal stub — satisfies JVM class loading for parquet-hadoop. +// Never instantiated at runtime. package org.apache.hadoop.mapreduce.lib.input; import org.apache.hadoop.mapreduce.InputSplit;
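
Note on the pruning path consolidated in PATCH 09/10: a minimal usage sketch of
Catalog.ParquetFileInfo.mayContain (illustrative only; the populated "files"
list and the java.util imports are assumed, and -1 marks an unbound pattern
component):

	// Keep only the Parquet files whose min/max ranges and leading-component
	// bloom filter admit the pattern (subject = 42, p/o/c unbound).
	List<Catalog.ParquetFileInfo> candidates = new ArrayList<>();
	for (Catalog.ParquetFileInfo file : files) {
		if (file.mayContain(42L, -1L, -1L, -1L)) {
			candidates.add(file);
		}
	}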