Skip to content

Commit 136be9f

Browse files
authored
GH-5148 Introduce "soft fail" for corrupt ValueStore (#5157)
2 parents 517353e + c47fe2b commit 136be9f

17 files changed

Lines changed: 1299 additions & 57 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,3 +52,4 @@ org.eclipse.dash.licenses-1.0.2.jar
5252
e2e/node_modules
5353
e2e/playwright-report
5454
e2e/test-results
55+
.aider*

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.nativerdf;
1212

13+
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES;
14+
1315
import java.io.IOException;
1416

1517
import org.eclipse.rdf4j.common.io.ByteArrayUtil;
@@ -20,13 +22,20 @@
2022
import org.eclipse.rdf4j.model.Value;
2123
import org.eclipse.rdf4j.sail.SailException;
2224
import org.eclipse.rdf4j.sail.nativerdf.btree.RecordIterator;
25+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
26+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
27+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
2330

2431
/**
2532
* A statement iterator that wraps a RecordIterator containing statement records and translates these records to
2633
* {@link Statement} objects.
2734
*/
2835
class NativeStatementIterator extends LookAheadIteration<Statement> {
2936

37+
private static final Logger logger = LoggerFactory.getLogger(NativeStatementIterator.class);
38+
3039
/*-----------*
3140
* Variables *
3241
*-----------*/
@@ -54,25 +63,42 @@ public NativeStatementIterator(RecordIterator btreeIter, ValueStore valueStore)
5463
@Override
5564
public Statement getNextElement() throws SailException {
5665
try {
57-
byte[] nextValue = btreeIter.next();
66+
byte[] nextValue;
67+
try {
68+
nextValue = btreeIter.next();
69+
} catch (AssertionError | Exception e) {
70+
logger.error("Error while reading next value from btree iterator for {}", btreeIter.toString(), e);
71+
throw e;
72+
}
5873

5974
if (nextValue == null) {
6075
return null;
6176
}
6277

6378
int subjID = ByteArrayUtil.getInt(nextValue, TripleStore.SUBJ_IDX);
64-
Resource subj = (Resource) valueStore.getValue(subjID);
79+
Resource subj = valueStore.getResource(subjID);
6580

6681
int predID = ByteArrayUtil.getInt(nextValue, TripleStore.PRED_IDX);
67-
IRI pred = (IRI) valueStore.getValue(predID);
82+
IRI pred = valueStore.getIRI(predID);
6883

6984
int objID = ByteArrayUtil.getInt(nextValue, TripleStore.OBJ_IDX);
7085
Value obj = valueStore.getValue(objID);
7186

7287
Resource context = null;
7388
int contextID = ByteArrayUtil.getInt(nextValue, TripleStore.CONTEXT_IDX);
7489
if (contextID != 0) {
75-
context = (Resource) valueStore.getValue(contextID);
90+
context = valueStore.getResource(contextID);
91+
}
92+
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
93+
if (subj == null) {
94+
subj = new CorruptIRIOrBNode(valueStore.getRevision(), subjID, null);
95+
}
96+
if (pred == null) {
97+
pred = new CorruptIRI(valueStore.getRevision(), predID, null, null);
98+
}
99+
if (obj == null) {
100+
obj = new CorruptUnknownValue(valueStore.getRevision(), objID, null);
101+
}
76102
}
77103

78104
return valueStore.createStatement(subj, pred, obj, context);

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
import org.apache.commons.io.FileUtils;
2525
import org.eclipse.rdf4j.collection.factory.api.CollectionFactory;
2626
import org.eclipse.rdf4j.collection.factory.mapdb.MapDb3CollectionFactory;
27+
import org.eclipse.rdf4j.common.annotation.InternalUseOnly;
2728
import org.eclipse.rdf4j.common.concurrent.locks.Lock;
2829
import org.eclipse.rdf4j.common.concurrent.locks.LockManager;
2930
import org.eclipse.rdf4j.common.io.MavenUtil;
@@ -62,6 +63,17 @@ public class NativeStore extends AbstractNotifyingSail implements FederatedServi
6263

6364
private static final String VERSION = MavenUtil.loadVersion("org.eclipse.rdf4j", "rdf4j-sail-nativerdf", "devel");
6465

66+
/**
67+
* Do not throw an exception when corrupt data is detected. Instead, try to return as much data as possible.
68+
*
69+
* Variable can be set through the system property
70+
* org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes.
71+
*/
72+
@InternalUseOnly
73+
public static boolean SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = "true"
74+
.equalsIgnoreCase(
75+
System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes"));;
76+
6577
private static final Cleaner REMOVE_STORES_USED_FOR_MEMORY_OVERFLOW = Cleaner.create();
6678

6779
/**

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java

Lines changed: 75 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -287,9 +287,71 @@ private Set<String> parseIndexSpecList(String indexSpecStr) throws SailException
287287
}
288288

289289
private void initIndexes(Set<String> indexSpecs) throws IOException {
290+
291+
HashSet<String> invalidIndexes = new HashSet<>();
292+
290293
for (String fieldSeq : indexSpecs) {
291294
logger.trace("Initializing index '{}'...", fieldSeq);
292-
indexes.add(new TripleIndex(fieldSeq));
295+
try {
296+
indexes.add(new TripleIndex(fieldSeq, false));
297+
} catch (Exception e) {
298+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
299+
invalidIndexes.add(fieldSeq);
300+
logger.warn("Ignoring index because it failed to initialize index '{}'", fieldSeq, e);
301+
} else {
302+
logger.error(
303+
"Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true.",
304+
fieldSeq, e);
305+
throw e;
306+
}
307+
308+
}
309+
310+
}
311+
312+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
313+
indexSpecs.removeAll(invalidIndexes);
314+
}
315+
316+
List<TripleIndex> emptyIndexes = new ArrayList<>();
317+
List<TripleIndex> nonEmptyIndexes = new ArrayList<>();
318+
319+
checkIfIndexesAreEmptyOrNot(nonEmptyIndexes, emptyIndexes);
320+
321+
if (!emptyIndexes.isEmpty() && !nonEmptyIndexes.isEmpty()) {
322+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
323+
indexes.removeAll(emptyIndexes);
324+
} else {
325+
for (TripleIndex index : emptyIndexes) {
326+
throw new IOException("Index '" + new String(index.getFieldSeq())
327+
+ "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true. Index file: "
328+
+ index.getBTree().getFile().getAbsolutePath());
329+
}
330+
}
331+
}
332+
333+
}
334+
335+
private void checkIfIndexesAreEmptyOrNot(List<TripleIndex> nonEmptyIndexes, List<TripleIndex> emptyIndexes)
336+
throws IOException {
337+
for (TripleIndex index : indexes) {
338+
try (RecordIterator recordIterator = index.getBTree().iterateAll()) {
339+
try {
340+
byte[] next = recordIterator.next();
341+
if (next != null) {
342+
next = recordIterator.next();
343+
if (next != null) {
344+
nonEmptyIndexes.add(index);
345+
} else {
346+
emptyIndexes.add(index);
347+
}
348+
} else {
349+
emptyIndexes.add(index);
350+
}
351+
} catch (Throwable ignored) {
352+
emptyIndexes.add(index);
353+
}
354+
}
293355
}
294356
}
295357

@@ -355,7 +417,7 @@ private void reindex(Set<String> currentIndexSpecs, Set<String> newIndexSpecs) t
355417
for (String fieldSeq : addedIndexSpecs) {
356418
logger.debug("Initializing new index '{}'...", fieldSeq);
357419

358-
TripleIndex addedIndex = new TripleIndex(fieldSeq);
420+
TripleIndex addedIndex = new TripleIndex(fieldSeq, true);
359421
BTree addedBTree = null;
360422
RecordIterator sourceIter = null;
361423
try {
@@ -1122,7 +1184,17 @@ private class TripleIndex {
11221184

11231185
private final BTree btree;
11241186

1125-
public TripleIndex(String fieldSeq) throws IOException {
1187+
/**
 * Creates the index for the given field sequence, backed by a {@link BTree} stored in the triple store's
 * directory.
 *
 * @param fieldSeq                the field sequence identifying this index; also determines the file name prefix
 * @param deleteExistingIndexFile if {@code true}, any existing {@code .dat} and {@code .alloc} files for this
 *                                index are deleted before the BTree is opened, so the index starts out fresh
 * @throws IOException if an existing index file could not be deleted, or if the BTree cannot be opened
 */
public TripleIndex(String fieldSeq, boolean deleteExistingIndexFile) throws IOException {
	if (deleteExistingIndexFile) {
		String filenamePrefix = getFilenamePrefix(fieldSeq);
		for (String extension : new String[] { ".dat", ".alloc" }) {
			File staleFile = new File(dir, filenamePrefix + extension);
			// File.delete() reports failure only through its return value. Ignoring it would let the BTree
			// below silently reopen the stale file although the caller asked for a fresh index.
			if (staleFile.exists() && !staleFile.delete()) {
				throw new IOException("Failed to delete existing index file: " + staleFile.getAbsolutePath());
			}
		}
	}
	tripleComparator = new TripleComparator(fieldSeq);
	btree = new BTree(dir, getFilenamePrefix(fieldSeq), 2048, RECORD_LENGTH, tripleComparator, forceSync);
}

0 commit comments

Comments
 (0)