Skip to content

Commit 0076003

Browse files
committed
GH-5148 improved handling of corrupt spoc/posc/... indexes
1 parent b6215bb commit 0076003

6 files changed

Lines changed: 235 additions & 13 deletions

File tree

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,17 @@
2525
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
2626
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
2727
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
2830

2931
/**
3032
* A statement iterator that wraps a RecordIterator containing statement records and translates these records to
3133
* {@link Statement} objects.
3234
*/
3335
class NativeStatementIterator extends LookAheadIteration<Statement> {
3436

37+
private static final Logger logger = LoggerFactory.getLogger(NativeStatementIterator.class);
38+
3539
/*-----------*
3640
* Variables *
3741
*-----------*/
@@ -59,7 +63,13 @@ public NativeStatementIterator(RecordIterator btreeIter, ValueStore valueStore)
5963
@Override
6064
public Statement getNextElement() throws SailException {
6165
try {
62-
byte[] nextValue = btreeIter.next();
66+
byte[] nextValue;
67+
try {
68+
nextValue = btreeIter.next();
69+
} catch (AssertionError | Exception e) {
70+
logger.error("Error while reading next value from btree iterator for {}", btreeIter.toString(), e);
71+
throw e;
72+
}
6373

6474
if (nextValue == null) {
6575
return null;

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,71 @@ private Set<String> parseIndexSpecList(String indexSpecStr) throws SailException
287287
}
288288

289289
private void initIndexes(Set<String> indexSpecs) throws IOException {
290+
291+
HashSet<String> invalidIndexes = new HashSet<>();
292+
290293
for (String fieldSeq : indexSpecs) {
291294
logger.trace("Initializing index '{}'...", fieldSeq);
292-
indexes.add(new TripleIndex(fieldSeq));
295+
try {
296+
indexes.add(new TripleIndex(fieldSeq, false));
297+
} catch (Exception e) {
298+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) {
299+
invalidIndexes.add(fieldSeq);
300+
logger.warn("Ignoring index because it failed to initialize index '{}'", fieldSeq, e);
301+
} else {
302+
logger.error(
303+
"Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true.",
304+
fieldSeq, e);
305+
throw e;
306+
}
307+
308+
}
309+
310+
}
311+
312+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) {
313+
indexSpecs.removeAll(invalidIndexes);
314+
}
315+
316+
List<TripleIndex> emptyIndexes = new ArrayList<>();
317+
List<TripleIndex> nonEmptyIndexes = new ArrayList<>();
318+
319+
checkIfIndexesAreEmptyOrNot(nonEmptyIndexes, emptyIndexes);
320+
321+
if (!emptyIndexes.isEmpty() && !nonEmptyIndexes.isEmpty()) {
322+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA) {
323+
indexes.removeAll(emptyIndexes);
324+
} else {
325+
for (TripleIndex index : emptyIndexes) {
326+
throw new IOException("Index '" + new String(index.getFieldSeq())
327+
+ "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true. Index file: "
328+
+ index.getBTree().getFile().getAbsolutePath());
329+
}
330+
}
331+
}
332+
333+
}
334+
335+
private void checkIfIndexesAreEmptyOrNot(List<TripleIndex> nonEmptyIndexes, List<TripleIndex> emptyIndexes)
336+
throws IOException {
337+
for (TripleIndex index : indexes) {
338+
try (RecordIterator recordIterator = index.getBTree().iterateAll()) {
339+
try {
340+
byte[] next = recordIterator.next();
341+
if (next != null) {
342+
next = recordIterator.next();
343+
if (next != null) {
344+
nonEmptyIndexes.add(index);
345+
} else {
346+
emptyIndexes.add(index);
347+
}
348+
} else {
349+
emptyIndexes.add(index);
350+
}
351+
} catch (Throwable ignored) {
352+
emptyIndexes.add(index);
353+
}
354+
}
293355
}
294356
}
295357

@@ -355,7 +417,7 @@ private void reindex(Set<String> currentIndexSpecs, Set<String> newIndexSpecs) t
355417
for (String fieldSeq : addedIndexSpecs) {
356418
logger.debug("Initializing new index '{}'...", fieldSeq);
357419

358-
TripleIndex addedIndex = new TripleIndex(fieldSeq);
420+
TripleIndex addedIndex = new TripleIndex(fieldSeq, true);
359421
BTree addedBTree = null;
360422
RecordIterator sourceIter = null;
361423
try {
@@ -1122,7 +1184,17 @@ private class TripleIndex {
11221184

11231185
private final BTree btree;
11241186

1125-
public TripleIndex(String fieldSeq) throws IOException {
1187+
/**
 * Creates the index for the given field sequence, backed by a {@link BTree} in {@code dir}.
 *
 * @param fieldSeq                the field sequence of this index
 * @param deleteExistingIndexFile if {@code true}, any existing {@code .dat} and {@code .alloc} file for this
 *                                index is deleted before the BTree is opened
 * @throws IOException if an existing index file cannot be deleted, or if the BTree cannot be opened
 */
public TripleIndex(String fieldSeq, boolean deleteExistingIndexFile) throws IOException {
    if (deleteExistingIndexFile) {
        String filenamePrefix = getFilenamePrefix(fieldSeq);
        for (String suffix : new String[] { ".dat", ".alloc" }) {
            File staleFile = new File(dir, filenamePrefix + suffix);
            // File.delete() signals failure only through its return value. Fail loudly rather than let the
            // BTree below silently reopen a file that was supposed to be gone.
            if (staleFile.exists() && !staleFile.delete()) {
                throw new IOException("Unable to delete existing index file " + staleFile.getAbsolutePath());
            }
        }
    }
    tripleComparator = new TripleComparator(fieldSeq);
    btree = new BTree(dir, getFilenamePrefix(fieldSeq), 2048, RECORD_LENGTH, tripleComparator, forceSync);
}

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/BTree.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,12 @@ public BTree(File dataDir, String filenamePrefix, int blockSize, int valueSize,
292292
this.valueSize = buf.getInt();
293293
this.rootNodeID = buf.getInt();
294294

295+
if (rootNodeID == 0) {
296+
if (nioFile.size() >= 1024) {
297+
throw new IllegalStateException("Root node ID is 0 but file is not empty");
298+
}
299+
}
300+
295301
if (Arrays.equals(MAGIC_NUMBER, magicNumber)) {
296302
if (version > FILE_FORMAT_VERSION) {
297303
throw new IOException("Unable to read BTree file " + file + "; it uses a newer file format");
@@ -1117,4 +1123,11 @@ public void print(PrintStream out) throws IOException {
11171123
out.println("#values = " + valueCount);
11181124
out.println("---end of BTree file---");
11191125
}
1126+
1127+
@Override
1128+
public String toString() {
1129+
return "BTree{" +
1130+
"file=" + getFile() +
1131+
'}';
1132+
}
11201133
}

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/btree/RangeIterator.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,4 +422,11 @@ public boolean nodeMergedWith(Node sourceNode, Node targetNode, int mergeIdx) th
422422

423423
return deregister;
424424
}
425+
426+
@Override
427+
public String toString() {
428+
return "RangeIterator{" +
429+
"tree=" + tree +
430+
'}';
431+
}
425432
}

core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java

Lines changed: 127 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,15 @@ public void before() throws IOException {
9090
backupFile(dataDir, "values.dat");
9191
backupFile(dataDir, "values.id");
9292
backupFile(dataDir, "values.hash");
93+
backupFile(dataDir, "namespaces.dat");
94+
backupFile(dataDir, "contexts.dat");
95+
backupFile(dataDir, "triples-posc.alloc");
96+
backupFile(dataDir, "triples-posc.dat");
97+
backupFile(dataDir, "triples-spoc.alloc");
98+
backupFile(dataDir, "triples-spoc.dat");
99+
100+
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
101+
93102
}
94103

95104
public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException {
@@ -234,25 +243,136 @@ public void testCorruptValuesIdFile() throws IOException {
234243
@Test
235244
public void testCorruptValuesHashFile() throws IOException {
236245
repo.shutDown();
237-
File valuesHashFile = new File(dataDir, "values.hash");
238-
long fileSize = valuesHashFile.length();
246+
String file = "values.hash";
247+
File nativeStoreFile = new File(dataDir, file);
248+
long fileSize = nativeStoreFile.length();
249+
250+
for (long i = 4; i < fileSize; i++) {
251+
restoreFile(dataDir, file);
252+
overwriteByteInFile(nativeStoreFile, i, 0x0);
253+
repo.init();
254+
List<Statement> list = getStatements();
255+
assertEquals(6, list.size(), "Failed at byte position " + i);
256+
repo.shutDown();
257+
}
258+
}
259+
260+
@Test
261+
public void testCorruptValuesNamespacesFile() throws IOException {
262+
repo.shutDown();
263+
String file = "namespaces.dat";
264+
File nativeStoreFile = new File(dataDir, file);
265+
long fileSize = nativeStoreFile.length();
266+
267+
for (long i = 4; i < fileSize; i++) {
268+
restoreFile(dataDir, file);
269+
overwriteByteInFile(nativeStoreFile, i, 0x0);
270+
repo.init();
271+
List<Statement> list = getStatements();
272+
assertEquals(6, list.size(), "Failed at byte position " + i);
273+
repo.shutDown();
274+
}
275+
}
276+
277+
@Test
278+
public void testCorruptValuesContextsFile() throws IOException {
279+
repo.shutDown();
280+
String file = "contexts.dat";
281+
File nativeStoreFile = new File(dataDir, file);
282+
long fileSize = nativeStoreFile.length();
283+
284+
for (long i = 4; i < fileSize; i++) {
285+
restoreFile(dataDir, file);
286+
overwriteByteInFile(nativeStoreFile, i, 0x0);
287+
repo.init();
288+
List<Statement> list = getStatements();
289+
assertEquals(6, list.size(), "Failed at byte position " + i);
290+
repo.shutDown();
291+
}
292+
}
293+
294+
@Test
295+
public void testCorruptValuesPoscAllocFile() throws IOException {
296+
repo.shutDown();
297+
String file = "triples-posc.alloc";
298+
File nativeStoreFile = new File(dataDir, file);
299+
long fileSize = nativeStoreFile.length();
239300

240301
for (long i = 4; i < fileSize; i++) {
241-
restoreFile(dataDir, "values.hash");
242-
overwriteByteInFile(valuesHashFile, i, 0x0);
302+
restoreFile(dataDir, file);
303+
overwriteByteInFile(nativeStoreFile, i, 0x0);
243304
repo.init();
244305
List<Statement> list = getStatements();
245306
assertEquals(6, list.size(), "Failed at byte position " + i);
246307
repo.shutDown();
247308
}
248309
}
249310

311+
@Test
312+
public void testCorruptValuesPoscDataFile() throws IOException {
313+
repo.shutDown();
314+
String file = "triples-posc.dat";
315+
File nativeStoreFile = new File(dataDir, file);
316+
long fileSize = nativeStoreFile.length();
317+
318+
for (long i = 4; i < fileSize; i++) {
319+
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
320+
restoreFile(dataDir, file);
321+
overwriteByteInFile(nativeStoreFile, i, 0x0);
322+
repo.init();
323+
List<Statement> list = getStatements();
324+
assertEquals(6, list.size(), "Failed at byte position " + i);
325+
repo.shutDown();
326+
}
327+
}
328+
329+
@Test
330+
public void testCorruptValuesSpocAllocFile() throws IOException {
331+
repo.shutDown();
332+
String file = "triples-spoc.alloc";
333+
File nativeStoreFile = new File(dataDir, file);
334+
long fileSize = nativeStoreFile.length();
335+
336+
for (long i = 4; i < fileSize; i++) {
337+
restoreFile(dataDir, file);
338+
overwriteByteInFile(nativeStoreFile, i, 0x0);
339+
repo.init();
340+
List<Statement> list = getStatements();
341+
assertEquals(6, list.size(), "Failed at byte position " + i);
342+
repo.shutDown();
343+
}
344+
}
345+
346+
@Test
347+
public void testCorruptValuesSpocDataFile() throws IOException {
348+
repo.shutDown();
349+
String file = "triples-spoc.dat";
350+
File nativeStoreFile = new File(dataDir, file);
351+
long fileSize = nativeStoreFile.length();
352+
353+
for (long i = 4; i < fileSize; i++) {
354+
restoreFile(dataDir, file);
355+
overwriteByteInFile(nativeStoreFile, i, 0x0);
356+
repo.init();
357+
try {
358+
List<Statement> list = getStatements();
359+
assertEquals(6, list.size(), "Failed at byte position " + i);
360+
} catch (Throwable ignored) {
361+
repo.shutDown();
362+
nativeStoreFile.delete();
363+
repo.init();
364+
List<Statement> list = getStatements();
365+
assertEquals(6, list.size(), "Failed at byte position " + i);
366+
}
367+
368+
repo.shutDown();
369+
}
370+
}
371+
250372
@NotNull
251373
private List<Statement> getStatements() {
252374
List<Statement> list = new ArrayList<>();
253375

254-
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
255-
256376
try (RepositoryConnection conn = repo.getConnection()) {
257377
StringWriter stringWriter = new StringWriter();
258378
RDFWriter writer = Rio.createWriter(RDFFormat.NQUADS, stringWriter);
@@ -266,13 +386,12 @@ private List<Statement> getStatements() {
266386
}
267387
}
268388
return list;
269-
} finally {
270-
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false;
271389
}
272390
}
273391

274392
@AfterEach
275393
public void after() throws IOException {
394+
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false;
276395
repo.shutDown();
277396
}
278397
}

site/content/documentation/programming/repository.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ Repository repo = new SailRepository(new NativeStore());
9999
```
100100

101101
In the unlikely event of corruption, the system property `org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData` can be set to `true` to
102-
allow the NativeStore to output CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral objects.
102+
allow the NativeStore to output CorruptValue/CorruptIRI/CorruptIRIOrBNode/CorruptLiteral objects. Take a backup of all data before setting
103+
this property, because it also allows the NativeStore to delete corrupt indexes in an attempt to recreate them. Consider this feature experimental and use it with caution.
103104

104105
### Elasticsearch RDF Repository
105106

0 commit comments

Comments
 (0)