Skip to content

Commit b6215bb

Browse files
committed
GH-5148 improved soft fail on corruption for values.id and values.hash files.
1 parent 92f4fe4 commit b6215bb

5 files changed

Lines changed: 62 additions & 31 deletions

File tree

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.nativerdf;
1212

13+
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA;
14+
1315
import java.io.IOException;
1416

1517
import org.eclipse.rdf4j.common.io.ByteArrayUtil;
@@ -20,6 +22,9 @@
2022
import org.eclipse.rdf4j.model.Value;
2123
import org.eclipse.rdf4j.sail.SailException;
2224
import org.eclipse.rdf4j.sail.nativerdf.btree.RecordIterator;
25+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
26+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
27+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue;
2328

2429
/**
2530
* A statement iterator that wraps a RecordIterator containing statement records and translates these records to
@@ -74,6 +79,17 @@ public Statement getNextElement() throws SailException {
7479
if (contextID != 0) {
7580
context = valueStore.getResource(contextID);
7681
}
82+
if (SOFT_FAIL_ON_CORRUPT_DATA) {
83+
if (subj == null) {
84+
subj = new CorruptIRIOrBNode(valueStore.getRevision(), subjID, null);
85+
}
86+
if (pred == null) {
87+
pred = new CorruptIRI(valueStore.getRevision(), predID, null, null);
88+
}
89+
if (obj == null) {
90+
obj = new CorruptUnknownValue(valueStore.getRevision(), objID, null);
91+
}
92+
}
7793

7894
return valueStore.createStatement(subj, pred, obj, context);
7995
} catch (IOException e) {

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.commons.io.FileUtils;
2525
import org.eclipse.rdf4j.collection.factory.api.CollectionFactory;
2626
import org.eclipse.rdf4j.collection.factory.mapdb.MapDb3CollectionFactory;
27+
import org.eclipse.rdf4j.common.annotation.InternalUseOnly;
2728
import org.eclipse.rdf4j.common.concurrent.locks.Lock;
2829
import org.eclipse.rdf4j.common.concurrent.locks.LockManager;
2930
import org.eclipse.rdf4j.common.io.MavenUtil;
@@ -62,6 +63,15 @@ public class NativeStore extends AbstractNotifyingSail implements FederatedServi
6263

6364
private static final String VERSION = MavenUtil.loadVersion("org.eclipse.rdf4j", "rdf4j-sail-nativerdf", "devel");
6465

66+
/**
67+
* Do not throw an exception when corrupt data is detected. Instead, try to return as much data as possible.
68+
*
69+
* Variable can be set through the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData.
70+
*/
71+
@InternalUseOnly
72+
public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true"
73+
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));;
74+
6575
private static final Cleaner REMOVE_STORES_USED_FOR_MEMORY_OVERFLOW = Cleaner.create();
6676

6777
/**

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.nativerdf;
1212

13+
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA;
14+
1315
import java.io.File;
1416
import java.io.IOException;
1517
import java.io.UnsupportedEncodingException;
@@ -128,12 +130,6 @@ public class ValueStore extends SimpleValueFactory {
128130
*/
129131
private final ConcurrentCache<String, Integer> namespaceIDCache;
130132

131-
/**
132-
* Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store.
133-
*/
134-
public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true"
135-
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));;
136-
137133
/*--------------*
138134
* Constructors *
139135
*--------------*/

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.nativerdf.datastore;
1212

13+
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA;
14+
1315
import java.io.Closeable;
1416
import java.io.File;
1517
import java.io.IOException;
@@ -18,7 +20,6 @@
1820
import java.util.NoSuchElementException;
1921

2022
import org.eclipse.rdf4j.common.io.NioFile;
21-
import org.eclipse.rdf4j.sail.nativerdf.ValueStore;
2223
import org.slf4j.Logger;
2324
import org.slf4j.LoggerFactory;
2425

@@ -203,38 +204,49 @@ public byte[] getData(long offset) throws IOException {
203204
(data[3]) & 0x000000ff;
204205

205206
// If the data length is larger than 128MB, we are likely reading the wrong data. Probably data corruption.
206-
if (dataLength > 750 * 1024 * 1024) {
207-
if (ValueStore.SOFT_FAIL_ON_CORRUPT_DATA) {
207+
if (dataLength > 128 * 1024 * 1024) {
208+
if (SOFT_FAIL_ON_CORRUPT_DATA) {
208209
logger.error(
209210
"Data length is {}MB which is larger than 750MB. This is likely data corruption. Truncating length to 32 MB.",
210211
dataLength / ((1024 * 1024)));
211212
dataLength = 32 * 1024 * 1024;
212213
}
213214
}
214215

215-
// We have either managed to read enough data and can return the required subset of the data, or we have read
216-
// too little so we need to execute another read to get the correct data.
217-
if (dataLength <= data.length - 4) {
216+
try {
218217

219-
// adjust the approximate average with 1 part actual length and 99 parts previous average up to a sensible
220-
// max of 200
221-
dataLengthApproximateAverage = (int) (Math.min(200,
222-
((dataLengthApproximateAverage / 100.0) * 99) + (dataLength / 100.0)));
218+
// We have either managed to read enough data and can return the required subset of the data, or we have
219+
// read
220+
// too little so we need to execute another read to get the correct data.
221+
if (dataLength <= data.length - 4) {
223222

224-
return Arrays.copyOfRange(data, 4, dataLength + 4);
223+
// adjust the approximate average with 1 part actual length and 99 parts previous average up to a
224+
// sensible
225+
// max of 200
226+
dataLengthApproximateAverage = (int) (Math.min(200,
227+
((dataLengthApproximateAverage / 100.0) * 99) + (dataLength / 100.0)));
225228

226-
} else {
229+
return Arrays.copyOfRange(data, 4, dataLength + 4);
227230

228-
// adjust the approximate average, but favour the actual dataLength since dataLength predictions misses are
229-
// costly
230-
dataLengthApproximateAverage = Math.min(200, (dataLengthApproximateAverage + dataLength) / 2);
231+
} else {
231232

232-
// we didn't read enough data so we need to execute a new read
233-
data = new byte[dataLength];
234-
buf = ByteBuffer.wrap(data);
235-
nioFile.read(buf, offset + 4L);
233+
// adjust the approximate average, but favour the actual dataLength since dataLength prediction misses
234+
// are costly
235+
dataLengthApproximateAverage = Math.min(200, (dataLengthApproximateAverage + dataLength) / 2);
236236

237-
return data;
237+
// we didn't read enough data so we need to execute a new read
238+
data = new byte[dataLength];
239+
buf = ByteBuffer.wrap(data);
240+
nioFile.read(buf, offset + 4L);
241+
242+
return data;
243+
}
244+
} catch (OutOfMemoryError e) {
245+
if (dataLength > 128 * 1024 * 1024) {
246+
logger.error(
247+
"Trying to read large amounts of data may be a sign of data corruption. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
248+
}
249+
throw e;
238250
}
239251

240252
}

core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ public void testCorruptValuesHashFile() throws IOException {
251251
private List<Statement> getStatements() {
252252
List<Statement> list = new ArrayList<>();
253253

254-
ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
254+
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = true;
255255

256256
try (RepositoryConnection conn = repo.getConnection()) {
257257
StringWriter stringWriter = new StringWriter();
@@ -267,15 +267,12 @@ private List<Statement> getStatements() {
267267
}
268268
return list;
269269
} finally {
270-
ValueStore.SOFT_FAIL_ON_CORRUPT_DATA = false;
270+
NativeStore.SOFT_FAIL_ON_CORRUPT_DATA = false;
271271
}
272272
}
273273

274274
@AfterEach
275275
public void after() throws IOException {
276276
repo.shutDown();
277-
restoreFile(dataDir, "values.hash");
278-
restoreFile(dataDir, "values.id");
279-
restoreFile(dataDir, "values.dat");
280277
}
281278
}

0 commit comments

Comments
 (0)