Skip to content

Commit 92f4fe4

Browse files
committed
GH-5148 add more files to the corruption test cases and add a fix for cases where corruption causes large amounts of data to be read
1 parent 0c58aac commit 92f4fe4

3 files changed

Lines changed: 68 additions & 3 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ org.eclipse.dash.licenses-1.0.2.jar
5252
e2e/node_modules
5353
e2e/playwright-report
5454
e2e/test-results
55+
.aider*

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/datastore/DataFile.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
import java.util.NoSuchElementException;
1919

2020
import org.eclipse.rdf4j.common.io.NioFile;
21+
import org.eclipse.rdf4j.sail.nativerdf.ValueStore;
22+
import org.slf4j.Logger;
23+
import org.slf4j.LoggerFactory;
2124

2225
/**
2326
* Class supplying access to a data file. A data file stores data sequentially. Each entry starts with the entry's
@@ -27,6 +30,8 @@
2730
*/
2831
public class DataFile implements Closeable {
2932

33+
private static final Logger logger = LoggerFactory.getLogger(DataFile.class);
34+
3035
/*-----------*
3136
* Constants *
3237
*-----------*/
@@ -197,6 +202,16 @@ public byte[] getData(long offset) throws IOException {
197202
(data[2] << 8) & 0x0000ff00 |
198203
(data[3]) & 0x000000ff;
199204

205+
// If the data length is larger than 750MB, we are likely reading the wrong data. Probably data corruption.
206+
if (dataLength > 750 * 1024 * 1024) {
207+
if (ValueStore.SOFT_FAIL_ON_CORRUPT_DATA) {
208+
logger.error(
209+
"Data length is {}MB which is larger than 750MB. This is likely data corruption. Truncating length to 32 MB.",
210+
dataLength / ((1024 * 1024)));
211+
dataLength = 32 * 1024 * 1024;
212+
}
213+
}
214+
200215
// We have either managed to read enough data and can return the required subset of the data, or we have read
201216
// too little so we need to execute another read to get the correct data.
202217
if (dataLength <= data.length - 4) {

core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/NativeSailStoreCorruptionTest.java

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ public void before() throws IOException {
8888
conn.add(S5, CTX_2);
8989
}
9090
backupFile(dataDir, "values.dat");
91+
backupFile(dataDir, "values.id");
92+
backupFile(dataDir, "values.hash");
9193
}
9294

9395
public static void overwriteByteInFile(File valuesFile, long pos, int newVal) throws IOException {
@@ -116,7 +118,7 @@ public static void backupFile(File dataDir, String s) throws IOException {
116118
File backupFile = new File(dataDir, s + ".bak");
117119

118120
if (!valuesFile.exists()) {
119-
throw new IOException("values.dat does not exist and cannot be backed up.");
121+
throw new IOException(s + " does not exist and cannot be backed up.");
120122
}
121123

122124
// Copy the source file to its .bak backup
@@ -128,7 +130,7 @@ public static void restoreFile(File dataDir, String s) throws IOException {
128130
File backupFile = new File(dataDir, s + ".bak");
129131

130132
if (!backupFile.exists()) {
131-
throw new IOException("Backup file values.dat.bak does not exist.");
133+
throw new IOException("Backup file " + s + ".bak does not exist.");
132134
}
133135

134136
// Copy the .bak backup over the original file
@@ -196,9 +198,53 @@ public void testCorruptValuesDatFileEntireValuesDatFile() throws IOException {
196198

197199
List<Statement> list = getStatements();
198200
assertEquals(6, list.size());
201+
}
202+
}
203+
204+
@Test
205+
public void testCorruptLastByteOfValuesDatFile() throws IOException {
206+
repo.shutDown();
207+
File valuesFile = new File(dataDir, "values.dat");
208+
long fileSize = valuesFile.length();
209+
210+
overwriteByteInFile(valuesFile, fileSize - 1, 0x0);
211+
212+
repo.init();
213+
214+
List<Statement> list = getStatements();
215+
assertEquals(6, list.size());
216+
}
217+
218+
@Test
219+
public void testCorruptValuesIdFile() throws IOException {
220+
repo.shutDown();
221+
File valuesIdFile = new File(dataDir, "values.id");
222+
long fileSize = valuesIdFile.length();
199223

224+
for (long i = 4; i < fileSize; i++) {
225+
restoreFile(dataDir, "values.id");
226+
overwriteByteInFile(valuesIdFile, i, 0x0);
227+
repo.init();
228+
List<Statement> list = getStatements();
229+
assertEquals(6, list.size(), "Failed at byte position " + i);
230+
repo.shutDown();
200231
}
232+
}
201233

234+
@Test
235+
public void testCorruptValuesHashFile() throws IOException {
236+
repo.shutDown();
237+
File valuesHashFile = new File(dataDir, "values.hash");
238+
long fileSize = valuesHashFile.length();
239+
240+
for (long i = 4; i < fileSize; i++) {
241+
restoreFile(dataDir, "values.hash");
242+
overwriteByteInFile(valuesHashFile, i, 0x0);
243+
repo.init();
244+
List<Statement> list = getStatements();
245+
assertEquals(6, list.size(), "Failed at byte position " + i);
246+
repo.shutDown();
247+
}
202248
}
203249

204250
@NotNull
@@ -226,7 +272,10 @@ private List<Statement> getStatements() {
226272
}
227273

228274
@AfterEach
229-
public void after() {
275+
public void after() throws IOException {
230276
repo.shutDown();
277+
restoreFile(dataDir, "values.hash");
278+
restoreFile(dataDir, "values.id");
279+
restoreFile(dataDir, "values.dat");
231280
}
232281
}

0 commit comments

Comments (0)