Skip to content

Commit 196cf9d

Browse files
committed
GH-5148 add tests and extend corruption handling to more parts of the code
1 parent cad4af9 commit 196cf9d

7 files changed

Lines changed: 596 additions & 45 deletions

File tree

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,18 @@ public Statement getNextElement() throws SailException {
6161
}
6262

6363
int subjID = ByteArrayUtil.getInt(nextValue, TripleStore.SUBJ_IDX);
64-
Resource subj = (Resource) valueStore.getValue(subjID);
64+
Resource subj = valueStore.getResource(subjID);
6565

6666
int predID = ByteArrayUtil.getInt(nextValue, TripleStore.PRED_IDX);
67-
IRI pred = (IRI) valueStore.getValue(predID);
67+
IRI pred = valueStore.getIRI(predID);
6868

6969
int objID = ByteArrayUtil.getInt(nextValue, TripleStore.OBJ_IDX);
7070
Value obj = valueStore.getValue(objID);
7171

7272
Resource context = null;
7373
int contextID = ByteArrayUtil.getInt(nextValue, TripleStore.CONTEXT_IDX);
7474
if (contextID != 0) {
75-
context = (Resource) valueStore.getValue(contextID);
75+
context = valueStore.getResource(contextID);
7676
}
7777

7878
return valueStore.createStatement(subj, pred, obj, context);

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java

Lines changed: 100 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,17 @@
3333
import org.eclipse.rdf4j.model.vocabulary.XSD;
3434
import org.eclipse.rdf4j.sail.SailException;
3535
import org.eclipse.rdf4j.sail.nativerdf.datastore.DataStore;
36+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
37+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
38+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral;
3639
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue;
3740
import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode;
3841
import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI;
3942
import org.eclipse.rdf4j.sail.nativerdf.model.NativeLiteral;
4043
import org.eclipse.rdf4j.sail.nativerdf.model.NativeResource;
4144
import org.eclipse.rdf4j.sail.nativerdf.model.NativeValue;
45+
import org.slf4j.Logger;
46+
import org.slf4j.LoggerFactory;
4247

4348
/**
4449
* File-based indexed storage and retrieval of RDF values. ValueStore maps RDF values to integer IDs and vice-versa.
@@ -50,9 +55,7 @@
5055
@InternalUseOnly
5156
public class ValueStore extends SimpleValueFactory {
5257

53-
/*-----------*
54-
* Constants *
55-
*-----------*/
58+
private static final Logger logger = LoggerFactory.getLogger(ValueStore.class);
5659

5760
/**
5861
* The default value cache size.
@@ -127,7 +130,8 @@ public class ValueStore extends SimpleValueFactory {
127130
/**
128131
* Do not throw an exception in case a value cannot be loaded, e.g. due to a corrupt value store.
129132
*/
130-
private final boolean softFailOnCorruptData;
133+
public static boolean SOFT_FAIL_ON_CORRUPT_DATA = "true"
134+
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));;
131135

132136
/*--------------*
133137
* Constructors *
@@ -153,14 +157,6 @@ public ValueStore(File dataDir, boolean forceSync, int valueCacheSize, int value
153157

154158
setNewRevision();
155159

156-
/*
157-
* Soft failure when a ValueStore is corrupt (i.e., one or more NativeValues cannot be read properly) can be
158-
* enabled using the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData (boolean). The
159-
* default behavior is that ValueStore will fail hard with a SailException, whereas softFailOnCorruptData set
160-
* to true will make ValueStore return instances of CorruptValue if NativeValue cannot be read.
161-
*/
162-
this.softFailOnCorruptData = "true"
163-
.equalsIgnoreCase(System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData"));
164160
}
165161

166162
/*---------*
@@ -195,6 +191,7 @@ public Lock getReadLock() throws InterruptedException {
195191
* @throws IOException If an I/O error occurred.
196192
*/
197193
public NativeValue getValue(int id) throws IOException {
194+
198195
// Check value cache
199196
Integer cacheID = id;
200197
NativeValue resultValue = valueCache.get(cacheID);
@@ -206,12 +203,55 @@ public NativeValue getValue(int id) throws IOException {
206203
if (data != null) {
207204
resultValue = data2value(id, data);
208205

209-
// Store value in cache
210-
valueCache.put(cacheID, resultValue);
206+
if (!(resultValue instanceof CorruptValue)) {
207+
// Store value in cache
208+
valueCache.put(cacheID, resultValue);
209+
}
211210
}
212211
}
213212

214213
return resultValue;
214+
215+
}
216+
217+
/**
218+
* Gets the Resource for the specified ID.
219+
*
220+
* @param id A value ID.
221+
* @return The Resource for the ID, or <var>null</var> if no such value could be found.
222+
* @throws IOException If an I/O error occurred.
223+
*/
224+
public <T extends NativeValue & Resource> T getResource(int id) throws IOException {
225+
226+
NativeValue resultValue = getValue(id);
227+
228+
if (!(resultValue instanceof Resource)) {
229+
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
230+
return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
231+
}
232+
}
233+
234+
return (T) resultValue;
235+
}
236+
237+
/**
238+
* Gets the IRI for the specified ID.
239+
*
240+
* @param id A value ID.
241+
* @return The IRI for the ID, or <var>null</var> if no such value could be found.
242+
* @throws IOException If an I/O error occurred.
243+
*/
244+
public <T extends NativeValue & IRI> T getIRI(int id) throws IOException {
245+
246+
NativeValue resultValue = getValue(id);
247+
248+
if (!(resultValue instanceof Resource)) {
249+
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
250+
return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
251+
}
252+
}
253+
254+
return (T) resultValue;
215255
}
216256

217257
/**
@@ -542,7 +582,8 @@ private boolean isNamespaceData(byte[] data) {
542582

543583
private NativeValue data2value(int id, byte[] data) throws IOException {
544584
if (data.length == 0) {
545-
if (softFailOnCorruptData) {
585+
if (SOFT_FAIL_ON_CORRUPT_DATA) {
586+
logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id);
546587
return new CorruptValue(revision, id, data);
547588
}
548589
throw new SailException("Empty data array for value with id " + id);
@@ -555,52 +596,69 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
555596
case LITERAL_VALUE:
556597
return data2literal(id, data);
557598
default:
558-
if (softFailOnCorruptData) {
599+
if (SOFT_FAIL_ON_CORRUPT_DATA) {
600+
logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id);
559601
return new CorruptValue(revision, id, data);
560602
}
561603
throw new SailException("Invalid type " + data[0] + " for value with id " + id);
562604
}
563605
}
564606

565-
private NativeIRI data2uri(int id, byte[] data) throws IOException {
566-
int nsID = ByteArrayUtil.getInt(data, 1);
567-
String namespace = getNamespace(nsID);
607+
private <T extends IRI & NativeValue> T data2uri(int id, byte[] data) throws IOException {
608+
try {
609+
int nsID = ByteArrayUtil.getInt(data, 1);
610+
String namespace = getNamespace(nsID);
611+
612+
String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
568613

569-
String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
614+
return (T) new NativeIRI(revision, namespace, localName, id);
615+
} catch (Throwable e) {
616+
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
617+
return (T) new CorruptIRI(revision, id, data);
618+
}
619+
throw e;
620+
}
570621

571-
return new NativeIRI(revision, namespace, localName, id);
572622
}
573623

574624
private NativeBNode data2bnode(int id, byte[] data) {
575625
String nodeID = new String(data, 1, data.length - 1, StandardCharsets.UTF_8);
576626
return new NativeBNode(revision, nodeID, id);
577627
}
578628

579-
private NativeLiteral data2literal(int id, byte[] data) throws IOException {
580-
// Get datatype
581-
int datatypeID = ByteArrayUtil.getInt(data, 1);
582-
IRI datatype = null;
583-
if (datatypeID != NativeValue.UNKNOWN_ID) {
584-
datatype = (IRI) getValue(datatypeID);
585-
}
629+
private <T extends NativeValue & Literal> T data2literal(int id, byte[] data) throws IOException {
630+
try {
631+
// Get datatype
632+
int datatypeID = ByteArrayUtil.getInt(data, 1);
633+
IRI datatype = null;
634+
if (datatypeID != NativeValue.UNKNOWN_ID) {
635+
datatype = (IRI) getValue(datatypeID);
636+
}
586637

587-
// Get language tag
588-
String lang = null;
589-
int langLength = data[5];
590-
if (langLength > 0) {
591-
lang = new String(data, 6, langLength, StandardCharsets.UTF_8);
592-
}
638+
// Get language tag
639+
String lang = null;
640+
int langLength = data[5];
641+
if (langLength > 0) {
642+
lang = new String(data, 6, langLength, StandardCharsets.UTF_8);
643+
}
593644

594-
// Get label
595-
String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8);
645+
// Get label
646+
String label = new String(data, 6 + langLength, data.length - 6 - langLength, StandardCharsets.UTF_8);
596647

597-
if (lang != null) {
598-
return new NativeLiteral(revision, label, lang, id);
599-
} else if (datatype != null) {
600-
return new NativeLiteral(revision, label, datatype, id);
601-
} else {
602-
return new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id);
648+
if (lang != null) {
649+
return (T) new NativeLiteral(revision, label, lang, id);
650+
} else if (datatype != null) {
651+
return (T) new NativeLiteral(revision, label, datatype, id);
652+
} else {
653+
return (T) new NativeLiteral(revision, label, CoreDatatype.XSD.STRING, id);
654+
}
655+
} catch (Throwable e) {
656+
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
657+
return (T) new CorruptLiteral(revision, id, data);
658+
}
659+
throw e;
603660
}
661+
604662
}
605663

606664
private String data2namespace(byte[] data) {
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2024 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
******************************************************************************/
11+
12+
package org.eclipse.rdf4j.sail.nativerdf.model;
13+
14+
import org.eclipse.rdf4j.model.IRI;
15+
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
16+
17+
/**
18+
* CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
19+
* ValueStore#SOFT_FAIL_ON_CORRUPT_DATA).
20+
*
21+
* @author Håvard M. Ottestad
22+
*/
23+
public class CorruptIRI extends CorruptValue implements IRI {
24+
25+
private static final long serialVersionUID = -6995615243794525852L;
26+
27+
public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) {
28+
super(revision, internalID, data);
29+
}
30+
31+
public String stringValue() {
32+
return "CorruptIRI_with_ID_" + getInternalID();
33+
}
34+
35+
@Override
36+
public String getNamespace() {
37+
return "CORRUPT";
38+
}
39+
40+
@Override
41+
public String getLocalName() {
42+
return "CORRUPT";
43+
}
44+
45+
@Override
46+
public boolean equals(Object o) {
47+
if (this == o) {
48+
return true;
49+
}
50+
51+
if (o instanceof CorruptIRI && getInternalID() != NativeValue.UNKNOWN_ID) {
52+
CorruptIRI otherCorruptValue = (CorruptIRI) o;
53+
54+
if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID
55+
&& getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) {
56+
// CorruptValue is from the same revision of the same native store with both IDs set
57+
return getInternalID() == otherCorruptValue.getInternalID();
58+
}
59+
}
60+
61+
return super.equals(o);
62+
}
63+
64+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2024 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
******************************************************************************/
11+
12+
package org.eclipse.rdf4j.sail.nativerdf.model;
13+
14+
import org.eclipse.rdf4j.model.BNode;
15+
import org.eclipse.rdf4j.model.IRI;
16+
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
17+
18+
/**
19+
* CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
20+
* ValueStore#SOFT_FAIL_ON_CORRUPT_DATA).
21+
*
22+
* @author Håvard M. Ottestad
23+
*/
24+
public class CorruptIRIOrBNode extends CorruptValue implements IRI, BNode {
25+
26+
private static final long serialVersionUID = 3709784393454516043L;
27+
28+
public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] data) {
29+
super(revision, internalID, data);
30+
}
31+
32+
public String stringValue() {
33+
return "CorruptIRI_with_ID_" + getInternalID();
34+
}
35+
36+
@Override
37+
public String getNamespace() {
38+
return "CORRUPT";
39+
}
40+
41+
@Override
42+
public String getLocalName() {
43+
return "CORRUPT";
44+
}
45+
46+
@Override
47+
public String getID() {
48+
return "";
49+
}
50+
51+
@Override
52+
public boolean equals(Object o) {
53+
if (this == o) {
54+
return true;
55+
}
56+
57+
if (o instanceof CorruptIRIOrBNode && getInternalID() != NativeValue.UNKNOWN_ID) {
58+
CorruptIRIOrBNode otherCorruptValue = (CorruptIRIOrBNode) o;
59+
60+
if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID
61+
&& getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) {
62+
// CorruptValue is from the same revision of the same native store with both IDs set
63+
return getInternalID() == otherCorruptValue.getInternalID();
64+
}
65+
}
66+
67+
return super.equals(o);
68+
}
69+
70+
}

0 commit comments

Comments
 (0)