Skip to content

Commit 0c58aac

Browse files
committed
GH-5148 corrupt data can be written as NQuads
1 parent 196cf9d commit 0c58aac

6 files changed

Lines changed: 244 additions & 15 deletions

File tree

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/ValueStore.java

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
3737
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
3838
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptLiteral;
39+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue;
3940
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptValue;
4041
import org.eclipse.rdf4j.sail.nativerdf.model.NativeBNode;
4142
import org.eclipse.rdf4j.sail.nativerdf.model.NativeIRI;
@@ -225,10 +226,12 @@ public <T extends NativeValue & Resource> T getResource(int id) throws IOExcepti
225226

226227
NativeValue resultValue = getValue(id);
227228

228-
if (!(resultValue instanceof Resource)) {
229+
if (resultValue != null && !(resultValue instanceof Resource)) {
229230
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
230231
return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
231232
}
233+
logger.warn(
234+
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
232235
}
233236

234237
return (T) resultValue;
@@ -245,10 +248,15 @@ public <T extends NativeValue & IRI> T getIRI(int id) throws IOException {
245248

246249
NativeValue resultValue = getValue(id);
247250

248-
if (!(resultValue instanceof Resource)) {
251+
if (resultValue != null && !(resultValue instanceof IRI)) {
249252
if (SOFT_FAIL_ON_CORRUPT_DATA && resultValue instanceof CorruptValue) {
250-
return (T) new CorruptIRIOrBNode(revision, id, ((CorruptValue) resultValue).getData());
253+
if (resultValue instanceof CorruptIRI) {
254+
return (T) resultValue;
255+
}
256+
return (T) new CorruptIRI(revision, id, null, ((CorruptValue) resultValue).getData());
251257
}
258+
logger.warn(
259+
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
252260
}
253261

254262
return (T) resultValue;
@@ -584,9 +592,10 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
584592
if (data.length == 0) {
585593
if (SOFT_FAIL_ON_CORRUPT_DATA) {
586594
logger.error("Soft fail on corrupt data: Empty data array for value with id {}", id);
587-
return new CorruptValue(revision, id, data);
595+
return new CorruptUnknownValue(revision, id, data);
588596
}
589-
throw new SailException("Empty data array for value with id " + id);
597+
throw new SailException("Empty data array for value with id " + id
598+
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
590599
}
591600
switch (data[0]) {
592601
case URI_VALUE:
@@ -598,24 +607,29 @@ private NativeValue data2value(int id, byte[] data) throws IOException {
598607
default:
599608
if (SOFT_FAIL_ON_CORRUPT_DATA) {
600609
logger.error("Soft fail on corrupt data: Invalid type {} for value with id {}", data[0], id);
601-
return new CorruptValue(revision, id, data);
610+
return new CorruptUnknownValue(revision, id, data);
602611
}
603-
throw new SailException("Invalid type " + data[0] + " for value with id " + id);
612+
throw new SailException("Invalid type " + data[0] + " for value with id " + id
613+
+ " consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
604614
}
605615
}
606616

607617
private <T extends IRI & NativeValue> T data2uri(int id, byte[] data) throws IOException {
618+
String namespace = null;
619+
608620
try {
609621
int nsID = ByteArrayUtil.getInt(data, 1);
610-
String namespace = getNamespace(nsID);
622+
namespace = getNamespace(nsID);
611623

612624
String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
613625

614626
return (T) new NativeIRI(revision, namespace, localName, id);
615627
} catch (Throwable e) {
616628
if (SOFT_FAIL_ON_CORRUPT_DATA && (e instanceof Exception || e instanceof AssertionError)) {
617-
return (T) new CorruptIRI(revision, id, data);
629+
return (T) new CorruptIRI(revision, id, namespace, data);
618630
}
631+
logger.error(
632+
"Possible corrupt data consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptData to true");
619633
throw e;
620634
}
621635

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRI.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,14 @@
1111

1212
package org.eclipse.rdf4j.sail.nativerdf.model;
1313

14+
import java.nio.charset.StandardCharsets;
15+
16+
import org.apache.commons.codec.binary.Hex;
1417
import org.eclipse.rdf4j.model.IRI;
1518
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
1619

20+
import com.google.common.net.UrlEscapers;
21+
1722
/**
1823
* CorruptIRI is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
1924
* ValueStore#softFailOnCorruptData).
@@ -23,22 +28,48 @@
2328
public class CorruptIRI extends CorruptValue implements IRI {
2429

2530
private static final long serialVersionUID = -6995615243794525852L;
31+
private final String namespace;
2632

27-
public CorruptIRI(ValueStoreRevision revision, int internalID, byte[] data) {
33+
/**
 * Creates a placeholder IRI for a value whose stored bytes could not be read.
 *
 * @param revision   value store revision, forwarded to the {@link CorruptValue} constructor
 * @param internalID internal ID of the value, forwarded to the {@link CorruptValue} constructor
 * @param namespace  namespace recovered for the value, kept in the {@code namespace} field; may be {@code null}
 * @param data       raw bytes of the value, forwarded to the {@link CorruptValue} constructor
 */
public CorruptIRI(ValueStoreRevision revision, int internalID, String namespace, byte[] data) {
	super(revision, internalID, data);
	this.namespace = namespace;
}
37+
38+
@Override
39+
public String toString() {
40+
return stringValue();
2941
}
3042

3143
public String stringValue() {
44+
try {
45+
return getNamespace() + ":" + getLocalName();
46+
} catch (Throwable ignored) {
47+
}
48+
3249
return "CorruptIRI_with_ID_" + getInternalID();
3350
}
3451

3552
@Override
3653
public String getNamespace() {
37-
return "CORRUPT";
54+
if (namespace != null && !namespace.isEmpty()) {
55+
return namespace;
56+
}
57+
return "urn:CorruptIRI:";
3858
}
3959

4060
@Override
4161
public String getLocalName() {
62+
byte[] data = getData();
63+
if (data != null && data.length < 1024) {
64+
try {
65+
String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
66+
return "CORRUPT_" + UrlEscapers.urlPathSegmentEscaper().escape(localName);
67+
} catch (Throwable ignored) {
68+
}
69+
70+
return "CORRUPT_" + Hex.encodeHexString(data);
71+
}
72+
4273
return "CORRUPT";
4374
}
4475

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptIRIOrBNode.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,15 @@
1111

1212
package org.eclipse.rdf4j.sail.nativerdf.model;
1313

14+
import java.nio.charset.StandardCharsets;
15+
16+
import org.apache.commons.codec.binary.Hex;
1417
import org.eclipse.rdf4j.model.BNode;
1518
import org.eclipse.rdf4j.model.IRI;
1619
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
1720

21+
import com.google.common.net.UrlEscapers;
22+
1823
/**
1924
* CorruptIRIOrBNode is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
2025
* ValueStore#softFailOnCorruptData).
@@ -29,17 +34,38 @@ public CorruptIRIOrBNode(ValueStoreRevision revision, int internalID, byte[] dat
2934
super(revision, internalID, data);
3035
}
3136

37+
@Override
38+
public String toString() {
39+
return stringValue();
40+
}
41+
3242
public String stringValue() {
33-
return "CorruptIRI_with_ID_" + getInternalID();
43+
try {
44+
return getNamespace() + ":" + getLocalName();
45+
} catch (Throwable ignored) {
46+
}
47+
48+
return "CorruptIRIOrBNode_with_ID_" + getInternalID();
3449
}
3550

3651
@Override
3752
public String getNamespace() {
38-
return "CORRUPT";
53+
return "urn:CorruptIRIOrBNode:";
3954
}
4055

4156
@Override
4257
public String getLocalName() {
58+
byte[] data = getData();
59+
if (data != null && data.length < 1024) {
60+
try {
61+
String localName = new String(data, 5, data.length - 5, StandardCharsets.UTF_8);
62+
return "CORRUPT_" + UrlEscapers.urlPathSegmentEscaper().escape(localName);
63+
} catch (Throwable ignored) {
64+
}
65+
66+
return "CORRUPT_" + Hex.encodeHexString(data);
67+
}
68+
4369
return "CORRUPT";
4470
}
4571

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/model/CorruptLiteral.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@
1313

1414
import java.math.BigDecimal;
1515
import java.math.BigInteger;
16+
import java.nio.charset.StandardCharsets;
1617
import java.util.Optional;
1718

1819
import javax.xml.datatype.XMLGregorianCalendar;
1920

2021
import org.eclipse.rdf4j.model.IRI;
2122
import org.eclipse.rdf4j.model.Literal;
2223
import org.eclipse.rdf4j.model.base.CoreDatatype;
24+
import org.eclipse.rdf4j.model.util.Values;
2325
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
2426

2527
/**
@@ -32,6 +34,8 @@ public class CorruptLiteral extends CorruptValue implements Literal {
3234

3335
private static final long serialVersionUID = -2510885288827542623L;
3436

37+
private static final IRI CORRUPT = Values.iri("urn:corrupt");
38+
3539
/**
 * Creates a placeholder literal for a value whose stored bytes could not be read. All arguments are forwarded
 * unchanged to the {@link CorruptValue} constructor.
 *
 * @param revision   value store revision the value belongs to
 * @param internalID internal ID of the value
 * @param data       raw bytes of the value
 */
public CorruptLiteral(ValueStoreRevision revision, int internalID, byte[] data) {
	super(revision, internalID, data);
}
@@ -42,7 +46,15 @@ public String stringValue() {
4246

4347
@Override
4448
public String getLabel() {
45-
return "";
49+
byte[] data = getData();
50+
try {
51+
if (data != null && data.length < 1024) {
52+
return "CorruptUnknownValue with ID " + getInternalID() + " with possible data: "
53+
+ new String(data, StandardCharsets.UTF_8);
54+
}
55+
} catch (Throwable ignored) {
56+
}
57+
return "CorruptUnknownValue_with_ID_" + getInternalID();
4658
}
4759

4860
@Override
@@ -52,7 +64,7 @@ public Optional<String> getLanguage() {
5264

5365
@Override
5466
public IRI getDatatype() {
55-
return null;
67+
return CORRUPT;
5668
}
5769

5870
@Override
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2024 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
******************************************************************************/
11+
12+
package org.eclipse.rdf4j.sail.nativerdf.model;
13+
14+
import java.math.BigDecimal;
15+
import java.math.BigInteger;
16+
import java.nio.charset.StandardCharsets;
17+
import java.util.Optional;
18+
19+
import javax.xml.datatype.XMLGregorianCalendar;
20+
21+
import org.eclipse.rdf4j.model.IRI;
22+
import org.eclipse.rdf4j.model.Literal;
23+
import org.eclipse.rdf4j.model.base.CoreDatatype;
24+
import org.eclipse.rdf4j.model.vocabulary.XSD;
25+
import org.eclipse.rdf4j.sail.nativerdf.ValueStoreRevision;
26+
27+
/**
28+
* CorruptUnknownValue is used when a NativeValue cannot be read from the ValueStore and if soft failure is enabled (see
29+
* ValueStore#softFailOnCorruptData). Since a type is needed
30+
*
31+
* @author Håvard M. Ottestad
32+
*/
33+
public class CorruptUnknownValue extends CorruptValue implements Literal {
34+
35+
private static final long serialVersionUID = -6650510290226676279L;
36+
37+
public CorruptUnknownValue(ValueStoreRevision revision, int internalID, byte[] data) {
38+
super(revision, internalID, data);
39+
}
40+
41+
@Override
42+
public String getLabel() {
43+
byte[] data = getData();
44+
try {
45+
if (data != null && data.length < 1024) {
46+
return "CorruptUnknownValue with ID " + getInternalID() + " with possible data: "
47+
+ new String(data, StandardCharsets.UTF_8);
48+
}
49+
} catch (Throwable ignored) {
50+
}
51+
return "CorruptUnknownValue_with_ID_" + getInternalID();
52+
}
53+
54+
@Override
55+
public Optional<String> getLanguage() {
56+
return Optional.empty();
57+
}
58+
59+
@Override
60+
public IRI getDatatype() {
61+
return XSD.STRING;
62+
}
63+
64+
@Override
65+
public boolean booleanValue() {
66+
return false;
67+
}
68+
69+
@Override
70+
public byte byteValue() {
71+
return 0;
72+
}
73+
74+
@Override
75+
public short shortValue() {
76+
return 0;
77+
}
78+
79+
@Override
80+
public int intValue() {
81+
return 0;
82+
}
83+
84+
@Override
85+
public long longValue() {
86+
return 0;
87+
}
88+
89+
@Override
90+
public BigInteger integerValue() {
91+
return null;
92+
}
93+
94+
@Override
95+
public BigDecimal decimalValue() {
96+
return null;
97+
}
98+
99+
@Override
100+
public float floatValue() {
101+
return 0;
102+
}
103+
104+
@Override
105+
public double doubleValue() {
106+
return 0;
107+
}
108+
109+
@Override
110+
public XMLGregorianCalendar calendarValue() {
111+
return null;
112+
}
113+
114+
@Override
115+
public CoreDatatype getCoreDatatype() {
116+
return null;
117+
}
118+
119+
@Override
120+
public boolean equals(Object o) {
121+
if (this == o) {
122+
return true;
123+
}
124+
125+
if (o instanceof CorruptUnknownValue && getInternalID() != NativeValue.UNKNOWN_ID) {
126+
CorruptUnknownValue otherCorruptValue = (CorruptUnknownValue) o;
127+
128+
if (otherCorruptValue.getInternalID() != NativeValue.UNKNOWN_ID
129+
&& getValueStoreRevision().equals(otherCorruptValue.getValueStoreRevision())) {
130+
// CorruptValue is from the same revision of the same native store with both IDs set
131+
return getInternalID() == otherCorruptValue.getInternalID();
132+
}
133+
}
134+
135+
return super.equals(o);
136+
}
137+
138+
}

0 commit comments

Comments
 (0)