Skip to content

Commit 01c6c32

Browse files
committed
Issue #745: Clear parser state before/after parsing
The usage model for RDFParser objects allows a single parser to be reused on different InputStream/Reader instances, so formally support this by resetting parser instances and testing for the expected differences in blank node identifiers.

Signed-off-by: Peter Ansell <p_ansell@yahoo.com>
1 parent 8fea325 commit 01c6c32

12 files changed

Lines changed: 169 additions & 106 deletions

File tree

core/rio/api/src/main/java/org/eclipse/rdf4j/rio/helpers/AbstractRDFParser.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package org.eclipse.rdf4j.rio.helpers;
99

1010
import java.io.UnsupportedEncodingException;
11+
import java.nio.charset.StandardCharsets;
1112
import java.security.MessageDigest;
1213
import java.security.NoSuchAlgorithmException;
1314
import java.util.Collection;
@@ -360,10 +361,15 @@ protected void initializeNamespaceTableFromConfiguration() {
360361
* the document has been parsed completely, but subclasses can clear the map at other moments too, for
361362
* example when a bnode scope ends.
362363
*
363-
* @deprecated Map is no longer used.
364+
* @deprecated Map is no longer used, call {@link #clear()} instead.
364365
*/
365366
@Deprecated
366367
protected void clearBNodeIDMap() {
368+
baseURI = null;
369+
nextBNodePrefix = createUniqueBNodePrefix();
370+
namespaceTable.clear();
371+
372+
initializeNamespaceTableFromConfiguration();
367373
}
368374

369375
/**
@@ -431,8 +437,7 @@ protected BNode createBNode(String nodeID)
431437
throws RDFParseException
432438
{
433439
// If we are preserving blank node ids then we do not prefix them to
434-
// make
435-
// them globally unique
440+
// make them globally unique
436441
if (preserveBNodeIDs()) {
437442
return valueFactory.createBNode(nodeID);
438443
}
@@ -446,13 +451,7 @@ protected BNode createBNode(String nodeID)
446451
if (nodeID.length() > 32) {
447452
// we only hash the node ID if it is longer than the hash string
448453
// itself would be.
449-
byte[] chars = null;
450-
try {
451-
chars = nodeID.getBytes("UTF-8");
452-
}
453-
catch (UnsupportedEncodingException e) {
454-
throw new RuntimeException(e);
455-
}
454+
byte[] chars = nodeID.getBytes(StandardCharsets.UTF_8);
456455

457456
// we use an MD5 hash rather than the node ID itself to get a
458457
// fixed-length generated id, rather than

core/rio/binary/src/main/java/org/eclipse/rdf4j/rio/binary/BinaryRDFParser.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -65,30 +65,32 @@ public void parse(Reader reader, String baseURI)
6565
public void parse(InputStream in, String baseURI)
6666
throws IOException, RDFParseException, RDFHandlerException
6767
{
68-
if (in == null) {
69-
throw new IllegalArgumentException("Input stream must not be null");
70-
}
68+
clear();
69+
70+
try {
71+
if (in == null) {
72+
throw new IllegalArgumentException("Input stream must not be null");
73+
}
7174

72-
this.in = new DataInputStream(new BufferedInputStream(in));
75+
this.in = new DataInputStream(new BufferedInputStream(in));
7376

74-
// Check magic number
75-
byte[] magicNumber = IOUtil.readBytes(in, MAGIC_NUMBER.length);
76-
if (!Arrays.equals(magicNumber, MAGIC_NUMBER)) {
77-
reportFatalError("File does not contain a binary RDF document");
78-
}
77+
// Check magic number
78+
byte[] magicNumber = IOUtil.readBytes(in, MAGIC_NUMBER.length);
79+
if (!Arrays.equals(magicNumber, MAGIC_NUMBER)) {
80+
reportFatalError("File does not contain a binary RDF document");
81+
}
7982

80-
// Check format version (parser is backward-compatible with version 1 and
81-
// version 2)
82-
int formatVersion = this.in.readInt();
83-
if (formatVersion != FORMAT_VERSION) {
84-
reportFatalError("Incompatible format version: " + formatVersion);
85-
}
83+
// Check format version (parser is backward-compatible with version 1 and
84+
// version 2)
85+
int formatVersion = this.in.readInt();
86+
if (formatVersion != FORMAT_VERSION) {
87+
reportFatalError("Incompatible format version: " + formatVersion);
88+
}
8689

87-
if (rdfHandler != null) {
88-
rdfHandler.startRDF();
89-
}
90+
if (rdfHandler != null) {
91+
rdfHandler.startRDF();
92+
}
9093

91-
try {
9294
loop: while (true) {
9395
int recordType = this.in.readByte();
9496

core/rio/jsonld/src/main/java/org/eclipse/rdf4j/rio/jsonld/JSONLDInternalTripleCallback.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99

1010
import java.util.List;
1111
import java.util.Map.Entry;
12+
import java.util.function.Function;
13+
import java.util.function.Supplier;
1214

15+
import org.eclipse.rdf4j.model.BNode;
1316
import org.eclipse.rdf4j.model.IRI;
1417
import org.eclipse.rdf4j.model.Resource;
1518
import org.eclipse.rdf4j.model.Statement;
@@ -44,6 +47,10 @@ class JSONLDInternalTripleCallback implements JsonLdTripleCallback {
4447

4548
private final ParseErrorListener parseErrorListener;
4649

50+
private final Function<String, BNode> namedBNodeCreator;
51+
52+
private final Supplier<BNode> anonymousBNodeCreator;
53+
4754
public JSONLDInternalTripleCallback() {
4855
this(new StatementCollector(new LinkedHashModel()));
4956
}
@@ -53,16 +60,20 @@ public JSONLDInternalTripleCallback(RDFHandler nextHandler) {
5360
}
5461

5562
public JSONLDInternalTripleCallback(RDFHandler nextHandler, ValueFactory vf) {
56-
this(nextHandler, vf, new ParserConfig(), new ParseErrorLogger());
63+
this(nextHandler, vf, new ParserConfig(), new ParseErrorLogger(), nodeID -> vf.createBNode(nodeID),
64+
() -> vf.createBNode());
5765
}
5866

5967
public JSONLDInternalTripleCallback(RDFHandler nextHandler, ValueFactory vf, ParserConfig parserConfig,
60-
ParseErrorListener parseErrorListener)
68+
ParseErrorListener parseErrorListener, Function<String, BNode> namedBNodeCreator,
69+
Supplier<BNode> anonymousBNodeCreator)
6170
{
6271
this.handler = nextHandler;
6372
this.vf = vf;
6473
this.parserConfig = parserConfig;
6574
this.parseErrorListener = parseErrorListener;
75+
this.namedBNodeCreator = namedBNodeCreator;
76+
this.anonymousBNodeCreator = anonymousBNodeCreator;
6677
}
6778

6879
private void triple(String s, String p, String o, String graph) {
@@ -96,10 +107,10 @@ private void triple(String s, String p, String o, String graph) {
96107
private Resource createResource(String resource) {
97108
// Blank node without any given identifier
98109
if (resource.equals("_:")) {
99-
return vf.createBNode();
110+
return anonymousBNodeCreator.get();
100111
}
101112
else if (resource.startsWith("_:")) {
102-
return vf.createBNode(resource.substring(2));
113+
return namedBNodeCreator.apply(resource.substring(2));
103114
}
104115
else {
105116
return vf.createIRI(resource);

core/rio/jsonld/src/main/java/org/eclipse/rdf4j/rio/jsonld/JSONLDParser.java

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,16 @@ public RDFFormat getRDFFormat() {
5858
public void parse(final InputStream in, final String baseURI)
5959
throws IOException, RDFParseException, RDFHandlerException
6060
{
61-
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
62-
valueFactory, getParserConfig(), getParseErrorListener());
63-
64-
final JsonLdOptions options = new JsonLdOptions(baseURI);
65-
options.useNamespaces = true;
61+
clear();
6662

6763
try {
64+
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
65+
valueFactory, getParserConfig(), getParseErrorListener(), nodeID -> createBNode(nodeID),
66+
() -> createBNode());
67+
68+
final JsonLdOptions options = new JsonLdOptions(baseURI);
69+
options.useNamespaces = true;
70+
6871
JsonLdProcessor.toRDF(JsonUtils.fromInputStream(in), callback, options);
6972
}
7073
catch (final JsonLdError e) {
@@ -79,19 +82,25 @@ public void parse(final InputStream in, final String baseURI)
7982
}
8083
throw e;
8184
}
85+
finally {
86+
clear();
87+
}
8288
}
8389

8490
@Override
8591
public void parse(final Reader reader, final String baseURI)
8692
throws IOException, RDFParseException, RDFHandlerException
8793
{
88-
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
89-
valueFactory, getParserConfig(), getParseErrorListener());
90-
91-
final JsonLdOptions options = new JsonLdOptions(baseURI);
92-
options.useNamespaces = true;
94+
clear();
9395

9496
try {
97+
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
98+
valueFactory, getParserConfig(), getParseErrorListener(), nodeID -> createBNode(nodeID),
99+
() -> createBNode());
100+
101+
final JsonLdOptions options = new JsonLdOptions(baseURI);
102+
options.useNamespaces = true;
103+
95104
JsonLdProcessor.toRDF(JsonUtils.fromReader(reader), callback, options);
96105
}
97106
catch (final JsonLdError e) {
@@ -106,6 +115,9 @@ public void parse(final Reader reader, final String baseURI)
106115
}
107116
throw e;
108117
}
118+
finally {
119+
clear();
120+
}
109121
}
110122

111123
}

core/rio/jsonld/src/test/java/org/eclipse/rdf4j/rio/jsonld/JSONLDInternalTripleCallbackTest.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import java.io.IOException;
1313
import java.util.Iterator;
1414

15-
import org.eclipse.rdf4j.model.Graph;
15+
import org.eclipse.rdf4j.model.Model;
1616
import org.eclipse.rdf4j.model.Statement;
1717
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
1818
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
@@ -40,12 +40,13 @@ public void triplesTest()
4040
final String expectedString = "(http://nonexistent.com/abox#Document1823812, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://nonexistent.com/tbox#Document) [null]";
4141
final Object input = JsonUtils.fromString(inputstring);
4242

43-
final Graph graph = new LinkedHashModel();
43+
final Model graph = new LinkedHashModel();
4444
final ParseErrorCollector parseErrorListener = new ParseErrorCollector();
4545
final ParserConfig parserConfig = new ParserConfig();
4646
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(
4747
new StatementCollector(graph), SimpleValueFactory.getInstance(), parserConfig,
48-
parseErrorListener);
48+
parseErrorListener, nodeID -> SimpleValueFactory.getInstance().createBNode(nodeID),
49+
() -> SimpleValueFactory.getInstance().createBNode());
4950

5051
JsonLdProcessor.toRDF(input, callback);
5152

core/rio/nquads/src/main/java/org/eclipse/rdf4j/rio/nquads/NQuadsParser.java

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,23 +62,25 @@ public synchronized void parse(final InputStream inputStream, final String baseU
6262
public synchronized void parse(final Reader reader, final String baseURI)
6363
throws IOException, RDFParseException, RDFHandlerException
6464
{
65-
if (reader == null) {
66-
throw new IllegalArgumentException("Reader can not be 'null'");
67-
}
68-
if (baseURI == null) {
69-
throw new IllegalArgumentException("base URI can not be 'null'");
70-
}
65+
clear();
66+
67+
try {
68+
if (reader == null) {
69+
throw new IllegalArgumentException("Reader can not be 'null'");
70+
}
71+
if (baseURI == null) {
72+
throw new IllegalArgumentException("base URI can not be 'null'");
73+
}
7174

72-
if (rdfHandler != null) {
73-
rdfHandler.startRDF();
74-
}
75+
if (rdfHandler != null) {
76+
rdfHandler.startRDF();
77+
}
7578

76-
this.reader = reader;
77-
lineNo = 1;
79+
this.reader = reader;
80+
lineNo = 1;
7881

79-
reportLocation(lineNo, 1);
82+
reportLocation(lineNo, 1);
8083

81-
try {
8284
int c = readCodePoint();
8385
c = skipWhitespace(c);
8486

core/rio/ntriples/src/main/java/org/eclipse/rdf4j/rio/ntriples/NTriplesParser.java

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -141,23 +141,25 @@ public synchronized void parse(InputStream in, String baseURI)
141141
public synchronized void parse(Reader reader, String baseURI)
142142
throws IOException, RDFParseException, RDFHandlerException
143143
{
144-
if (reader == null) {
145-
throw new IllegalArgumentException("Reader can not be 'null'");
146-
}
147-
if (baseURI == null) {
148-
throw new IllegalArgumentException("base URI can not be 'null'");
149-
}
144+
clear();
145+
146+
try {
147+
if (reader == null) {
148+
throw new IllegalArgumentException("Reader can not be 'null'");
149+
}
150+
if (baseURI == null) {
151+
throw new IllegalArgumentException("base URI can not be 'null'");
152+
}
150153

151-
if (rdfHandler != null) {
152-
rdfHandler.startRDF();
153-
}
154+
if (rdfHandler != null) {
155+
rdfHandler.startRDF();
156+
}
154157

155-
this.reader = reader;
156-
lineNo = 1;
158+
this.reader = reader;
159+
lineNo = 1;
157160

158-
reportLocation(lineNo, 1);
161+
reportLocation(lineNo, 1);
159162

160-
try {
161163
int c = readCodePoint();
162164
c = skipWhitespace(c);
163165

core/rio/rdfjson/src/main/java/org/eclipse/rdf4j/rio/rdfjson/RDFJSONParser.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,15 @@ public RDFFormat getRDFFormat() {
6969
public void parse(final InputStream inputStream, final String baseUri)
7070
throws IOException, RDFParseException, RDFHandlerException
7171
{
72-
if (this.rdfHandler != null) {
73-
this.rdfHandler.startRDF();
74-
}
75-
7672
JsonParser jp = null;
7773

74+
clear();
75+
7876
try {
77+
if (this.rdfHandler != null) {
78+
this.rdfHandler.startRDF();
79+
}
80+
7981
jp = RDFJSONUtility.JSON_FACTORY.createParser(new BOMInputStream(inputStream, false));
8082
rdfJsonToHandlerInternal(this.rdfHandler, this.valueFactory, jp);
8183
}
@@ -198,7 +200,7 @@ private void rdfJsonToHandlerInternal(final RDFHandler handler, final ValueFacto
198200
final String subjStr = jp.getCurrentName();
199201
Resource subject = null;
200202

201-
subject = subjStr.startsWith("_:") ? vf.createBNode(subjStr.substring(2)) : vf.createIRI(subjStr);
203+
subject = subjStr.startsWith("_:") ? createBNode(subjStr.substring(2)) : vf.createIRI(subjStr);
202204
if (jp.nextToken() != JsonToken.START_OBJECT) {
203205
reportFatalError("Expected subject value to start with an Object", jp.getCurrentLocation());
204206
}
@@ -336,7 +338,7 @@ else if (RDFJSONUtility.BNODE.equals(nextType)) {
336338
reportFatalError("Datatype was attached to a blank node object: subject="
337339
+ subjStr + " predicate=" + predStr, jp.getCurrentLocation());
338340
}
339-
object = vf.createBNode(nextValue.substring(2));
341+
object = createBNode(nextValue.substring(2));
340342
}
341343
else if (RDFJSONUtility.URI.equals(nextType)) {
342344
if (nextLanguage != null) {
@@ -359,7 +361,7 @@ else if (RDFJSONUtility.URI.equals(nextType)) {
359361
context = null;
360362
}
361363
else if(nextContext.startsWith("_:")) {
362-
context = vf.createBNode(nextContext.substring(2));
364+
context = createBNode(nextContext.substring(2));
363365
}
364366
else {
365367
context = vf.createIRI(nextContext);

core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/RDFXMLParser.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ public synchronized void parse(Reader reader, String baseURI)
243243
private void parse(InputSource inputSource)
244244
throws IOException, RDFParseException, RDFHandlerException
245245
{
246+
clear();
247+
246248
try {
247249
documentURI = inputSource.getSystemId();
248250

core/rio/trix/src/main/java/org/eclipse/rdf4j/rio/trix/TriXParser.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,13 @@ public void parse(Reader reader, String baseURI)
163163
private void parse(InputSource inputStreamOrReader)
164164
throws IOException, RDFParseException, RDFHandlerException
165165
{
166-
if (rdfHandler != null) {
167-
rdfHandler.startRDF();
168-
}
169-
166+
clear();
167+
170168
try {
169+
if (rdfHandler != null) {
170+
rdfHandler.startRDF();
171+
}
172+
171173
XMLReader xmlReader;
172174

173175
if (getParserConfig().isSet(XMLParserSettings.CUSTOM_XML_READER)) {

0 commit comments

Comments
 (0)