Skip to content

Commit aecf8d6

Browse files
author
Jeen Broekstra
authored
Merge pull request #817 from ansell/issues/#745-parser-resets
issue #745 : Clear parser state before/after parsing
2 parents 8fea325 + e1427f2 commit aecf8d6

12 files changed

Lines changed: 175 additions & 112 deletions

File tree

core/rio/api/src/main/java/org/eclipse/rdf4j/rio/helpers/AbstractRDFParser.java

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
package org.eclipse.rdf4j.rio.helpers;
99

1010
import java.io.UnsupportedEncodingException;
11+
import java.nio.charset.StandardCharsets;
1112
import java.security.MessageDigest;
1213
import java.security.NoSuchAlgorithmException;
1314
import java.util.Collection;
@@ -360,10 +361,11 @@ protected void initializeNamespaceTableFromConfiguration() {
360361
* the document has been parsed completely, but subclasses can clear the map at other moments too, for
361362
* example when a bnode scope ends.
362363
*
363-
* @deprecated Map is no longer used.
364+
* @deprecated Map is no longer used; call {@link #clear()} instead.
364365
*/
365366
@Deprecated
366367
protected void clearBNodeIDMap() {
368+
clear();
367369
}
368370

369371
/**
@@ -431,8 +433,7 @@ protected BNode createBNode(String nodeID)
431433
throws RDFParseException
432434
{
433435
// If we are preserving blank node ids then we do not prefix them to
434-
// make
435-
// them globally unique
436+
// make them globally unique
436437
if (preserveBNodeIDs()) {
437438
return valueFactory.createBNode(nodeID);
438439
}
@@ -446,13 +447,7 @@ protected BNode createBNode(String nodeID)
446447
if (nodeID.length() > 32) {
447448
// we only hash the node ID if it is longer than the hash string
448449
// itself would be.
449-
byte[] chars = null;
450-
try {
451-
chars = nodeID.getBytes("UTF-8");
452-
}
453-
catch (UnsupportedEncodingException e) {
454-
throw new RuntimeException(e);
455-
}
450+
byte[] chars = nodeID.getBytes(StandardCharsets.UTF_8);
456451

457452
// we use an MD5 hash rather than the node ID itself to get a
458453
// fixed-length generated id, rather than

core/rio/binary/src/main/java/org/eclipse/rdf4j/rio/binary/BinaryRDFParser.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -65,30 +65,32 @@ public void parse(Reader reader, String baseURI)
6565
public void parse(InputStream in, String baseURI)
6666
throws IOException, RDFParseException, RDFHandlerException
6767
{
68-
if (in == null) {
69-
throw new IllegalArgumentException("Input stream must not be null");
70-
}
68+
clear();
69+
70+
try {
71+
if (in == null) {
72+
throw new IllegalArgumentException("Input stream must not be null");
73+
}
7174

72-
this.in = new DataInputStream(new BufferedInputStream(in));
75+
this.in = new DataInputStream(new BufferedInputStream(in));
7376

74-
// Check magic number
75-
byte[] magicNumber = IOUtil.readBytes(in, MAGIC_NUMBER.length);
76-
if (!Arrays.equals(magicNumber, MAGIC_NUMBER)) {
77-
reportFatalError("File does not contain a binary RDF document");
78-
}
77+
// Check magic number
78+
byte[] magicNumber = IOUtil.readBytes(in, MAGIC_NUMBER.length);
79+
if (!Arrays.equals(magicNumber, MAGIC_NUMBER)) {
80+
reportFatalError("File does not contain a binary RDF document");
81+
}
7982

80-
// Check format version (parser is backward-compatible with version 1 and
81-
// version 2)
82-
int formatVersion = this.in.readInt();
83-
if (formatVersion != FORMAT_VERSION) {
84-
reportFatalError("Incompatible format version: " + formatVersion);
85-
}
83+
// Check format version (parser is backward-compatible with version 1 and
84+
// version 2)
85+
int formatVersion = this.in.readInt();
86+
if (formatVersion != FORMAT_VERSION) {
87+
reportFatalError("Incompatible format version: " + formatVersion);
88+
}
8689

87-
if (rdfHandler != null) {
88-
rdfHandler.startRDF();
89-
}
90+
if (rdfHandler != null) {
91+
rdfHandler.startRDF();
92+
}
9093

91-
try {
9294
loop: while (true) {
9395
int recordType = this.in.readByte();
9496

core/rio/jsonld/src/main/java/org/eclipse/rdf4j/rio/jsonld/JSONLDInternalTripleCallback.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99

1010
import java.util.List;
1111
import java.util.Map.Entry;
12+
import java.util.function.Function;
13+
import java.util.function.Supplier;
1214

15+
import org.eclipse.rdf4j.model.BNode;
1316
import org.eclipse.rdf4j.model.IRI;
1417
import org.eclipse.rdf4j.model.Resource;
1518
import org.eclipse.rdf4j.model.Statement;
@@ -44,6 +47,10 @@ class JSONLDInternalTripleCallback implements JsonLdTripleCallback {
4447

4548
private final ParseErrorListener parseErrorListener;
4649

50+
private final Function<String, BNode> namedBNodeCreator;
51+
52+
private final Supplier<BNode> anonymousBNodeCreator;
53+
4754
public JSONLDInternalTripleCallback() {
4855
this(new StatementCollector(new LinkedHashModel()));
4956
}
@@ -53,16 +60,20 @@ public JSONLDInternalTripleCallback(RDFHandler nextHandler) {
5360
}
5461

5562
public JSONLDInternalTripleCallback(RDFHandler nextHandler, ValueFactory vf) {
56-
this(nextHandler, vf, new ParserConfig(), new ParseErrorLogger());
63+
this(nextHandler, vf, new ParserConfig(), new ParseErrorLogger(), nodeID -> vf.createBNode(nodeID),
64+
() -> vf.createBNode());
5765
}
5866

5967
public JSONLDInternalTripleCallback(RDFHandler nextHandler, ValueFactory vf, ParserConfig parserConfig,
60-
ParseErrorListener parseErrorListener)
68+
ParseErrorListener parseErrorListener, Function<String, BNode> namedBNodeCreator,
69+
Supplier<BNode> anonymousBNodeCreator)
6170
{
6271
this.handler = nextHandler;
6372
this.vf = vf;
6473
this.parserConfig = parserConfig;
6574
this.parseErrorListener = parseErrorListener;
75+
this.namedBNodeCreator = namedBNodeCreator;
76+
this.anonymousBNodeCreator = anonymousBNodeCreator;
6677
}
6778

6879
private void triple(String s, String p, String o, String graph) {
@@ -96,10 +107,10 @@ private void triple(String s, String p, String o, String graph) {
96107
private Resource createResource(String resource) {
97108
// Blank node without any given identifier
98109
if (resource.equals("_:")) {
99-
return vf.createBNode();
110+
return anonymousBNodeCreator.get();
100111
}
101112
else if (resource.startsWith("_:")) {
102-
return vf.createBNode(resource.substring(2));
113+
return namedBNodeCreator.apply(resource.substring(2));
103114
}
104115
else {
105116
return vf.createIRI(resource);

core/rio/jsonld/src/main/java/org/eclipse/rdf4j/rio/jsonld/JSONLDParser.java

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,16 @@ public RDFFormat getRDFFormat() {
5858
public void parse(final InputStream in, final String baseURI)
5959
throws IOException, RDFParseException, RDFHandlerException
6060
{
61-
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
62-
valueFactory, getParserConfig(), getParseErrorListener());
63-
64-
final JsonLdOptions options = new JsonLdOptions(baseURI);
65-
options.useNamespaces = true;
61+
clear();
6662

6763
try {
64+
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
65+
valueFactory, getParserConfig(), getParseErrorListener(), nodeID -> createBNode(nodeID),
66+
() -> createBNode());
67+
68+
final JsonLdOptions options = new JsonLdOptions(baseURI);
69+
options.useNamespaces = true;
70+
6871
JsonLdProcessor.toRDF(JsonUtils.fromInputStream(in), callback, options);
6972
}
7073
catch (final JsonLdError e) {
@@ -79,19 +82,25 @@ public void parse(final InputStream in, final String baseURI)
7982
}
8083
throw e;
8184
}
85+
finally {
86+
clear();
87+
}
8288
}
8389

8490
@Override
8591
public void parse(final Reader reader, final String baseURI)
8692
throws IOException, RDFParseException, RDFHandlerException
8793
{
88-
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
89-
valueFactory, getParserConfig(), getParseErrorListener());
90-
91-
final JsonLdOptions options = new JsonLdOptions(baseURI);
92-
options.useNamespaces = true;
94+
clear();
9395

9496
try {
97+
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(getRDFHandler(),
98+
valueFactory, getParserConfig(), getParseErrorListener(), nodeID -> createBNode(nodeID),
99+
() -> createBNode());
100+
101+
final JsonLdOptions options = new JsonLdOptions(baseURI);
102+
options.useNamespaces = true;
103+
95104
JsonLdProcessor.toRDF(JsonUtils.fromReader(reader), callback, options);
96105
}
97106
catch (final JsonLdError e) {
@@ -106,6 +115,9 @@ public void parse(final Reader reader, final String baseURI)
106115
}
107116
throw e;
108117
}
118+
finally {
119+
clear();
120+
}
109121
}
110122

111123
}

core/rio/jsonld/src/test/java/org/eclipse/rdf4j/rio/jsonld/JSONLDInternalTripleCallbackTest.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import java.io.IOException;
1313
import java.util.Iterator;
1414

15-
import org.eclipse.rdf4j.model.Graph;
15+
import org.eclipse.rdf4j.model.Model;
1616
import org.eclipse.rdf4j.model.Statement;
1717
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
1818
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
@@ -40,12 +40,13 @@ public void triplesTest()
4040
final String expectedString = "(http://nonexistent.com/abox#Document1823812, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, http://nonexistent.com/tbox#Document) [null]";
4141
final Object input = JsonUtils.fromString(inputstring);
4242

43-
final Graph graph = new LinkedHashModel();
43+
final Model graph = new LinkedHashModel();
4444
final ParseErrorCollector parseErrorListener = new ParseErrorCollector();
4545
final ParserConfig parserConfig = new ParserConfig();
4646
final JSONLDInternalTripleCallback callback = new JSONLDInternalTripleCallback(
4747
new StatementCollector(graph), SimpleValueFactory.getInstance(), parserConfig,
48-
parseErrorListener);
48+
parseErrorListener, nodeID -> SimpleValueFactory.getInstance().createBNode(nodeID),
49+
() -> SimpleValueFactory.getInstance().createBNode());
4950

5051
JsonLdProcessor.toRDF(input, callback);
5152

core/rio/nquads/src/main/java/org/eclipse/rdf4j/rio/nquads/NQuadsParser.java

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,23 +62,25 @@ public synchronized void parse(final InputStream inputStream, final String baseU
6262
public synchronized void parse(final Reader reader, final String baseURI)
6363
throws IOException, RDFParseException, RDFHandlerException
6464
{
65-
if (reader == null) {
66-
throw new IllegalArgumentException("Reader can not be 'null'");
67-
}
68-
if (baseURI == null) {
69-
throw new IllegalArgumentException("base URI can not be 'null'");
70-
}
65+
clear();
66+
67+
try {
68+
if (reader == null) {
69+
throw new IllegalArgumentException("Reader can not be 'null'");
70+
}
71+
if (baseURI == null) {
72+
throw new IllegalArgumentException("base URI can not be 'null'");
73+
}
7174

72-
if (rdfHandler != null) {
73-
rdfHandler.startRDF();
74-
}
75+
if (rdfHandler != null) {
76+
rdfHandler.startRDF();
77+
}
7578

76-
this.reader = reader;
77-
lineNo = 1;
79+
this.reader = reader;
80+
lineNo = 1;
7881

79-
reportLocation(lineNo, 1);
82+
reportLocation(lineNo, 1);
8083

81-
try {
8284
int c = readCodePoint();
8385
c = skipWhitespace(c);
8486

core/rio/ntriples/src/main/java/org/eclipse/rdf4j/rio/ntriples/NTriplesParser.java

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -141,23 +141,25 @@ public synchronized void parse(InputStream in, String baseURI)
141141
public synchronized void parse(Reader reader, String baseURI)
142142
throws IOException, RDFParseException, RDFHandlerException
143143
{
144-
if (reader == null) {
145-
throw new IllegalArgumentException("Reader can not be 'null'");
146-
}
147-
if (baseURI == null) {
148-
throw new IllegalArgumentException("base URI can not be 'null'");
149-
}
144+
clear();
145+
146+
try {
147+
if (reader == null) {
148+
throw new IllegalArgumentException("Reader can not be 'null'");
149+
}
150+
if (baseURI == null) {
151+
throw new IllegalArgumentException("base URI can not be 'null'");
152+
}
150153

151-
if (rdfHandler != null) {
152-
rdfHandler.startRDF();
153-
}
154+
if (rdfHandler != null) {
155+
rdfHandler.startRDF();
156+
}
154157

155-
this.reader = reader;
156-
lineNo = 1;
158+
this.reader = reader;
159+
lineNo = 1;
157160

158-
reportLocation(lineNo, 1);
161+
reportLocation(lineNo, 1);
159162

160-
try {
161163
int c = readCodePoint();
162164
c = skipWhitespace(c);
163165

0 commit comments

Comments
 (0)