Skip to content

Commit 89cf6d3

Browse files
author
James Leigh
authored
Merge pull request #837 from jamesrdf/issues/#69-iri-parser
Fix #69: IRI Parser for validation and resolution
2 parents 7da044c + 73b0065 commit 89cf6d3

12 files changed

Lines changed: 2202 additions & 145 deletions

File tree

core/queryparser/sparql/src/main/java/org/eclipse/rdf4j/query/parser/sparql/BaseDeclProcessor.java

Lines changed: 22 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,9 @@
77
*******************************************************************************/
88
package org.eclipse.rdf4j.query.parser.sparql;
99

10+
import java.net.URISyntaxException;
11+
12+
import org.eclipse.rdf4j.common.net.ParsedIRI;
1013
import org.eclipse.rdf4j.common.net.ParsedURI;
1114
import org.eclipse.rdf4j.query.MalformedQueryException;
1215
import org.eclipse.rdf4j.query.parser.sparql.ast.ASTBaseDecl;
@@ -44,20 +47,30 @@ public class BaseDeclProcessor {
4447
public static void process(ASTOperationContainer qc, String externalBaseURI)
4548
throws MalformedQueryException
4649
{
47-
ParsedURI parsedBaseURI = null;
50+
ParsedIRI parsedBaseURI = null;
4851

4952
// Use the query model's own base URI, if available
5053
ASTBaseDecl baseDecl = qc.getBaseDecl();
5154
if (baseDecl != null) {
52-
parsedBaseURI = new ParsedURI(baseDecl.getIRI());
55+
try {
56+
parsedBaseURI = new ParsedIRI(baseDecl.getIRI());
57+
}
58+
catch (URISyntaxException e) {
59+
throw new MalformedQueryException(e);
60+
}
5361

5462
if (!parsedBaseURI.isAbsolute()) {
5563
throw new MalformedQueryException("BASE IRI is not an absolute IRI: " + externalBaseURI);
5664
}
5765
}
5866
else if (externalBaseURI != null) {
5967
// Use external base URI if the query doesn't contain one itself
60-
parsedBaseURI = new ParsedURI(externalBaseURI);
68+
try {
69+
parsedBaseURI = new ParsedIRI(externalBaseURI);
70+
}
71+
catch (URISyntaxException e) {
72+
throw new MalformedQueryException(e);
73+
}
6174

6275
if (!parsedBaseURI.isAbsolute()) {
6376
throw new IllegalArgumentException(
@@ -98,18 +111,21 @@ else if (qc.getOperation() instanceof ASTDeleteData) {
98111

99112
private static class RelativeIRIResolver extends AbstractASTVisitor {
100113

101-
private ParsedURI parsedBaseURI;
114+
private ParsedIRI parsedBaseURI;
102115

103116
public RelativeIRIResolver(ParsedURI parsedBaseURI) {
117+
this(ParsedIRI.create(parsedBaseURI.toString()));
118+
}
119+
120+
public RelativeIRIResolver(ParsedIRI parsedBaseURI) {
104121
this.parsedBaseURI = parsedBaseURI;
105122
}
106123

107124
@Override
108125
public Object visit(ASTIRI node, Object data)
109126
throws VisitorException
110127
{
111-
ParsedURI resolvedURI = parsedBaseURI.resolve(node.getValue());
112-
node.setValue(resolvedURI.toString());
128+
node.setValue(parsedBaseURI.resolve(node.getValue()));
113129

114130
return super.visit(node, data);
115131
}

core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/query/SPARQLOperation.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
import java.util.regex.Pattern;
1616

1717
import org.apache.http.client.HttpClient;
18-
import org.eclipse.rdf4j.common.net.ParsedURI;
18+
import org.eclipse.rdf4j.common.net.ParsedIRI;
1919
import org.eclipse.rdf4j.model.IRI;
2020
import org.eclipse.rdf4j.model.Literal;
2121
import org.eclipse.rdf4j.model.Value;
@@ -48,7 +48,7 @@ public SPARQLOperation(HttpClient client, String url, String base, String operat
4848
this.url = url;
4949
this.operation = operation;
5050
this.client = client;
51-
boolean abs = base != null && base.length() > 0 && new ParsedURI(base).isAbsolute();
51+
boolean abs = base != null && base.length() > 0 && ParsedIRI.create(base).isAbsolute();
5252
if (abs && !operation.toUpperCase().contains("BASE")) {
5353
this.operation = "BASE <" + base + "> " + operation;
5454
}

core/rio/api/src/main/java/org/eclipse/rdf4j/rio/helpers/AbstractRDFParser.java

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*******************************************************************************/
88
package org.eclipse.rdf4j.rio.helpers;
99

10-
import java.io.UnsupportedEncodingException;
10+
import java.net.URISyntaxException;
1111
import java.nio.charset.StandardCharsets;
1212
import java.security.MessageDigest;
1313
import java.security.NoSuchAlgorithmException;
@@ -20,6 +20,7 @@
2020

2121
import javax.xml.bind.annotation.adapters.HexBinaryAdapter;
2222

23+
import org.eclipse.rdf4j.common.net.ParsedIRI;
2324
import org.eclipse.rdf4j.common.net.ParsedURI;
2425
import org.eclipse.rdf4j.model.BNode;
2526
import org.eclipse.rdf4j.model.IRI;
@@ -74,7 +75,7 @@ public abstract class AbstractRDFParser implements RDFParser {
7475
/**
7576
* The base URI for resolving relative URIs.
7677
*/
77-
private ParsedURI baseURI;
78+
private ParsedIRI baseURI;
7879

7980
/**
8081
* Enables a consistent global mapping of blank node identifiers without using a map, but concatenating
@@ -298,16 +299,16 @@ public DatatypeHandling datatypeHandling() {
298299
*/
299300
protected void setBaseURI(String uriSpec) {
300301
// Store normalized base URI
301-
ParsedURI baseURI = new ParsedURI(uriSpec);
302-
baseURI.normalize();
303-
setBaseURI(baseURI);
302+
if (this.baseURI == null || !this.baseURI.toString().equals(uriSpec)) {
303+
this.baseURI = ParsedIRI.create(uriSpec).normalize();
304+
}
304305
}
305306

306307
/**
307308
* Sets the base URI for resolving relative URIs.
308309
*/
309310
protected void setBaseURI(ParsedURI baseURI) {
310-
this.baseURI = baseURI;
311+
setBaseURI(baseURI.toString());
311312
}
312313

313314
/**
@@ -375,15 +376,15 @@ protected IRI resolveURI(String uriSpec)
375376
throws RDFParseException
376377
{
377378
// Resolve relative URIs against base URI
378-
ParsedURI uri = new ParsedURI(uriSpec);
379+
ParsedIRI uri = ParsedIRI.create(uriSpec);
379380

380-
if (uri.isRelative()) {
381+
if (!uri.isAbsolute()) {
381382
if (baseURI == null) {
382383
reportFatalError("Unable to resolve URIs, no base URI has been set");
383384
}
384385

385386
if (getParserConfig().get(BasicParserSettings.VERIFY_RELATIVE_URIS)) {
386-
if (uri.isRelative() && !uri.isSelfReference() && baseURI.isOpaque()) {
387+
if (!uri.isAbsolute() && uriSpec.length() > 0 && !uriSpec.startsWith("#") && baseURI.isOpaque()) {
387388
reportError("Relative URI '" + uriSpec
388389
+ "' cannot be resolved using the opaque base URI '" + baseURI + "'",
389390
BasicParserSettings.VERIFY_RELATIVE_URIS);

core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/RDFXMLParser.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -478,6 +478,12 @@ protected void setBaseURI(ParsedURI baseURI) {
478478
super.setBaseURI(baseURI);
479479
}
480480

481+
@Override
482+
protected void setBaseURI(String baseURI) {
483+
// Note: we need to override this method to allow SAXFilter to access it
484+
super.setBaseURI(baseURI);
485+
}
486+
481487
void setXMLLang(String xmlLang) {
482488
if ("".equals(xmlLang)) {
483489
this.xmlLang = null;

core/rio/rdfxml/src/main/java/org/eclipse/rdf4j/rio/rdfxml/SAXFilter.java

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
import java.util.Map;
1515
import java.util.Stack;
1616

17-
import org.eclipse.rdf4j.common.net.ParsedURI;
17+
import org.eclipse.rdf4j.common.net.ParsedIRI;
1818
import org.eclipse.rdf4j.common.xml.XMLUtil;
1919
import org.eclipse.rdf4j.model.vocabulary.RDF;
2020
import org.eclipse.rdf4j.rio.ParseLocationListener;
@@ -60,7 +60,7 @@ class SAXFilter implements ContentHandler {
6060
/**
6161
* The document's URI.
6262
*/
63-
private ParsedURI documentURI;
63+
private ParsedIRI documentURI;
6464

6565
/**
6666
* Flag indicating whether the parser parses stand-alone RDF documents. In stand-alone documents, the
@@ -320,7 +320,7 @@ private void reportDeferredStartElement()
320320
elInfoStack.push(deferredElement);
321321
rdfContextStackHeight++;
322322

323-
rdfParser.setBaseURI(deferredElement.baseURI);
323+
rdfParser.setBaseURI(deferredElement.baseURI.toString());
324324
rdfParser.setXMLLang(deferredElement.xmlLang);
325325

326326
rdfParser.startElement(deferredElement.namespaceURI, deferredElement.localName, deferredElement.qName,
@@ -383,7 +383,7 @@ public void endElement(String namespaceURI, String localName, String qName)
383383
// Check for any deferred start elements
384384
if (deferredElement != null) {
385385
// Start element still deferred, this is an empty element
386-
rdfParser.setBaseURI(deferredElement.baseURI);
386+
rdfParser.setBaseURI(deferredElement.baseURI.toString());
387387
rdfParser.setXMLLang(deferredElement.xmlLang);
388388

389389
rdfParser.emptyElement(deferredElement.namespaceURI, deferredElement.localName,
@@ -547,10 +547,8 @@ public void setParseLiteralMode() {
547547
unknownPrefixesInXMLLiteral.clear();
548548
}
549549

550-
private ParsedURI createBaseURI(String uriString) {
551-
ParsedURI uri = new ParsedURI(uriString);
552-
uri.normalize();
553-
return uri;
550+
private ParsedIRI createBaseURI(String uriString) {
551+
return ParsedIRI.create(uriString).normalize();
554552
}
555553

556554
/*---------------------------------*
@@ -675,7 +673,7 @@ private class ElementInfo {
675673

676674
private Map<String, String> namespaceMap;
677675

678-
public ParsedURI baseURI;
676+
public ParsedIRI baseURI;
679677

680678
public String xmlLang;
681679

0 commit comments

Comments
 (0)