Skip to content

Commit 91351d7

Browse files
author
James Leigh
authored
Merge pull request #858 from jamesrdf/issues/#850-unicode-iri
Fix #850: Apply ESCAPE_UNICODE setting to IRI in NTriples
2 parents 351f42f + b85a7a9 commit 91351d7

2 files changed

Lines changed: 125 additions & 14 deletions

File tree

core/rio/nquads/src/main/java/org/eclipse/rdf4j/rio/nquads/NQuadsWriter.java

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
import org.eclipse.rdf4j.model.Statement;
1515
import org.eclipse.rdf4j.rio.RDFFormat;
1616
import org.eclipse.rdf4j.rio.RDFHandlerException;
17-
import org.eclipse.rdf4j.rio.helpers.BasicWriterSettings;
18-
import org.eclipse.rdf4j.rio.helpers.NTriplesWriterSettings;
19-
import org.eclipse.rdf4j.rio.ntriples.NTriplesUtil;
2017
import org.eclipse.rdf4j.rio.ntriples.NTriplesWriter;
2118

2219
/**
@@ -49,21 +46,19 @@ public void handleStatement(Statement st)
4946

5047
try {
5148
// SUBJECT
52-
NTriplesUtil.append(st.getSubject(), writer);
49+
writeValue(st.getSubject());
5350
writer.write(" ");
5451

5552
// PREDICATE
56-
NTriplesUtil.append(st.getPredicate(), writer);
53+
writeValue(st.getPredicate());
5754
writer.write(" ");
5855

5956
// OBJECT
60-
NTriplesUtil.append(st.getObject(), writer,
61-
getWriterConfig().get(BasicWriterSettings.XSD_STRING_TO_PLAIN_LITERAL),
62-
getWriterConfig().get(NTriplesWriterSettings.ESCAPE_UNICODE));
57+
writeValue(st.getObject());
6358

6459
if (null != st.getContext()) {
6560
writer.write(" ");
66-
NTriplesUtil.append(st.getContext(), writer);
61+
writeValue(st.getContext());
6762
}
6863

6964
writer.write(" .\n");

core/rio/ntriples/src/main/java/org/eclipse/rdf4j/rio/ntriples/NTriplesWriter.java

Lines changed: 121 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,13 @@
1616
import java.util.HashSet;
1717
import java.util.Set;
1818

19+
import org.eclipse.rdf4j.model.BNode;
20+
import org.eclipse.rdf4j.model.IRI;
21+
import org.eclipse.rdf4j.model.Literal;
1922
import org.eclipse.rdf4j.model.Statement;
23+
import org.eclipse.rdf4j.model.Value;
24+
import org.eclipse.rdf4j.model.util.Literals;
25+
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
2026
import org.eclipse.rdf4j.rio.RDFFormat;
2127
import org.eclipse.rdf4j.rio.RDFHandlerException;
2228
import org.eclipse.rdf4j.rio.RDFWriter;
@@ -40,6 +46,10 @@ public class NTriplesWriter extends AbstractRDFWriter implements RDFWriter {
4046

4147
protected boolean writingStarted;
4248

49+
private boolean xsdStringToPlainLiteral = true;
50+
51+
private boolean escapeUnicode;
52+
4353
/*--------------*
4454
* Constructors *
4555
*--------------*/
@@ -83,6 +93,8 @@ public void startRDF()
8393
}
8494

8595
writingStarted = true;
96+
xsdStringToPlainLiteral = getWriterConfig().get(BasicWriterSettings.XSD_STRING_TO_PLAIN_LITERAL);
97+
escapeUnicode = getWriterConfig().get(NTriplesWriterSettings.ESCAPE_UNICODE);
8698
}
8799

88100
@Override
@@ -118,13 +130,11 @@ public void handleStatement(Statement st)
118130
}
119131

120132
try {
121-
NTriplesUtil.append(st.getSubject(), writer);
133+
writeValue(st.getSubject());
122134
writer.write(" ");
123-
NTriplesUtil.append(st.getPredicate(), writer);
135+
writeIRI(st.getPredicate());
124136
writer.write(" ");
125-
NTriplesUtil.append(st.getObject(), writer,
126-
getWriterConfig().get(BasicWriterSettings.XSD_STRING_TO_PLAIN_LITERAL),
127-
getWriterConfig().get(NTriplesWriterSettings.ESCAPE_UNICODE));
137+
writeValue(st.getObject());
128138

129139
writer.write(" .\n");
130140
}
@@ -156,4 +166,110 @@ public final Collection<RioSetting<?>> getSupportedSettings() {
156166

157167
return result;
158168
}
169+
170+
/**
171+
* Writes the N-Triples representation of the given {@link Value}.
172+
*
173+
* @param value
174+
* The value to write.
175+
* @throws IOException
176+
*/
177+
protected void writeValue(Value value)
178+
throws IOException
179+
{
180+
if (value instanceof IRI) {
181+
writeIRI((IRI)value);
182+
}
183+
else if (value instanceof BNode) {
184+
writeBNode((BNode)value);
185+
}
186+
else if (value instanceof Literal) {
187+
writeLiteral((Literal)value);
188+
}
189+
else {
190+
throw new IllegalArgumentException("Unknown value type: " + value.getClass());
191+
}
192+
}
193+
194+
private void writeIRI(IRI iri)
195+
throws IOException
196+
{
197+
writer.append("<");
198+
writeString(iri.stringValue());
199+
writer.append(">");
200+
}
201+
202+
private void writeBNode(BNode bNode)
203+
throws IOException
204+
{
205+
String nextId = bNode.getID();
206+
writer.append("_:");
207+
208+
if (nextId.isEmpty()) {
209+
writer.append("genid");
210+
writer.append(Integer.toHexString(bNode.hashCode()));
211+
}
212+
else {
213+
if (!NTriplesUtil.isLetter(nextId.charAt(0))) {
214+
writer.append("genid");
215+
writer.append(Integer.toHexString(nextId.charAt(0)));
216+
}
217+
218+
for (int i = 0; i < nextId.length(); i++) {
219+
if (NTriplesUtil.isLetterOrNumber(nextId.charAt(i))) {
220+
writer.append(nextId.charAt(i));
221+
}
222+
else {
223+
// Append the character as its hex representation
224+
writer.append(Integer.toHexString(nextId.charAt(i)));
225+
}
226+
}
227+
}
228+
}
229+
230+
/**
231+
* Write the N-Triples representation of the given {@link Literal}, optionally ignoring the xsd:string
232+
* datatype as it is implied for RDF-1.1.
233+
*
234+
* @param lit
235+
* The literal to write.
236+
* @throws IOException
237+
*/
238+
private void writeLiteral(Literal lit)
239+
throws IOException
240+
{
241+
// Do some character escaping on the label:
242+
writer.append("\"");
243+
writeString(lit.getLabel());
244+
writer.append("\"");
245+
246+
if (Literals.isLanguageLiteral(lit)) {
247+
// Append the literal's language
248+
writer.append("@");
249+
writer.append(lit.getLanguage().get());
250+
}
251+
else {
252+
// SES-1917 : In RDF-1.1, all literals have a type, and if they are not
253+
// language literals we display the type for backwards compatibility
254+
IRI datatype = lit.getDatatype();
255+
if (!datatype.equals(XMLSchema.STRING) || !xsdStringToPlainLiteral) {
256+
writer.append("^^");
257+
writeIRI(lit.getDatatype());
258+
}
259+
}
260+
}
261+
262+
/**
263+
* Writes a Unicode string to an N-Triples compatible character sequence. Any special characters are
264+
* escaped using backslashes (<tt>"</tt> becomes <tt>\"</tt>, etc.), and non-ascii/non-printable
265+
* characters are escaped using Unicode escapes (<tt>&#x5C;uxxxx</tt> and <tt>&#x5C;Uxxxxxxxx</tt>) if the
266+
* writer config is enabled.
267+
*
268+
* @throws IOException
269+
*/
270+
private void writeString(String label)
271+
throws IOException
272+
{
273+
NTriplesUtil.escapeString(label, writer, escapeUnicode);
274+
}
159275
}

0 commit comments

Comments
 (0)