Skip to content

Commit 856e896

Browse files
refactoring
1 parent 162e1f0 commit 856e896

1 file changed

Lines changed: 38 additions & 8 deletions

File tree

serializer/src/main/java/org/apache/xml/serializer/ToStream.java

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,34 @@ protected int writeUTF16Surrogate(final char high, char ch[], int i, int end)
10271027
}
10281028

10291029
final char low = ch[i+1];
1030+
return writeUTF16Surrogate(high, low);
1031+
}
1032+
1033+
1034+
/**
1035+
* Once a surrogate has been detected, write out the pair of
1036+
* characters if it is in the encoding, or if there is no
1037+
* encoding, otherwise write out a numeric character reference
1038+
* of the value of the unicode code point of the character
1039+
* represented by the high/low surrogate pair.
1040+
* <p>
1041+
* An exception is thrown if there is no low surrogate in the pair,
1042+
* because the array ends unexpectedly, or if the low char is there
1043+
* but its value is such that it is not a low surrogate.
1044+
*
1045+
* @param high the first (high) part of the surrogate, which
1046+
* must be confirmed before calling this method.
1047+
* @param low the second (low) part of the presumed surrogate
1048+
* @return 0 if the pair of characters was written out as-is,
1049+
* or the unicode code point of the character represented by
1050+
* the surrogate pair if a numeric char ref with that value
1051+
* was written out. (REVIEW: Is this needed?)
1052+
*
1053+
* @throws IOException if invalid UTF-16 surrogate detected.
1054+
*/
1055+
protected int writeUTF16Surrogate(final char high, final char low)
1056+
throws IOException
1057+
{
10301058
if (!Encodings.isLowUTF16Surrogate(low)) {
10311059
throw new IOException(
10321060
Utils.messages.createMessage(
@@ -1038,35 +1066,37 @@ protected int writeUTF16Surrogate(final char high, char ch[], int i, int end)
10381066
}
10391067

10401068
final java.io.Writer writer = m_writer;
1041-
int codePoint = 0; // Nonzero iff written as NCR
1069+
int codePoint = 0; // Nonzero iff written as NCR. REVIEW: Needed?
10421070

10431071
// If we make it to here we have a valid high, low surrogate pair
10441072
if (m_encodingInfo.isInEncoding(high,low)) {
10451073
// If the character formed by the surrogate pair
10461074
// is in the encoding, just write it out
10471075
// NOTE: Assumes same buffer
1048-
writer.write(ch,i,2);
1076+
writer.write(high);
1077+
writer.write(low);
10491078
}
10501079
else {
10511080
// Don't know what to do with this char, it is
10521081
// not in the encoding and not a high char in
10531082
// a surrogate pair, so write out as a numeric char ref
10541083
final String encoding = getEncoding();
10551084
if (encoding != null) {
1056-
/* The output encoding is known,
1057-
* so somthing is wrong.
1085+
/* The output encoding is known but does not include
1086+
* this character. Fallback: Write as NCR
10581087
*/
10591088
codePoint = Encodings.toCodePoint(high, low);
1060-
// not in the encoding, so write out a character reference
10611089
writer.write('&');
10621090
writer.write('#');
10631091
writer.write(Integer.toString(codePoint));
10641092
writer.write(';');
10651093
} else {
1066-
/* The output encoding is not known,
1067-
* so just write it out as-is.
1094+
/* The output encoding is not known, so presume
1095+
* Unicode and just write it out. This handles the
1096+
* case of serializing to a character buffer.
10681097
*/
1069-
writer.write(ch, i, 2);
1098+
writer.write(high);
1099+
writer.write(low);
10701100
}
10711101
}
10721102

0 commit comments

Comments
 (0)