@@ -145,6 +145,8 @@ public class ParsedIRI implements Cloneable, Serializable {
145145
146146 private static String [] uchar = union (unreserved , sub_delims , new String [] { ":" });
147147
148+ private static String [] hchar = union (unreserved , sub_delims );
149+
148150 private static String [] pchar = union (unreserved , sub_delims , new String [] { ":" , "@" });
149151
150152 private static String [] qchar = union (pchar , iprivate , new String [] { "/" , "?" });
@@ -856,43 +858,47 @@ private void parse()
856858 if ("jar" .equalsIgnoreCase (scheme )) {
857859 scheme = scheme + ':' + parseScheme ();
858860 }
859- if ('/' == peek (0 ) && '/' == peek (1 )) {
861+ int peek = peek ();
862+ if ('/' == peek && '/' == peek (1 )) {
860863 advance (2 );
861- userInfo = parseUserInfo ();
864+ if (iri .indexOf ('@' ) >= 0 ) {
865+ userInfo = parseUserInfo ();
866+ }
862867 host = parseHost ();
863- if (':' == peek (0 )) {
868+ if (':' == peek ()) {
864869 advance (1 );
865- String p = parseMember (DIGIT );
870+ String p = parseMember (DIGIT , '/' );
866871 if (p .length () > 0 ) {
867872 port = Integer .parseInt (p );
868873 }
869874 else {
870875 port = -1 ;
871876 }
872877 }
873- if ('/' == peek (0 ) || '?' == peek (0 ) || '#' == peek (0 ) || EOF == peek (0 )) {
878+ int next = peek ();
879+ if ('/' == next || '?' == next || '#' == next || EOF == next ) {
874880 path = parsePath ();
875881 }
876882 else {
877883 error ("absolute or empty path expected" );
878884 }
879885 }
880- else if ('/' == peek ( 0 ) || '?' == peek ( 0 ) || '#' == peek ( 0 ) || EOF == peek ( 0 ) ) {
886+ else if ('/' == peek || '?' == peek || '#' == peek || EOF == peek ) {
881887 path = parsePath ();
882888 }
883- else if ('%' == peek ( 0 ) || ':' != peek ( 0 ) && isMember (pchar , peek ( 0 ) )) {
889+ else if ('%' == peek || ':' != peek && isMember (pchar , peek )) {
884890 path = parsePath ();
885891 }
886- else if (scheme != null && ':' == peek ( 0 ) ) {
892+ else if (scheme != null && ':' == peek ) {
887893 path = parsePath ();
888894 }
889- if ('?' == peek (0 )) {
895+ if ('?' == peek ()) {
890896 advance (1 );
891- query = parsePctEncoded (qchar );
897+ query = parsePctEncoded (qchar , '#' , EOF );
892898 }
893- if ('#' == peek (0 )) {
899+ if ('#' == peek ()) {
894900 advance (1 );
895- fragment = parsePctEncoded (fchar );
901+ fragment = parsePctEncoded (fchar , '#' , EOF );
896902 }
897903 if (pos != iri .length ()) {
898904 throw error ("Unexpected character" );
@@ -931,10 +937,10 @@ private String buildIRI(String scheme, String userInfo, String host, int port, S
931937 private String parseScheme ()
932938 throws URISyntaxException
933939 {
934- if (isMember (ALPHA , peek (0 ))) {
940+ if (isMember (ALPHA , peek ())) {
935941 int start = pos ;
936- String scheme = parseMember (schar );
937- if (':' == peek (0 )) {
942+ String scheme = parseMember (schar , ':' );
943+ if (':' == peek ()) {
938944 advance (1 );
939945 return scheme ;
940946 }
@@ -949,8 +955,8 @@ private String parseUserInfo()
949955 throws URISyntaxException
950956 {
951957 int start = pos ;
952- String userinfo = parsePctEncoded (uchar );
953- if ('@' == peek (0 )) {
958+ String userinfo = parsePctEncoded (uchar , '@' , '/' );
959+ if ('@' == peek ()) {
954960 advance (1 );
955961 return userinfo ;
956962 }
@@ -964,24 +970,24 @@ private String parseHost()
964970 throws URISyntaxException
965971 {
966972 int start = pos ;
967- if ('[' == peek (0 )) {
973+ if ('[' == peek ()) {
968974 advance (1 ); // IP-Literal
969- parseMember (uchar );
970- if (']' == peek (0 )) {
975+ parseMember (uchar , ']' );
976+ if (']' == peek ()) {
971977 advance (1 );
972978 return iri .substring (start , pos );
973979 }
974980 else {
975981 throw error ("Invalid host IP address" );
976982 }
977983 }
978- else if (isMember (DIGIT , peek (0 ))) {
984+ else if (isMember (DIGIT , peek ())) {
979985 for (int i = 0 ; i < 4 ; i ++) {
980- int octet = Integer .parseInt (parseMember (DIGIT ));
986+ int octet = Integer .parseInt (parseMember (DIGIT , '.' ));
981987 if (octet < 0 || octet > 255 ) {
982988 throw error ("Invalid IPv4 address" );
983989 }
984- if ('.' == peek (0 )) {
990+ if ('.' == peek ()) {
985991 advance (1 );
986992 }
987993 else {
@@ -991,43 +997,42 @@ else if (isMember(DIGIT, peek(0))) {
991997 return iri .substring (start , pos );
992998 }
993999 else {
994- return parsePctEncoded (union ( unreserved , sub_delims ) );
1000+ return parsePctEncoded (hchar , ':' , '/' );
9951001 }
9961002 }
9971003
9981004 private String parsePath ()
9991005 throws URISyntaxException
10001006 {
1001- int start = pos ;
1002- if ('/' != peek (0 )) {
1003- parsePctEncoded (pchar );
1004- }
1005- while ('/' == peek (0 )) {
1006- advance (1 );
1007- parsePctEncoded (pchar );
1008- }
1009- return iri .substring (start , pos );
1007+ return parsePctEncoded (fchar , '?' , '#' );
10101008 }
10111009
1012- private String parsePctEncoded (String [] set )
1010+ private String parsePctEncoded (String [] set , int end1 , int end2 )
10131011 throws URISyntaxException
10141012 {
1015- if ('%' != peek (0 ) && !isMember (set , peek (0 ))) {
1016- return "" ;
1017- }
10181013 int start = pos ;
1019- while ('%' == peek (0 ) || isMember (set , peek (0 ))) {
1020- if ('%' == peek (0 )) {
1014+ while (true ) {
1015+ int chr = peek ();
1016+ if (chr == EOF || chr == end1 || chr == end2 ) {
1017+ break ; // optimize end character
1018+ }
1019+ else if (('a' <= chr && chr <= 'z' ) || ('A' <= chr && chr <= 'Z' ) || ('0' <= chr && chr <= '9' )) {
1020+ advance (1 );
1021+ }
1022+ else if ('%' == chr ) {
10211023 if (isMember (HEXDIG , peek (1 )) && isMember (HEXDIG , peek (2 ))) {
10221024 advance (3 );
10231025 }
10241026 else {
1025- throw error ("Illegal Percent encoding" );
1027+ throw error ("Illegal percent encoding" );
10261028 }
10271029 }
1028- else {
1030+ else if ( isMember ( set , chr )) {
10291031 advance (1 );
10301032 }
1033+ else {
1034+ break ;
1035+ }
10311036 }
10321037 return iri .substring (start , pos );
10331038 }
@@ -1051,19 +1056,34 @@ private boolean isMember(String range, int chr) {
10511056 }
10521057 }
10531058
1054- private String parseMember (String [] set )
1059+ private String parseMember (String [] set , int end )
10551060 throws URISyntaxException
10561061 {
1057- if (!isMember (set , peek (0 ))) {
1058- return "" ;
1059- }
10601062 int start = pos ;
1061- while (isMember (set , peek (0 ))) {
1062- advance (1 );
1063+ while (true ) {
1064+ int chr = peek ();
1065+ if (chr == EOF || chr == end ) {
1066+ break ;
1067+ }
1068+ else if (isMember (set , chr )) {
1069+ advance (1 );
1070+ }
1071+ else {
1072+ break ;
1073+ }
10631074 }
10641075 return iri .substring (start , pos );
10651076 }
10661077
1078+ private int peek () {
1079+ if (pos < iri .length ()) {
1080+ return iri .codePointAt (pos );
1081+ }
1082+ else {
1083+ return EOF ;
1084+ }
1085+ }
1086+
10671087 private int peek (int ahead ) {
10681088 if (pos + ahead < iri .length ()) {
10691089 return iri .codePointAt (iri .offsetByCodePoints (pos , ahead ));
0 commit comments