@@ -724,93 +724,97 @@ ly_pat_compile_posix(const char *pattern, void **pat_comp, struct ly_err_item **
724724static LY_ERR
725725ly_pat_compile_xmlschema_chblocks_xmlschema2perl (const char * pattern , char * * regex , struct ly_err_item * * err )
726726{
727- #define URANGE_LEN 19
728- char * ublock2urange [][2 ] = {
729- {"BasicLatin" , "[\\x{0000}-\\x{007F}]" },
730- {"Latin-1Supplement" , "[\\x{0080}-\\x{00FF}]" },
731- {"LatinExtended-A" , "[\\x{0100}-\\x{017F}]" },
732- {"LatinExtended-B" , "[\\x{0180}-\\x{024F}]" },
733- {"IPAExtensions" , "[\\x{0250}-\\x{02AF}]" },
734- {"SpacingModifierLetters" , "[\\x{02B0}-\\x{02FF}]" },
735- {"CombiningDiacriticalMarks" , "[\\x{0300}-\\x{036F}]" },
736- {"Greek" , "[\\x{0370}-\\x{03FF}]" },
737- {"Cyrillic" , "[\\x{0400}-\\x{04FF}]" },
738- {"Armenian" , "[\\x{0530}-\\x{058F}]" },
739- {"Hebrew" , "[\\x{0590}-\\x{05FF}]" },
740- {"Arabic" , "[\\x{0600}-\\x{06FF}]" },
741- {"Syriac" , "[\\x{0700}-\\x{074F}]" },
742- {"Thaana" , "[\\x{0780}-\\x{07BF}]" },
743- {"Devanagari" , "[\\x{0900}-\\x{097F}]" },
744- {"Bengali" , "[\\x{0980}-\\x{09FF}]" },
745- {"Gurmukhi" , "[\\x{0A00}-\\x{0A7F}]" },
746- {"Gujarati" , "[\\x{0A80}-\\x{0AFF}]" },
747- {"Oriya" , "[\\x{0B00}-\\x{0B7F}]" },
748- {"Tamil" , "[\\x{0B80}-\\x{0BFF}]" },
749- {"Telugu" , "[\\x{0C00}-\\x{0C7F}]" },
750- {"Kannada" , "[\\x{0C80}-\\x{0CFF}]" },
751- {"Malayalam" , "[\\x{0D00}-\\x{0D7F}]" },
752- {"Sinhala" , "[\\x{0D80}-\\x{0DFF}]" },
753- {"Thai" , "[\\x{0E00}-\\x{0E7F}]" },
754- {"Lao" , "[\\x{0E80}-\\x{0EFF}]" },
755- {"Tibetan" , "[\\x{0F00}-\\x{0FFF}]" },
756- {"Myanmar" , "[\\x{1000}-\\x{109F}]" },
757- {"Georgian" , "[\\x{10A0}-\\x{10FF}]" },
758- {"HangulJamo" , "[\\x{1100}-\\x{11FF}]" },
759- {"Ethiopic" , "[\\x{1200}-\\x{137F}]" },
760- {"Cherokee" , "[\\x{13A0}-\\x{13FF}]" },
761- {"UnifiedCanadianAboriginalSyllabics" , "[\\x{1400}-\\x{167F}]" },
762- {"Ogham" , "[\\x{1680}-\\x{169F}]" },
763- {"Runic" , "[\\x{16A0}-\\x{16FF}]" },
764- {"Khmer" , "[\\x{1780}-\\x{17FF}]" },
765- {"Mongolian" , "[\\x{1800}-\\x{18AF}]" },
766- {"LatinExtendedAdditional" , "[\\x{1E00}-\\x{1EFF}]" },
767- {"GreekExtended" , "[\\x{1F00}-\\x{1FFF}]" },
768- {"GeneralPunctuation" , "[\\x{2000}-\\x{206F}]" },
769- {"SuperscriptsandSubscripts" , "[\\x{2070}-\\x{209F}]" },
770- {"CurrencySymbols" , "[\\x{20A0}-\\x{20CF}]" },
771- {"CombiningMarksforSymbols" , "[\\x{20D0}-\\x{20FF}]" },
772- {"LetterlikeSymbols" , "[\\x{2100}-\\x{214F}]" },
773- {"NumberForms" , "[\\x{2150}-\\x{218F}]" },
774- {"Arrows" , "[\\x{2190}-\\x{21FF}]" },
775- {"MathematicalOperators" , "[\\x{2200}-\\x{22FF}]" },
776- {"MiscellaneousTechnical" , "[\\x{2300}-\\x{23FF}]" },
777- {"ControlPictures" , "[\\x{2400}-\\x{243F}]" },
778- {"OpticalCharacterRecognition" , "[\\x{2440}-\\x{245F}]" },
779- {"EnclosedAlphanumerics" , "[\\x{2460}-\\x{24FF}]" },
780- {"BoxDrawing" , "[\\x{2500}-\\x{257F}]" },
781- {"BlockElements" , "[\\x{2580}-\\x{259F}]" },
782- {"GeometricShapes" , "[\\x{25A0}-\\x{25FF}]" },
783- {"MiscellaneousSymbols" , "[\\x{2600}-\\x{26FF}]" },
784- {"Dingbats" , "[\\x{2700}-\\x{27BF}]" },
785- {"BraillePatterns" , "[\\x{2800}-\\x{28FF}]" },
786- {"CJKRadicalsSupplement" , "[\\x{2E80}-\\x{2EFF}]" },
787- {"KangxiRadicals" , "[\\x{2F00}-\\x{2FDF}]" },
788- {"IdeographicDescriptionCharacters" , "[\\x{2FF0}-\\x{2FFF}]" },
789- {"CJKSymbolsandPunctuation" , "[\\x{3000}-\\x{303F}]" },
790- {"Hiragana" , "[\\x{3040}-\\x{309F}]" },
791- {"Katakana" , "[\\x{30A0}-\\x{30FF}]" },
792- {"Bopomofo" , "[\\x{3100}-\\x{312F}]" },
793- {"HangulCompatibilityJamo" , "[\\x{3130}-\\x{318F}]" },
794- {"Kanbun" , "[\\x{3190}-\\x{319F}]" },
795- {"BopomofoExtended" , "[\\x{31A0}-\\x{31BF}]" },
796- {"EnclosedCJKLettersandMonths" , "[\\x{3200}-\\x{32FF}]" },
797- {"CJKCompatibility" , "[\\x{3300}-\\x{33FF}]" },
798- {"CJKUnifiedIdeographsExtensionA" , "[\\x{3400}-\\x{4DB5}]" },
799- {"CJKUnifiedIdeographs" , "[\\x{4E00}-\\x{9FFF}]" },
800- {"YiSyllables" , "[\\x{A000}-\\x{A48F}]" },
801- {"YiRadicals" , "[\\x{A490}-\\x{A4CF}]" },
802- {"HangulSyllables" , "[\\x{AC00}-\\x{D7A3}]" },
803- {"PrivateUse" , "[\\x{E000}-\\x{F8FF}]" },
804- {"CJKCompatibilityIdeographs" , "[\\x{F900}-\\x{FAFF}]" },
805- {"AlphabeticPresentationForms" , "[\\x{FB00}-\\x{FB4F}]" },
806- {"ArabicPresentationForms-A" , "[\\x{FB50}-\\x{FDFF}]" },
807- {"CombiningHalfMarks" , "[\\x{FE20}-\\x{FE2F}]" },
808- {"CJKCompatibilityForms" , "[\\x{FE30}-\\x{FE4F}]" },
809- {"SmallFormVariants" , "[\\x{FE50}-\\x{FE6F}]" },
810- {"ArabicPresentationForms-B" , "[\\x{FE70}-\\x{FEFE}]" },
811- {"HalfwidthandFullwidthForms" , "[\\x{FF00}-\\x{FFEF}]" },
812- {"Specials" , "[\\x{FEFF}|\\x{FFF0}-\\x{FFFD}]" },
813- {NULL , NULL }
727+ struct ublock_s {
728+ char * ublock ;
729+ char * urange ;
730+ size_t size ;
731+ };
732+ struct ublock_s ublock2urange [] = {
733+ {"BasicLatin" , "[\\x{0000}-\\x{007F}]" ,19 },
734+ {"Latin-1Supplement" , "[\\x{0080}-\\x{00FF}]" , 19 },
735+ {"LatinExtended-A" , "[\\x{0100}-\\x{017F}]" , 19 },
736+ {"LatinExtended-B" , "[\\x{0180}-\\x{024F}]" , 19 },
737+ {"IPAExtensions" , "[\\x{0250}-\\x{02AF}]" , 19 },
738+ {"SpacingModifierLetters" , "[\\x{02B0}-\\x{02FF}]" , 19 },
739+ {"CombiningDiacriticalMarks" , "[\\x{0300}-\\x{036F}]" , 19 },
740+ {"Greek" , "[\\x{0370}-\\x{03FF}]" , 19 },
741+ {"Cyrillic" , "[\\x{0400}-\\x{04FF}]" , 19 },
742+ {"Armenian" , "[\\x{0530}-\\x{058F}]" , 19 },
743+ {"Hebrew" , "[\\x{0590}-\\x{05FF}]" , 19 },
744+ {"Arabic" , "[\\x{0600}-\\x{06FF}]" , 19 },
745+ {"Syriac" , "[\\x{0700}-\\x{074F}]" , 19 },
746+ {"Thaana" , "[\\x{0780}-\\x{07BF}]" , 19 },
747+ {"Devanagari" , "[\\x{0900}-\\x{097F}]" , 19 },
748+ {"Bengali" , "[\\x{0980}-\\x{09FF}]" , 19 },
749+ {"Gurmukhi" , "[\\x{0A00}-\\x{0A7F}]" , 19 },
750+ {"Gujarati" , "[\\x{0A80}-\\x{0AFF}]" , 19 },
751+ {"Oriya" , "[\\x{0B00}-\\x{0B7F}]" , 19 },
752+ {"Tamil" , "[\\x{0B80}-\\x{0BFF}]" , 19 },
753+ {"Telugu" , "[\\x{0C00}-\\x{0C7F}]" , 19 },
754+ {"Kannada" , "[\\x{0C80}-\\x{0CFF}]" , 19 },
755+ {"Malayalam" , "[\\x{0D00}-\\x{0D7F}]" , 19 },
756+ {"Sinhala" , "[\\x{0D80}-\\x{0DFF}]" , 19 },
757+ {"Thai" , "[\\x{0E00}-\\x{0E7F}]" , 19 },
758+ {"Lao" , "[\\x{0E80}-\\x{0EFF}]" , 19 },
759+ {"Tibetan" , "[\\x{0F00}-\\x{0FFF}]" , 19 },
760+ {"Myanmar" , "[\\x{1000}-\\x{109F}]" , 19 },
761+ {"Georgian" , "[\\x{10A0}-\\x{10FF}]" , 19 },
762+ {"HangulJamo" , "[\\x{1100}-\\x{11FF}]" , 19 },
763+ {"Ethiopic" , "[\\x{1200}-\\x{137F}]" , 19 },
764+ {"Cherokee" , "[\\x{13A0}-\\x{13FF}]" , 19 },
765+ {"UnifiedCanadianAboriginalSyllabics" , "[\\x{1400}-\\x{167F}]" , 19 },
766+ {"Ogham" , "[\\x{1680}-\\x{169F}]" , 19 },
767+ {"Runic" , "[\\x{16A0}-\\x{16FF}]" , 19 },
768+ {"Khmer" , "[\\x{1780}-\\x{17FF}]" , 19 },
769+ {"Mongolian" , "[\\x{1800}-\\x{18AF}]" , 19 },
770+ {"LatinExtendedAdditional" , "[\\x{1E00}-\\x{1EFF}]" , 19 },
771+ {"GreekExtended" , "[\\x{1F00}-\\x{1FFF}]" , 19 },
772+ {"GeneralPunctuation" , "[\\x{2000}-\\x{206F}]" , 19 },
773+ {"SuperscriptsandSubscripts" , "[\\x{2070}-\\x{209F}]" , 19 },
774+ {"CurrencySymbols" , "[\\x{20A0}-\\x{20CF}]" , 19 },
775+ {"CombiningMarksforSymbols" , "[\\x{20D0}-\\x{20FF}]" , 19 },
776+ {"LetterlikeSymbols" , "[\\x{2100}-\\x{214F}]" , 19 },
777+ {"NumberForms" , "[\\x{2150}-\\x{218F}]" , 19 },
778+ {"Arrows" , "[\\x{2190}-\\x{21FF}]" , 19 },
779+ {"MathematicalOperators" , "[\\x{2200}-\\x{22FF}]" , 19 },
780+ {"MiscellaneousTechnical" , "[\\x{2300}-\\x{23FF}]" , 19 },
781+ {"ControlPictures" , "[\\x{2400}-\\x{243F}]" , 19 },
782+ {"OpticalCharacterRecognition" , "[\\x{2440}-\\x{245F}]" , 19 },
783+ {"EnclosedAlphanumerics" , "[\\x{2460}-\\x{24FF}]" , 19 },
784+ {"BoxDrawing" , "[\\x{2500}-\\x{257F}]" , 19 },
785+ {"BlockElements" , "[\\x{2580}-\\x{259F}]" , 19 },
786+ {"GeometricShapes" , "[\\x{25A0}-\\x{25FF}]" , 19 },
787+ {"MiscellaneousSymbols" , "[\\x{2600}-\\x{26FF}]" , 19 },
788+ {"Dingbats" , "[\\x{2700}-\\x{27BF}]" , 19 },
789+ {"BraillePatterns" , "[\\x{2800}-\\x{28FF}]" , 19 },
790+ {"CJKRadicalsSupplement" , "[\\x{2E80}-\\x{2EFF}]" , 19 },
791+ {"KangxiRadicals" , "[\\x{2F00}-\\x{2FDF}]" , 19 },
792+ {"IdeographicDescriptionCharacters" , "[\\x{2FF0}-\\x{2FFF}]" , 19 },
793+ {"CJKSymbolsandPunctuation" , "[\\x{3000}-\\x{303F}]" , 19 },
794+ {"Hiragana" , "[\\x{3040}-\\x{309F}]" , 19 },
795+ {"Katakana" , "[\\x{30A0}-\\x{30FF}]" , 19 },
796+ {"Bopomofo" , "[\\x{3100}-\\x{312F}]" , 19 },
797+ {"HangulCompatibilityJamo" , "[\\x{3130}-\\x{318F}]" , 19 },
798+ {"Kanbun" , "[\\x{3190}-\\x{319F}]" , 19 },
799+ {"BopomofoExtended" , "[\\x{31A0}-\\x{31BF}]" , 19 },
800+ {"EnclosedCJKLettersandMonths" , "[\\x{3200}-\\x{32FF}]" , 19 },
801+ {"CJKCompatibility" , "[\\x{3300}-\\x{33FF}]" , 19 },
802+ {"CJKUnifiedIdeographsExtensionA" , "[\\x{3400}-\\x{4DB5}]" , 19 },
803+ {"CJKUnifiedIdeographs" , "[\\x{4E00}-\\x{9FFF}]" , 19 },
804+ {"YiSyllables" , "[\\x{A000}-\\x{A48F}]" , 19 },
805+ {"YiRadicals" , "[\\x{A490}-\\x{A4CF}]" , 19 },
806+ {"HangulSyllables" , "[\\x{AC00}-\\x{D7A3}]" , 19 },
807+ {"PrivateUse" , "[\\x{E000}-\\x{F8FF}]" , 19 },
808+ {"CJKCompatibilityIdeographs" , "[\\x{F900}-\\x{FAFF}]" , 19 },
809+ {"AlphabeticPresentationForms" , "[\\x{FB00}-\\x{FB4F}]" , 19 },
810+ {"ArabicPresentationForms-A" , "[\\x{FB50}-\\x{FDFF}]" , 19 },
811+ {"CombiningHalfMarks" , "[\\x{FE20}-\\x{FE2F}]" , 19 },
812+ {"CJKCompatibilityForms" , "[\\x{FE30}-\\x{FE4F}]" , 19 },
813+ {"SmallFormVariants" , "[\\x{FE50}-\\x{FE6F}]" , 19 },
814+ {"ArabicPresentationForms-B" , "[\\x{FE70}-\\x{FEFE}]" , 19 },
815+ {"HalfwidthandFullwidthForms" , "[\\x{FF00}-\\x{FFEF}]" , 19 },
816+ {"Specials" , "[\\x{FEFF}|\\x{FFF0}-\\x{FFFD}]" , 28 },
817+ {NULL , NULL , 0 }
814818 };
815819
816820 size_t idx , idx2 , start , end , ublock ;
@@ -829,28 +833,31 @@ ly_pat_compile_xmlschema_chblocks_xmlschema2perl(const char *pattern, char **reg
829833 }
830834 end = (ptr - perl_regex ) + 1 ;
831835
832- /* need more space */
833- if (end - start < URANGE_LEN ) {
834- perl_regex = ly_realloc (perl_regex , strlen (perl_regex ) + (URANGE_LEN - (end - start )) + 1 );
835- * regex = perl_regex ;
836- if (!perl_regex ) {
837- return ly_err_new (err , LY_EMEM , 0 , NULL , NULL , LY_EMEM_MSG );
838- }
839- }
840836
841837 /* find our range */
842- for (idx = 0 ; ublock2urange [idx ][ 0 ] ; ++ idx ) {
838+ for (idx = 0 ; ublock2urange [idx ]. ublock ; ++ idx ) {
843839 if (!strncmp (perl_regex + start + ly_strlen_const ("\\p{Is" ),
844- ublock2urange [idx ][ 0 ] , strlen (ublock2urange [idx ][ 0 ] ))) {
840+ ublock2urange [idx ]. ublock , strlen (ublock2urange [idx ]. ublock ))) {
845841 break ;
846842 }
847843 }
848- if (!ublock2urange [idx ][ 0 ] ) {
844+ if (!ublock2urange [idx ]. ublock ) {
849845 return ly_err_new (err , LY_EVALID , 0 , NULL , NULL , "Regular expression \"%s\" is not valid (\"%s\": %s)." ,
850846 pattern , perl_regex + start + 5 , "unknown block name" );
851847 }
852848 ublock = idx ;
853849
850+ /* need more space */
851+ size_t urange_len = ublock2urange [ublock ].size ;
852+ if (end - start < urange_len ) {
853+ perl_regex = ly_realloc (perl_regex , strlen (perl_regex ) + (urange_len - (end - start )) + 1 );
854+ * regex = perl_regex ;
855+ if (!perl_regex ) {
856+ return ly_err_new (err , LY_EMEM , 0 , NULL , NULL , LY_EMEM_MSG );
857+ }
858+ }
859+
860+
854861 /* make the space in the string and replace the block (but we cannot include brackets if it was already enclosed in them) */
855862 for (idx2 = 0 , idx = 0 ; idx2 < start ; ++ idx2 ) {
856863 if ((perl_regex [idx2 ] == '[' ) && (!idx2 || (perl_regex [idx2 - 1 ] != '\\' ))) {
@@ -863,11 +870,11 @@ ly_pat_compile_xmlschema_chblocks_xmlschema2perl(const char *pattern, char **reg
863870 }
864871 if (idx ) {
865872 /* skip brackets */
866- memmove (perl_regex + start + (URANGE_LEN - 2 ), perl_regex + end , strlen (perl_regex + end ) + 1 );
867- memcpy (perl_regex + start , ublock2urange [ublock ][ 1 ] + 1 , URANGE_LEN - 2 );
873+ memmove (perl_regex + start + (urange_len - 2 ), perl_regex + end , strlen (perl_regex + end ) + 1 );
874+ memcpy (perl_regex + start , ublock2urange [ublock ]. urange + 1 , urange_len - 2 );
868875 } else {
869- memmove (perl_regex + start + URANGE_LEN , perl_regex + end , strlen (perl_regex + end ) + 1 );
870- memcpy (perl_regex + start , ublock2urange [ublock ][ 1 ], URANGE_LEN );
876+ memmove (perl_regex + start + urange_len , perl_regex + end , strlen (perl_regex + end ) + 1 );
877+ memcpy (perl_regex + start , ublock2urange [ublock ]. urange , urange_len );
871878 }
872879 }
873880
@@ -1112,7 +1119,6 @@ ly_pat_match_xmlschema(const void *pat_comp, const char *pattern, const char *st
11121119 int r , match_opts = 0 ;
11131120 pcre2_code * pcode = (void * )pat_comp ;
11141121 pcre2_match_data * match_data = NULL ;
1115-
11161122 if (!pat_comp ) {
11171123 /* compile pattern first */
11181124 rc = ly_pat_compile_xmlschema (pattern , (void * * )& pcode , err );
0 commit comments