@@ -35,84 +35,84 @@ module ArabicShaping
3535 # Maps Arabic base characters to their presentation forms:
3636 # [isolated, final, initial, medial]
3737 ARABIC_FORMS = {
38- 0x0621 => [ 0xFE80 , nil , nil , nil ] , # HAMZA
39- 0x0622 => [ 0xFE81 , 0xFE82 , nil , nil ] , # ALEF WITH MADDA ABOVE
40- 0x0623 => [ 0xFE83 , 0xFE84 , nil , nil ] , # ALEF WITH HAMZA ABOVE
41- 0x0624 => [ 0xFE85 , 0xFE86 , nil , nil ] , # WAW WITH HAMZA ABOVE
42- 0x0625 => [ 0xFE87 , 0xFE88 , nil , nil ] , # ALEF WITH HAMZA BELOW
43- 0x0626 => [ 0xFE89 , 0xFE8A , 0xFE8B , 0xFE8C ] , # YEH WITH HAMZA ABOVE
44- 0x0627 => [ 0xFE8D , 0xFE8E , nil , nil ] , # ALEF
45- 0x0628 => [ 0xFE8F , 0xFE90 , 0xFE91 , 0xFE92 ] , # BEH
46- 0x0629 => [ 0xFE93 , 0xFE94 , nil , nil ] , # TEH MARBUTA
47- 0x062A => [ 0xFE95 , 0xFE96 , 0xFE97 , 0xFE98 ] , # TEH
48- 0x062B => [ 0xFE99 , 0xFE9A , 0xFE9B , 0xFE9C ] , # THEH
49- 0x062C => [ 0xFE9D , 0xFE9E , 0xFE9F , 0xFEA0 ] , # JEEM
50- 0x062D => [ 0xFEA1 , 0xFEA2 , 0xFEA3 , 0xFEA4 ] , # HAH
51- 0x062E => [ 0xFEA5 , 0xFEA6 , 0xFEA7 , 0xFEA8 ] , # KHAH
52- 0x062F => [ 0xFEA9 , 0xFEAA , nil , nil ] , # DAL
53- 0x0630 => [ 0xFEAB , 0xFEAC , nil , nil ] , # THAL
54- 0x0631 => [ 0xFEAD , 0xFEAE , nil , nil ] , # REH
55- 0x0632 => [ 0xFEAF , 0xFEB0 , nil , nil ] , # ZAIN
56- 0x0633 => [ 0xFEB1 , 0xFEB2 , 0xFEB3 , 0xFEB4 ] , # SEEN
57- 0x0634 => [ 0xFEB5 , 0xFEB6 , 0xFEB7 , 0xFEB8 ] , # SHEEN
58- 0x0635 => [ 0xFEB9 , 0xFEBA , 0xFEBB , 0xFEBC ] , # SAD
59- 0x0636 => [ 0xFEBD , 0xFEBE , 0xFEBF , 0xFEC0 ] , # DAD
60- 0x0637 => [ 0xFEC1 , 0xFEC2 , 0xFEC3 , 0xFEC4 ] , # TAH
61- 0x0638 => [ 0xFEC5 , 0xFEC6 , 0xFEC7 , 0xFEC8 ] , # ZAH
62- 0x0639 => [ 0xFEC9 , 0xFECA , 0xFECB , 0xFECC ] , # AIN
63- 0x063A => [ 0xFECD , 0xFECE , 0xFECF , 0xFED0 ] , # GHAIN
64- 0x0640 => [ 0x0640 , 0x0640 , 0x0640 , 0x0640 ] , # TATWEEL
65- 0x0641 => [ 0xFED1 , 0xFED2 , 0xFED3 , 0xFED4 ] , # FEH
66- 0x0642 => [ 0xFED5 , 0xFED6 , 0xFED7 , 0xFED8 ] , # QAF
67- 0x0643 => [ 0xFED9 , 0xFEDA , 0xFEDB , 0xFEDC ] , # KAF
68- 0x0644 => [ 0xFEDD , 0xFEDE , 0xFEDF , 0xFEE0 ] , # LAM
69- 0x0645 => [ 0xFEE1 , 0xFEE2 , 0xFEE3 , 0xFEE4 ] , # MEEM
70- 0x0646 => [ 0xFEE5 , 0xFEE6 , 0xFEE7 , 0xFEE8 ] , # NOON
71- 0x0647 => [ 0xFEE9 , 0xFEEA , 0xFEEB , 0xFEEC ] , # HEH
72- 0x0648 => [ 0xFEED , 0xFEEE , nil , nil ] , # WAW
73- 0x0649 => [ 0xFEEF , 0xFEF0 , nil , nil ] , # ALEF MAKSURA
74- 0x064A => [ 0xFEF1 , 0xFEF2 , 0xFEF3 , 0xFEF4 ] , # YEH
38+ 0x0621 => [ 0xFE80 , nil , nil , nil ] , # HAMZA
39+ 0x0622 => [ 0xFE81 , 0xFE82 , nil , nil ] , # ALEF WITH MADDA ABOVE
40+ 0x0623 => [ 0xFE83 , 0xFE84 , nil , nil ] , # ALEF WITH HAMZA ABOVE
41+ 0x0624 => [ 0xFE85 , 0xFE86 , nil , nil ] , # WAW WITH HAMZA ABOVE
42+ 0x0625 => [ 0xFE87 , 0xFE88 , nil , nil ] , # ALEF WITH HAMZA BELOW
43+ 0x0626 => [ 0xFE89 , 0xFE8A , 0xFE8B , 0xFE8C ] , # YEH WITH HAMZA ABOVE
44+ 0x0627 => [ 0xFE8D , 0xFE8E , nil , nil ] , # ALEF
45+ 0x0628 => [ 0xFE8F , 0xFE90 , 0xFE91 , 0xFE92 ] , # BEH
46+ 0x0629 => [ 0xFE93 , 0xFE94 , nil , nil ] , # TEH MARBUTA
47+ 0x062A => [ 0xFE95 , 0xFE96 , 0xFE97 , 0xFE98 ] , # TEH
48+ 0x062B => [ 0xFE99 , 0xFE9A , 0xFE9B , 0xFE9C ] , # THEH
49+ 0x062C => [ 0xFE9D , 0xFE9E , 0xFE9F , 0xFEA0 ] , # JEEM
50+ 0x062D => [ 0xFEA1 , 0xFEA2 , 0xFEA3 , 0xFEA4 ] , # HAH
51+ 0x062E => [ 0xFEA5 , 0xFEA6 , 0xFEA7 , 0xFEA8 ] , # KHAH
52+ 0x062F => [ 0xFEA9 , 0xFEAA , nil , nil ] , # DAL
53+ 0x0630 => [ 0xFEAB , 0xFEAC , nil , nil ] , # THAL
54+ 0x0631 => [ 0xFEAD , 0xFEAE , nil , nil ] , # REH
55+ 0x0632 => [ 0xFEAF , 0xFEB0 , nil , nil ] , # ZAIN
56+ 0x0633 => [ 0xFEB1 , 0xFEB2 , 0xFEB3 , 0xFEB4 ] , # SEEN
57+ 0x0634 => [ 0xFEB5 , 0xFEB6 , 0xFEB7 , 0xFEB8 ] , # SHEEN
58+ 0x0635 => [ 0xFEB9 , 0xFEBA , 0xFEBB , 0xFEBC ] , # SAD
59+ 0x0636 => [ 0xFEBD , 0xFEBE , 0xFEBF , 0xFEC0 ] , # DAD
60+ 0x0637 => [ 0xFEC1 , 0xFEC2 , 0xFEC3 , 0xFEC4 ] , # TAH
61+ 0x0638 => [ 0xFEC5 , 0xFEC6 , 0xFEC7 , 0xFEC8 ] , # ZAH
62+ 0x0639 => [ 0xFEC9 , 0xFECA , 0xFECB , 0xFECC ] , # AIN
63+ 0x063A => [ 0xFECD , 0xFECE , 0xFECF , 0xFED0 ] , # GHAIN
64+ 0x0640 => [ 0x0640 , 0x0640 , 0x0640 , 0x0640 ] , # TATWEEL
65+ 0x0641 => [ 0xFED1 , 0xFED2 , 0xFED3 , 0xFED4 ] , # FEH
66+ 0x0642 => [ 0xFED5 , 0xFED6 , 0xFED7 , 0xFED8 ] , # QAF
67+ 0x0643 => [ 0xFED9 , 0xFEDA , 0xFEDB , 0xFEDC ] , # KAF
68+ 0x0644 => [ 0xFEDD , 0xFEDE , 0xFEDF , 0xFEE0 ] , # LAM
69+ 0x0645 => [ 0xFEE1 , 0xFEE2 , 0xFEE3 , 0xFEE4 ] , # MEEM
70+ 0x0646 => [ 0xFEE5 , 0xFEE6 , 0xFEE7 , 0xFEE8 ] , # NOON
71+ 0x0647 => [ 0xFEE9 , 0xFEEA , 0xFEEB , 0xFEEC ] , # HEH
72+ 0x0648 => [ 0xFEED , 0xFEEE , nil , nil ] , # WAW
73+ 0x0649 => [ 0xFEEF , 0xFEF0 , nil , nil ] , # ALEF MAKSURA
74+ 0x064A => [ 0xFEF1 , 0xFEF2 , 0xFEF3 , 0xFEF4 ] , # YEH
7575
7676 # Extended Arabic (Farsi, Urdu, etc.)
77- 0x0671 => [ 0xFB50 , 0xFB51 , nil , nil ] , # ALEF WASLA
78- 0x0679 => [ 0xFB66 , 0xFB67 , 0xFB68 , 0xFB69 ] , # TTEH
79- 0x067A => [ 0xFB5E , 0xFB5F , 0xFB60 , 0xFB61 ] , # TTEHEH
80- 0x067B => [ 0xFB52 , 0xFB53 , 0xFB54 , 0xFB55 ] , # BEEH
81- 0x067E => [ 0xFB56 , 0xFB57 , 0xFB58 , 0xFB59 ] , # PEH
82- 0x067F => [ 0xFB62 , 0xFB63 , 0xFB64 , 0xFB65 ] , # TEHEH
83- 0x0680 => [ 0xFB5A , 0xFB5B , 0xFB5C , 0xFB5D ] , # BEHEH
84- 0x0683 => [ 0xFB76 , 0xFB77 , 0xFB78 , 0xFB79 ] , # NYEH
85- 0x0684 => [ 0xFB72 , 0xFB73 , 0xFB74 , 0xFB75 ] , # DYEH
86- 0x0686 => [ 0xFB7A , 0xFB7B , 0xFB7C , 0xFB7D ] , # TCHEH
87- 0x0687 => [ 0xFB7E , 0xFB7F , 0xFB80 , 0xFB81 ] , # TCHEHEH
88- 0x0688 => [ 0xFB88 , 0xFB89 , nil , nil ] , # DDAL
89- 0x068C => [ 0xFB84 , 0xFB85 , nil , nil ] , # DAHAL
90- 0x068D => [ 0xFB82 , 0xFB83 , nil , nil ] , # DDAHAL
91- 0x068E => [ 0xFB86 , 0xFB87 , nil , nil ] , # DUL
92- 0x0691 => [ 0xFB8C , 0xFB8D , nil , nil ] , # RREH
93- 0x0698 => [ 0xFB8A , 0xFB8B , nil , nil ] , # JEH
94- 0x06A4 => [ 0xFB6A , 0xFB6B , 0xFB6C , 0xFB6D ] , # VEH
95- 0x06A6 => [ 0xFB6E , 0xFB6F , 0xFB70 , 0xFB71 ] , # PEHEH
96- 0x06A9 => [ 0xFB8E , 0xFB8F , 0xFB90 , 0xFB91 ] , # KEHEH
97- 0x06AD => [ 0xFBD3 , 0xFBD4 , 0xFBD5 , 0xFBD6 ] , # NG
98- 0x06AF => [ 0xFB92 , 0xFB93 , 0xFB94 , 0xFB95 ] , # GAF
99- 0x06B1 => [ 0xFB9A , 0xFB9B , 0xFB9C , 0xFB9D ] , # NGOEH
100- 0x06B3 => [ 0xFB96 , 0xFB97 , 0xFB98 , 0xFB99 ] , # GUEH
101- 0x06BA => [ 0xFB9E , 0xFB9F , nil , nil ] , # NOON GHUNNA
102- 0x06BB => [ 0xFBA0 , 0xFBA1 , 0xFBA2 , 0xFBA3 ] , # RNOON
103- 0x06BE => [ 0xFBAA , 0xFBAB , 0xFBAC , 0xFBAD ] , # HEH DOACHASHMEE
104- 0x06C0 => [ 0xFBA4 , 0xFBA5 , nil , nil ] , # HEH WITH YEH ABOVE
105- 0x06C1 => [ 0xFBA6 , 0xFBA7 , 0xFBA8 , 0xFBA9 ] , # HEH GOAL
106- 0x06C5 => [ 0xFBE0 , 0xFBE1 , nil , nil ] , # KIRGHIZ OE
107- 0x06C6 => [ 0xFBD9 , 0xFBDA , nil , nil ] , # OE
108- 0x06C7 => [ 0xFBD7 , 0xFBD8 , nil , nil ] , # U
109- 0x06C8 => [ 0xFBDB , 0xFBDC , nil , nil ] , # YU
110- 0x06C9 => [ 0xFBE2 , 0xFBE3 , nil , nil ] , # KIRGHIZ YU
111- 0x06CB => [ 0xFBDE , 0xFBDF , nil , nil ] , # VE
112- 0x06CC => [ 0xFBFC , 0xFBFD , 0xFBFE , 0xFBFF ] , # FARSI YEH
113- 0x06D0 => [ 0xFBE4 , 0xFBE5 , 0xFBE6 , 0xFBE7 ] , # E
114- 0x06D2 => [ 0xFBAE , 0xFBAF , nil , nil ] , # YEH BARREE
115- 0x06D3 => [ 0xFBB0 , 0xFBB1 , nil , nil ] , # YEH BARREE WITH HAMZA ABOVE
77+ 0x0671 => [ 0xFB50 , 0xFB51 , nil , nil ] , # ALEF WASLA
78+ 0x0679 => [ 0xFB66 , 0xFB67 , 0xFB68 , 0xFB69 ] , # TTEH
79+ 0x067A => [ 0xFB5E , 0xFB5F , 0xFB60 , 0xFB61 ] , # TTEHEH
80+ 0x067B => [ 0xFB52 , 0xFB53 , 0xFB54 , 0xFB55 ] , # BEEH
81+ 0x067E => [ 0xFB56 , 0xFB57 , 0xFB58 , 0xFB59 ] , # PEH
82+ 0x067F => [ 0xFB62 , 0xFB63 , 0xFB64 , 0xFB65 ] , # TEHEH
83+ 0x0680 => [ 0xFB5A , 0xFB5B , 0xFB5C , 0xFB5D ] , # BEHEH
84+ 0x0683 => [ 0xFB76 , 0xFB77 , 0xFB78 , 0xFB79 ] , # NYEH
85+ 0x0684 => [ 0xFB72 , 0xFB73 , 0xFB74 , 0xFB75 ] , # DYEH
86+ 0x0686 => [ 0xFB7A , 0xFB7B , 0xFB7C , 0xFB7D ] , # TCHEH
87+ 0x0687 => [ 0xFB7E , 0xFB7F , 0xFB80 , 0xFB81 ] , # TCHEHEH
88+ 0x0688 => [ 0xFB88 , 0xFB89 , nil , nil ] , # DDAL
89+ 0x068C => [ 0xFB84 , 0xFB85 , nil , nil ] , # DAHAL
90+ 0x068D => [ 0xFB82 , 0xFB83 , nil , nil ] , # DDAHAL
91+ 0x068E => [ 0xFB86 , 0xFB87 , nil , nil ] , # DUL
92+ 0x0691 => [ 0xFB8C , 0xFB8D , nil , nil ] , # RREH
93+ 0x0698 => [ 0xFB8A , 0xFB8B , nil , nil ] , # JEH
94+ 0x06A4 => [ 0xFB6A , 0xFB6B , 0xFB6C , 0xFB6D ] , # VEH
95+ 0x06A6 => [ 0xFB6E , 0xFB6F , 0xFB70 , 0xFB71 ] , # PEHEH
96+ 0x06A9 => [ 0xFB8E , 0xFB8F , 0xFB90 , 0xFB91 ] , # KEHEH
97+ 0x06AD => [ 0xFBD3 , 0xFBD4 , 0xFBD5 , 0xFBD6 ] , # NG
98+ 0x06AF => [ 0xFB92 , 0xFB93 , 0xFB94 , 0xFB95 ] , # GAF
99+ 0x06B1 => [ 0xFB9A , 0xFB9B , 0xFB9C , 0xFB9D ] , # NGOEH
100+ 0x06B3 => [ 0xFB96 , 0xFB97 , 0xFB98 , 0xFB99 ] , # GUEH
101+ 0x06BA => [ 0xFB9E , 0xFB9F , nil , nil ] , # NOON GHUNNA
102+ 0x06BB => [ 0xFBA0 , 0xFBA1 , 0xFBA2 , 0xFBA3 ] , # RNOON
103+ 0x06BE => [ 0xFBAA , 0xFBAB , 0xFBAC , 0xFBAD ] , # HEH DOACHASHMEE
104+ 0x06C0 => [ 0xFBA4 , 0xFBA5 , nil , nil ] , # HEH WITH YEH ABOVE
105+ 0x06C1 => [ 0xFBA6 , 0xFBA7 , 0xFBA8 , 0xFBA9 ] , # HEH GOAL
106+ 0x06C5 => [ 0xFBE0 , 0xFBE1 , nil , nil ] , # KIRGHIZ OE
107+ 0x06C6 => [ 0xFBD9 , 0xFBDA , nil , nil ] , # OE
108+ 0x06C7 => [ 0xFBD7 , 0xFBD8 , nil , nil ] , # U
109+ 0x06C8 => [ 0xFBDB , 0xFBDC , nil , nil ] , # YU
110+ 0x06C9 => [ 0xFBE2 , 0xFBE3 , nil , nil ] , # KIRGHIZ YU
111+ 0x06CB => [ 0xFBDE , 0xFBDF , nil , nil ] , # VE
112+ 0x06CC => [ 0xFBFC , 0xFBFD , 0xFBFE , 0xFBFF ] , # FARSI YEH
113+ 0x06D0 => [ 0xFBE4 , 0xFBE5 , 0xFBE6 , 0xFBE7 ] , # E
114+ 0x06D2 => [ 0xFBAE , 0xFBAF , nil , nil ] , # YEH BARREE
115+ 0x06D3 => [ 0xFBB0 , 0xFBB1 , nil , nil ] , # YEH BARREE WITH HAMZA ABOVE
116116 } . freeze
117117
118118 # Lam-Alef mandatory ligatures
0 commit comments