1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #ifndef INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H
21 #define INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H
22 
23 #include <sal/types.h>
24 
25 namespace i18npool {
26 
27 const sal_Int16 NumberChar_HalfWidth     = 0;
28 const sal_Int16 NumberChar_FullWidth     = 1;
29 const sal_Int16 NumberChar_Lower_zh  = 2;
30 const sal_Int16 NumberChar_Upper_zh  = 3;
31 const sal_Int16 NumberChar_Upper_zh_TW   = 4;
32 const sal_Int16 NumberChar_Modern_ja     = 5;
33 const sal_Int16 NumberChar_Traditional_ja= 6;
34 const sal_Int16 NumberChar_Lower_ko  = 7;
35 const sal_Int16 NumberChar_Upper_ko  = 8;
36 const sal_Int16 NumberChar_Hangul_ko     = 9;
37 const sal_Int16 NumberChar_Indic_ar  = 10;
38 const sal_Int16 NumberChar_EastIndic_ar  = 11;
39 const sal_Int16 NumberChar_hi        = 12;
40 const sal_Int16 NumberChar_th        = 13;
41 const sal_Int16 NumberChar_or        = 14;
42 const sal_Int16 NumberChar_mr        = 15;
43 const sal_Int16 NumberChar_bn        = 16;
44 const sal_Int16 NumberChar_pa        = 17;
45 const sal_Int16 NumberChar_gu        = 18;
46 const sal_Int16 NumberChar_ta        = 19;
47 const sal_Int16 NumberChar_te        = 20;
48 const sal_Int16 NumberChar_kn        = 21;
49 const sal_Int16 NumberChar_ml        = 22;
50 const sal_Int16 NumberChar_lo        = 23;
51 const sal_Int16 NumberChar_bo        = 24;
52 const sal_Int16 NumberChar_my        = 25;
53 const sal_Int16 NumberChar_km        = 26;
54 const sal_Int16 NumberChar_mn        = 27;
55 const sal_Int16 NumberChar_he        = 28;
56 const sal_Int16 NumberChar_ne        = 29;
57 const sal_Int16 NumberChar_dz        = 30;
58 const sal_Int16 NumberChar_cu        = 31;
59 const sal_Int16 NumberChar_Count     = 32;
60 
61 const sal_Unicode NumberChar[][10] = {
62 //  0   1   2   3   4   5   6   7   8   9
63     { 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039 }, // Half Width (Ascii)
64     { 0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19 }, // Full Width
65     { 0x3007, 0x4E00, 0x4E8c, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Chinese Lower
66     { 0x96F6, 0x58F9, 0x8D30, 0x53C1, 0x8086, 0x4F0D, 0x9646, 0x67D2, 0x634C, 0x7396 }, // S. Chinese Upper
67     { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x8086, 0x4F0D, 0x9678, 0x67D2, 0x634C, 0x7396 }, // T. Chinese Upper
68     { 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Modern
69     { 0x96F6, 0x58F1, 0x5F10, 0x53C2, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Japanese Trad.
70     { 0x96F6, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Lower
71     { 0x96F6, 0x58F9, 0x8CB3, 0x53C3, 0x56DB, 0x4F0D, 0x516D, 0x4E03, 0x516B, 0x4E5D }, // Korean Upper
72     { 0xC601, 0xC77C, 0xC774, 0xC0BC, 0xC0AC, 0xC624, 0xC721, 0xCE60, 0xD314, 0xAD6C }, // Korean Hangul
73     { 0x0660, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669 }, // Arabic Indic
74     { 0x06F0, 0x06F1, 0x06F2, 0x06F3, 0x06F4, 0x06F5, 0x06F6, 0x06F7, 0x06F8, 0x06F9 }, // Est. Arabic Indic
75     { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Indic (Devanagari)
76     { 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59 }, // Thai
77     { 0x0866, 0x0867, 0x0868, 0x0869, 0x086A, 0x086B, 0x086C, 0x086D, 0x086E, 0x086F }, // Odia
78     { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Marathi
79     { 0x09E6, 0x09E7, 0x09E8, 0x09E9, 0x09EA, 0x09EB, 0x09EC, 0x09ED, 0x09EE, 0x09EF }, // Bengali
80     { 0x0A66, 0x0A67, 0x0A68, 0x0A69, 0x0A6A, 0x0A6B, 0x0A6C, 0x0A6D, 0x0A6E, 0x0A6F }, // Punjabi (Gurmukhi)
81     { 0x0AE6, 0x0AE7, 0x0AE8, 0x0AE9, 0x0AEA, 0x0AEB, 0x0AEC, 0x0AED, 0x0AEE, 0x0AEF }, // Gujarati
82     { 0x0030, 0x0BE7, 0x0BE8, 0x0BE9, 0x0BEA, 0x0BEB, 0x0BEC, 0x0BED, 0x0BEE, 0x0BEF }, // Tamil
83     { 0x0C66, 0x0C67, 0x0C68, 0x0C69, 0x0C6A, 0x0C6B, 0x0C6C, 0x0C6D, 0x0C6E, 0x0C6F }, // Telugu
84     { 0x0CE6, 0x0CE7, 0x0CE8, 0x0CE9, 0x0CEA, 0x0CEB, 0x0CEC, 0x0CED, 0x0CEE, 0x0CEF }, // Kannada
85     { 0x0DE6, 0x0DE7, 0x0DE8, 0x0DE9, 0x0DEA, 0x0DEB, 0x0DEC, 0x0DED, 0x0DEE, 0x0DEF }, // Malayalam
86     { 0x0ED0, 0x0ED1, 0x0ED2, 0x0ED3, 0x0ED4, 0x0ED5, 0x0ED6, 0x0ED7, 0x0ED8, 0x0ED9 }, // Lao
87     { 0x0F20, 0x0F21, 0x0F22, 0x0F23, 0x0F24, 0x0F25, 0x0F26, 0x0F27, 0x0F28, 0x0F29 }, // Tibetan
88     { 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, 0x1048, 0x1049 }, // Myanmar
89     { 0x17E0, 0x17E1, 0x17E2, 0x17E3, 0x17E4, 0x17E5, 0x17E6, 0x17E7, 0x17E8, 0x17E9 }, // Cambodian (Khmer)
90     { 0x1810, 0x1811, 0x1812, 0x1813, 0x1814, 0x1815, 0x1816, 0x1817, 0x1818, 0x1819 }, // Mongolian
91     { 0x0020, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8 }, // Hebrew
92     { 0x0966, 0x0967, 0x0968, 0x0969, 0x096A, 0x096B, 0x096C, 0x096D, 0x096E, 0x096F }, // Nepali
93     { 0x0F20, 0x0F21, 0x0F22, 0x0F23, 0x0F24, 0x0F25, 0x0F26, 0x0F27, 0x0F28, 0x0F29 }, // Dzongkha
94     { 0x0030, 0x0430, 0x0432, 0x0433, 0x0434, 0x0454, 0x0455, 0x0437, 0x0438, 0x0473 }, // Church Slavic
95 };
96 
97 static sal_Unicode DecimalChar[] = {
98     0x002E, // Half Width (Ascii)
99     0xFF0E, // Full Width
100     0xFF0E, // Chinese Lower
101     0x70B9, // S. Chinese Upper
102     0x9EDE, // T. Chinese Upper
103     0x30FB, // Japanese Modern
104     0x30FB, // Japanese Trad.
105     0xFF0E, // Korean Lower
106     0x9EDE, // Korean Upper
107     0xC810, // Korean Hangul
108     0x066B, // Arabic Indic
109     0x066B, // Est. Arabic Indic
110     0x0000, // Indic (Devanagari)
111     0x0000, // Thai
112     0x0000, // Odia
113     0x0000, // Marathi
114     0x0000, // Bengali
115     0x0000, // Punjabi (Gurmukhi)
116     0x0000, // Gujarati
117     0x0000, // Tamil
118     0x0000, // Telugu
119     0x0000, // Kannada
120     0x0000, // Malayalam
121     0x0000, // Lao
122     0x0000, // Tibetan
123     0x0000, // Myanmar
124     0x0000, // Cambodian (Khmer)
125     0x0000, // Mongolian
126     0x0000, // Hebrew
127     0x0000, // Nepali
128     0x0000, // Dzongkha
129     0x0000, // Church Slavic
130 };
131 
132 const sal_Unicode MinusChar[] = {
133     0x002D, // Half Width (Ascii)
134     0xFF0D, // Full Width
135     0xFF0D, // Chinese Lower
136     0x8D1F, // S. Chinese Upper
137     0x8CA0, // T. Chinese Upper
138     0x2212, // Japanese Modern
139     0x2212, // Japanese Trad.
140     0xFF0D, // Korean Lower
141     0xFF0D, // Korean Upper
142     0xFF0D, // Korean Hangul
143     0x0000, // Arabic Indic
144     0x2212, // Est. Arabic Indic
145     0x0000, // Indic
146     0x0000, // Thai
147     0x0000, // Odia
148     0x0000, // Marathi
149     0x0000, // Bengali
150     0x0000, // Punjabi
151     0x0000, // Gujarati
152     0x0000, // Tamil
153     0x0000, // Telugu
154     0x0000, // Kannada
155     0x0000, // Malayalam
156     0x0000, // Lao
157     0x0000, // Tibetan
158     0x0000, // Myanmar
159     0x0000, // Cambodian (Khmer)
160     0x0000, // Mongolian
161     0x0000, // Hebrew
162     0x0000, // Nepali
163     0x0000, // Dzongkha
164     0x0000, // Church Slavic
165 };
166 
167 static sal_uInt16 SeparatorChar[] = {
168     0x002C, // Half Width (Ascii)
169     0xFF0C, // Full Width
170     0x3001, // Chinese Lower
171     0x3001, // S. Chinese Upper
172     0x3001, // T. Chinese Upper
173     0x3001, // Japanese Modern
174     0x3001, // Japanese Trad.
175     0x002C, // Korean Lower
176     0x002C, // Korean Upper
177     0x002C, // Korean Hangul
178     0x0000, // Arabic Indic
179     0x066C, // Est. Arabic Indic
180     0x0000, // Indic
181     0x0000, // Thai
182     0x0000, // Odia
183     0x0000, // Marathi
184     0x0000, // Bengali
185     0x0000, // Punjabi
186     0x0000, // Gujarati
187     0x0000, // Tamil
188     0x0000, // Telugu
189     0x0000, // Kannada
190     0x0000, // Malayalam
191     0x0000, // Lao
192     0x0000, // Tibetan
193     0x0000, // Myanmar
194     0x0000, // Cambodian (Khmer)
195     0x0000, // Mongolian
196     0x0000, // Hebrew
197     0x0000, // Nepali
198     0x0000, // Dzongkha
199     0x0000, // Church Slavic
200 };
201 
// ASCII digit landmarks, read from the half-width row of NumberChar.
#define NUMBER_ZERO     NumberChar[NumberChar_HalfWidth][0]     // 0x0030
#define NUMBER_ONE      NumberChar[NumberChar_HalfWidth][1]     // 0x0031
#define NUMBER_NINE     NumberChar[NumberChar_HalfWidth][9]     // 0x0039

// Classification helpers for half-width (ASCII) number text.
// The argument is parenthesised so that expressions such as `a ? b : c` or
// `x | y` are compared as a whole instead of being re-associated with the
// operators inside the macro. Note that isNumber() still evaluates its
// argument twice, so do not pass expressions with side effects.
#define isNumber(n)     ( NUMBER_ZERO <= (n) && (n) <= NUMBER_NINE )
#define isDecimal(n)    ( (n) == DecimalChar[NumberChar_HalfWidth] )
#define isMinus(n)      ( (n) == MinusChar[NumberChar_HalfWidth] )
#define isSeparator(n)  ( (n) == SeparatorChar[NumberChar_HalfWidth] )
209 
210 const sal_Int16 Multiplier_Lower_zh     = 0;
211 const sal_Int16 Multiplier_Upper_zh     = 1;
212 const sal_Int16 Multiplier_Lower_zh_TW  = 2;
213 const sal_Int16 Multiplier_Upper_zh_TW  = 3;
214 const sal_Int16 Multiplier_Lower_ko     = 4;
215 const sal_Int16 Multiplier_Upper_ko     = 5;
216 const sal_Int16 Multiplier_Hangul_ko    = 6;
217 const sal_Int16 Multiplier_Modern_ja    = 7;
218 const sal_Int16 Multiplier_Traditional_ja = 8;
219 const sal_Int16 Multiplier_Count    = 9;
220 
221 const sal_Int16 ExponentCount_6_CJK     = 6;
222 
223 const sal_Int16 MultiplierExponent_6_CJK[ExponentCount_6_CJK] = {
224         12,     8,  4,  3,  2,  1
225 };
226 const sal_Unicode MultiplierChar_6_CJK[][ExponentCount_6_CJK] = {
227     {0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341},   // S. Chinese Lower
228     {0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE},   // S. Chinese Upper
229     {0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341},   // T. Chinese Lower
230     {0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE},   // T. Chinese Upper
231     {0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341},   // Korean Lower
232     {0x5146, 0x5104, 0x842C, 0x9621, 0x4F70, 0x62FE},   // Korean Upper
233     {0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED},   // Korean Hangul
234     {0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341},   // Japanese Modern
235     {0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE}    // Japanese Traditional
236 };
237 
238 const sal_Int16 ExponentCount_2_CJK     = 2;
239 
240 const sal_Int16 MultiplierExponent_2_CJK[ExponentCount_2_CJK] = {
241         8,  4,
242 };
243 
244 const sal_Unicode MultiplierChar_2_CJK[][ExponentCount_2_CJK] = {
245     {0x4EBF, 0x4E07}, // S. Chinese Lower
246     {0x4EBF, 0x4E07}, // S. Chinese Upper
247     {0x5104, 0x842C}, // T. Chinese Lower
248     {0x5104, 0x842C}, // T. Chinese Upper
249     {0x5104, 0x4E07}, // Korean Lower
250     {0x5104, 0x842C}, // Korean Upper
251     {0xC5B5, 0xB9CC}, // Korean Hangul
252     {0x5104, 0x4E07}, // Japanese Modern
253     {0x5104, 0x842C}  // Japanese Traditional
254 };
255 
256 const sal_Int16 ExponentCount_7_CJK     = 7;
257 
258 const sal_Int16 MultiplierExponent_7_CJK[ExponentCount_7_CJK] = {
259         16, 12,     8,  4,  3,  2,  1
260 };
261 const sal_Unicode MultiplierChar_7_CJK[][ExponentCount_7_CJK] = {
262     {0x4EAC, 0x5146, 0x4EBF, 0x4E07, 0x5343, 0x767E, 0x5341},   // S. Chinese Lower
263     {0x4EAC, 0x5146, 0x4EBF, 0x4E07, 0x4EDF, 0x4F70, 0x62FE},   // S. Chinese Upper
264     {0x4EAC, 0x5146, 0x5104, 0x842C, 0x5343, 0x767E, 0x5341},   // T. Chinese Lower
265     {0x4EAC, 0x5146, 0x5104, 0x842C, 0x4EDF, 0x4F70, 0x62FE},   // T. Chinese Upper
266     {0x4EAC, 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341},   // Korean Lower
267     {0x4EAC, 0x5146, 0x5104, 0x842C, 0x9621, 0x4F70, 0x62FE},   // Korean Upper
268     {0x4EAC, 0xC870, 0xC5B5, 0xB9CC, 0xCC9C, 0xBC31, 0xC2ED},   // Korean Hangul
269     {0x4EAC, 0x5146, 0x5104, 0x4E07, 0x5343, 0x767E, 0x5341},   // Japanese Modern
270     {0x4EAC, 0x5146, 0x5104, 0x842C, 0x9621, 0x767E, 0x62FE}    // Japanese Traditional
271 };
272 
273 }
274 
275 #endif // INCLUDED_I18NPOOL_SOURCE_NATIVENUMBER_DATA_NUMBERCHAR_H
276 
277 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
278