xref: /core/sal/textenc/tencinfo.cxx (revision 2e71c439)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <cstring>
23 
24 #include <rtl/tencinfo.h>
25 
26 #include "gettextencodingdata.hxx"
27 #include "tenchelp.hxx"
28 #include <memory>
29 
rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)30 sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
31 {
32     return
33         nEncoding > RTL_TEXTENCODING_DONTKNOW
34         && nEncoding != 9 // RTL_TEXTENCODING_SYSTEM
35         && nEncoding <= RTL_TEXTENCODING_MAZOVIA; // always update this!
36 }
37 
38 /* ======================================================================= */
39 
/* Copy the NUL-terminated string pName to pBuf, replacing every byte in the
 * range 0x41..0x5A (ASCII 'A'..'Z') by the corresponding lower-case letter.
 * All other bytes are copied unchanged. pBuf is always NUL-terminated and
 * must provide room for strlen(pName)+1 bytes; no bounds are checked here.
 */
static void Impl_toAsciiLower( const char* pName, char* pBuf )
{
    for ( ; *pName; ++pName, ++pBuf )
    {
        char c = *pName;

        /* A-Z: shift into a-z */
        if ( c >= 0x41 && c <= 0x5A )
            c += 0x20;

        *pBuf = c;
    }

    *pBuf = '\0';
}
56 
57 /* ----------------------------------------------------------------------- */
58 
/* Copy the NUL-terminated string pName to pBuf, keeping only ASCII letters
 * and digits. Upper-case letters (0x41..0x5A) are stored as lower case;
 * lower-case letters (0x61..0x7A) and digits (0x30..0x39) are stored as-is;
 * every other byte is dropped. pBuf is always NUL-terminated and must
 * provide room for strlen(pName)+1 bytes; no bounds are checked here.
 */
static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
{
    char* pOut = pBuf;

    for ( const char* pIn = pName; *pIn; ++pIn )
    {
        const char c = *pIn;
        const bool bUpper = ( c >= 0x41 ) && ( c <= 0x5A );   /* A-Z */
        const bool bLower = ( c >= 0x61 ) && ( c <= 0x7A );   /* a-z */
        const bool bDigit = ( c >= 0x30 ) && ( c <= 0x39 );   /* 0-9 */

        if ( bUpper )
            *pOut++ = c + 0x20;     /* toAsciiLower */
        else if ( bLower || bDigit )
            *pOut++ = c;
        /* anything else is skipped */
    }

    *pOut = '\0';
}
82 
83 /* ----------------------------------------------------------------------- */
84 
/* Prefix test: returns true if every character of pMatchStr equals the
 * character at the same position in pCompStr, i.e. pCompStr starts with
 * pMatchStr. An empty pMatchStr therefore always matches.
 */
static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
{
    /* Only the end of pMatchStr needs an explicit check: if pCompStr ends */
    /* first, its terminating 0 differs from the current pMatchStr         */
    /* character and the comparison stops the loop.                        */
    while ( *pMatchStr && ( *pCompStr == *pMatchStr ) )
    {
        ++pCompStr;
        ++pMatchStr;
    }

    /* A match means the whole of pMatchStr was consumed */
    return *pMatchStr == '\0';
}
101 
102 /* ======================================================================= */
103 
104 namespace {
105 
106 struct ImplStrCharsetDef
107 {
108     const char*             mpCharsetStr;
109     rtl_TextEncoding        meTextEncoding;
110 };
111 
112 struct ImplStrFirstPartCharsetDef
113 {
114     const char*             mpCharsetStr;
115     const ImplStrCharsetDef*    mpSecondPartTab;
116 };
117 
118 }
119 
120 /* ======================================================================= */
121 
rtl_getTextEncodingInfo(rtl_TextEncoding eTextEncoding,rtl_TextEncodingInfo * pEncInfo)122 sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
123 {
124     const ImplTextEncodingData* pData;
125 
126     pData = Impl_getTextEncodingData( eTextEncoding );
127     if ( !pData )
128     {
129         /* HACK: For not implemented encoding, because not all
130            calls handle the errors */
131         if ( pEncInfo->StructSize < 5 )
132             return false;
133         pEncInfo->MinimumCharSize = 1;
134 
135         if ( pEncInfo->StructSize < 6 )
136             return true;
137         pEncInfo->MaximumCharSize = 1;
138 
139         if ( pEncInfo->StructSize < 7 )
140             return true;
141         pEncInfo->AverageCharSize = 1;
142 
143         if ( pEncInfo->StructSize < 12 )
144             return true;
145         pEncInfo->Flags = 0;
146 
147         return false;
148     }
149 
150     if ( pEncInfo->StructSize < 5 )
151         return false;
152     pEncInfo->MinimumCharSize = pData->mnMinCharSize;
153 
154     if ( pEncInfo->StructSize < 6 )
155         return true;
156     pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
157 
158     if ( pEncInfo->StructSize < 7 )
159         return true;
160     pEncInfo->AverageCharSize = pData->mnAveCharSize;
161 
162     if ( pEncInfo->StructSize < 12 )
163         return true;
164     pEncInfo->Flags = pData->mnInfoFlags;
165 
166     return true;
167 }
168 
169 /* ======================================================================= */
170 
rtl_getTextEncodingFromWindowsCharset(sal_uInt8 nWinCharset)171 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
172 {
173     rtl_TextEncoding eTextEncoding;
174 
175     switch ( nWinCharset )
176     {
177         case 0:     eTextEncoding = RTL_TEXTENCODING_MS_1252; break;    /* ANSI_CHARSET */
178         case 2:     eTextEncoding = RTL_TEXTENCODING_SYMBOL; break;     /* SYMBOL_CHARSET */
179         case 77:    eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
180         case 128:   eTextEncoding = RTL_TEXTENCODING_MS_932; break;     /* SHIFTJIS_CHARSET */
181         case 129:   eTextEncoding = RTL_TEXTENCODING_MS_949; break;     /* HANGEUL_CHARSET */
182         case 130:   eTextEncoding = RTL_TEXTENCODING_MS_1361; break;    /* JOHAB_CHARSET */
183         case 134:   eTextEncoding = RTL_TEXTENCODING_MS_936; break;     /* GB2312_CHARSET */
184         case 136:   eTextEncoding = RTL_TEXTENCODING_MS_950; break;     /* CHINESEBIG5_CHARSET */
185         case 161:   eTextEncoding = RTL_TEXTENCODING_MS_1253; break;    /* GREEK_CHARSET */
186         case 162:   eTextEncoding = RTL_TEXTENCODING_MS_1254; break;    /* TURKISH_CHARSET */
187         case 163:   eTextEncoding = RTL_TEXTENCODING_MS_1258; break;    /* VIETNAMESE_CHARSET !!! */
188         case 177:   eTextEncoding = RTL_TEXTENCODING_MS_1255; break;    /* HEBREW_CHARSET */
189         case 178:   eTextEncoding = RTL_TEXTENCODING_MS_1256; break;    /* ARABIC_CHARSET */
190         case 186:   eTextEncoding = RTL_TEXTENCODING_MS_1257; break;    /* BALTIC_CHARSET */
191         case 204:   eTextEncoding = RTL_TEXTENCODING_MS_1251; break;    /* RUSSIAN_CHARSET */
192         case 222:   eTextEncoding = RTL_TEXTENCODING_MS_874; break;     /* THAI_CHARSET */
193         case 238:   eTextEncoding = RTL_TEXTENCODING_MS_1250; break;    /* EASTEUROPE_CHARSET */
194         case 255:   eTextEncoding = RTL_TEXTENCODING_IBM_850; break;    /* OEM_CHARSET */
195         default:    eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
196     }
197 
198     return eTextEncoding;
199 }
200 
201 /* ----------------------------------------------------------------------- */
202 
rtl_getTextEncodingFromUnixCharset(const char * pUnixCharset)203 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
204 {
205     /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
206      * (Registry and Encoding) Names").
207      */
208 
209     /* All Identifiers in the tables are lower case The function search */
210     /* for the first matching string in the tables. */
211     /* Sort order: unique (first 14, then 1), important */
212 
213     static ImplStrCharsetDef const aUnixCharsetISOTab[] =
214     {
215         { "15", RTL_TEXTENCODING_ISO_8859_15 },
216         { "14", RTL_TEXTENCODING_ISO_8859_14 },
217         { "13", RTL_TEXTENCODING_ISO_8859_13 },
218         { "11", RTL_TEXTENCODING_TIS_620 },
219         { "10", RTL_TEXTENCODING_ISO_8859_10 },
220         { "1", RTL_TEXTENCODING_ISO_8859_1 },
221         { "2", RTL_TEXTENCODING_ISO_8859_2 },
222         { "3", RTL_TEXTENCODING_ISO_8859_3 },
223         { "4", RTL_TEXTENCODING_ISO_8859_4 },
224         { "5", RTL_TEXTENCODING_ISO_8859_5 },
225         { "6", RTL_TEXTENCODING_ISO_8859_6 },
226         { "7", RTL_TEXTENCODING_ISO_8859_7 },
227         { "8", RTL_TEXTENCODING_ISO_8859_8 },
228         { "9", RTL_TEXTENCODING_ISO_8859_9 },
229         { nullptr, RTL_TEXTENCODING_DONTKNOW }
230     };
231 
232     static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
233     {
234         { "fontspecific", RTL_TEXTENCODING_SYMBOL },
235         { nullptr, RTL_TEXTENCODING_DONTKNOW }
236     };
237 
238     static ImplStrCharsetDef const aUnixCharsetMSTab[] =
239     {
240         { "1252", RTL_TEXTENCODING_MS_1252 },
241         { "1250", RTL_TEXTENCODING_MS_1250 },
242         { "1251", RTL_TEXTENCODING_MS_1251 },
243         { "1253", RTL_TEXTENCODING_MS_1253 },
244         { "1254", RTL_TEXTENCODING_MS_1254 },
245         { "1255", RTL_TEXTENCODING_MS_1255 },
246         { "1256", RTL_TEXTENCODING_MS_1256 },
247         { "1257", RTL_TEXTENCODING_MS_1257 },
248         { "1258", RTL_TEXTENCODING_MS_1258 },
249         { "932", RTL_TEXTENCODING_MS_932 },
250         { "936", RTL_TEXTENCODING_MS_936 },
251         { "949", RTL_TEXTENCODING_MS_949 },
252         { "950", RTL_TEXTENCODING_MS_950 },
253         { "1361", RTL_TEXTENCODING_MS_1361 },
254         { "cp1252", RTL_TEXTENCODING_MS_1252 },
255         { "cp1250", RTL_TEXTENCODING_MS_1250 },
256         { "cp1251", RTL_TEXTENCODING_MS_1251 },
257         { "cp1253", RTL_TEXTENCODING_MS_1253 },
258         { "cp1254", RTL_TEXTENCODING_MS_1254 },
259         { "cp1255", RTL_TEXTENCODING_MS_1255 },
260         { "cp1256", RTL_TEXTENCODING_MS_1256 },
261         { "cp1257", RTL_TEXTENCODING_MS_1257 },
262         { "cp1258", RTL_TEXTENCODING_MS_1258 },
263         { "cp932", RTL_TEXTENCODING_MS_932 },
264         { "cp936", RTL_TEXTENCODING_MS_936 },
265         { "cp949", RTL_TEXTENCODING_MS_949 },
266         { "cp950", RTL_TEXTENCODING_MS_950 },
267         { "cp1361", RTL_TEXTENCODING_MS_1361 },
268         { nullptr, RTL_TEXTENCODING_DONTKNOW }
269     };
270 
271     static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
272     {
273         { "437", RTL_TEXTENCODING_IBM_437 },
274         { "850", RTL_TEXTENCODING_IBM_850 },
275         { "860", RTL_TEXTENCODING_IBM_860 },
276         { "861", RTL_TEXTENCODING_IBM_861 },
277         { "863", RTL_TEXTENCODING_IBM_863 },
278         { "865", RTL_TEXTENCODING_IBM_865 },
279         { "737", RTL_TEXTENCODING_IBM_737 },
280         { "775", RTL_TEXTENCODING_IBM_775 },
281         { "852", RTL_TEXTENCODING_IBM_852 },
282         { "855", RTL_TEXTENCODING_IBM_855 },
283         { "857", RTL_TEXTENCODING_IBM_857 },
284         { "862", RTL_TEXTENCODING_IBM_862 },
285         { "864", RTL_TEXTENCODING_IBM_864 },
286         { "866", RTL_TEXTENCODING_IBM_866 },
287         { "869", RTL_TEXTENCODING_IBM_869 },
288         { "874", RTL_TEXTENCODING_MS_874 },
289         { "1004", RTL_TEXTENCODING_MS_1252 },
290         { "65400", RTL_TEXTENCODING_SYMBOL },
291         { nullptr, RTL_TEXTENCODING_DONTKNOW }
292     };
293 
294     static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
295     {
296         { "r", RTL_TEXTENCODING_KOI8_R },
297         { "u", RTL_TEXTENCODING_KOI8_U },
298         { nullptr, RTL_TEXTENCODING_DONTKNOW }
299     };
300 
301     static ImplStrCharsetDef const aUnixCharsetJISX0208Tab[] =
302     {
303         { nullptr, RTL_TEXTENCODING_JIS_X_0208 }
304     };
305 
306     static ImplStrCharsetDef const aUnixCharsetJISX0201Tab[] =
307     {
308         { nullptr, RTL_TEXTENCODING_JIS_X_0201 }
309     };
310 
311     static ImplStrCharsetDef const aUnixCharsetJISX0212Tab[] =
312     {
313         { nullptr, RTL_TEXTENCODING_JIS_X_0212 }
314     };
315 
316     static ImplStrCharsetDef const aUnixCharsetGBTab[] =
317     {
318         { nullptr, RTL_TEXTENCODING_GB_2312 }
319     };
320 
321     static ImplStrCharsetDef const aUnixCharsetGBKTab[] =
322     {
323         { nullptr, RTL_TEXTENCODING_GBK }
324     };
325 
326     static ImplStrCharsetDef const aUnixCharsetBIG5Tab[] =
327     {
328         { nullptr, RTL_TEXTENCODING_BIG5 }
329     };
330 
331     static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
332     {
333         { nullptr, RTL_TEXTENCODING_EUC_KR }
334     };
335 
336     static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
337     {
338         { nullptr, RTL_TEXTENCODING_MS_1361 }
339     };
340 
341     static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
342     {
343         { nullptr, RTL_TEXTENCODING_UNICODE }
344     };
345 
346     static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
347     {
348 /* Currently every Unicode Encoding is for us Unicode */
349 /*        { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
350         { nullptr, RTL_TEXTENCODING_UNICODE }
351     };
352 
353     static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
354     {
355         { nullptr, RTL_TEXTENCODING_SYMBOL }
356     };
357 
358     /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
359        rev=1.1.1.1>: */
360     static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
361     {
362         { "0", RTL_TEXTENCODING_TIS_620 },
363         { "2529", RTL_TEXTENCODING_TIS_620 },
364         { "2533", RTL_TEXTENCODING_TIS_620 },
365         { nullptr, RTL_TEXTENCODING_DONTKNOW }
366     };
367     static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
368     {
369         { "1", RTL_TEXTENCODING_TIS_620 },
370         { nullptr, RTL_TEXTENCODING_DONTKNOW }
371     };
372     static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
373     {
374         { "0", RTL_TEXTENCODING_TIS_620 },
375         { "1", RTL_TEXTENCODING_TIS_620 },
376         { nullptr, RTL_TEXTENCODING_DONTKNOW }
377     };
378 
379     static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
380     {
381         { "iso8859", aUnixCharsetISOTab },
382         { "adobe", aUnixCharsetADOBETab },
383         { "ansi", aUnixCharsetMSTab },
384         { "microsoft", aUnixCharsetMSTab },
385         { "ibm", aUnixCharsetIBMTab },
386         { "koi8", aUnixCharsetKOI8Tab },
387         { "jisx0208", aUnixCharsetJISX0208Tab },
388         { "jisx0208.1983", aUnixCharsetJISX0208Tab },
389         { "jisx0201", aUnixCharsetJISX0201Tab },
390         { "jisx0201.1976", aUnixCharsetJISX0201Tab },
391         { "jisx0212", aUnixCharsetJISX0212Tab },
392         { "jisx0212.1990", aUnixCharsetJISX0212Tab },
393         { "gb2312", aUnixCharsetGBTab },
394         { "gbk", aUnixCharsetGBKTab },
395         { "big5", aUnixCharsetBIG5Tab },
396         { "iso10646", aUnixCharsetISO10646Tab },
397 /*      { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
398         { "sunolcursor", aUnixCharsetSymbolTab },
399         { "sunolglyph", aUnixCharsetSymbolTab },
400         { "iso10646", aUnixCharsetUNICODETab },
401         { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
402         { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
403         { "tis620.2529", aUnixCharsetTIS6202529Tab },
404         { "tis620.2533", aUnixCharsetTIS6202533Tab },
405         { "tis620", aUnixCharsetTIS620Tab },
406 /*        { "sunudcja.1997",  },        */
407 /*        { "sunudcko.1997",  },        */
408 /*        { "sunudczh.1997",  },        */
409 /*        { "sunudczhtw.1997",  },      */
410         { nullptr, nullptr }
411     };
412 
413     rtl_TextEncoding    eEncoding = RTL_TEXTENCODING_DONTKNOW;
414     char*           pTempBuf;
415     sal_uInt32          nBufLen = strlen( pUnixCharset )+1;
416     const char*     pFirstPart;
417     const char*     pSecondPart;
418 
419     /* Alloc Buffer and map to lower case */
420     std::unique_ptr<char[]> pBuf(new char[nBufLen]);
421     Impl_toAsciiLower( pUnixCharset, pBuf.get() );
422 
423     /* Search FirstPart */
424     pFirstPart = pBuf.get();
425     pSecondPart = nullptr;
426     pTempBuf = pBuf.get();
427     while ( *pTempBuf )
428     {
429         if ( *pTempBuf == '-' )
430         {
431             *pTempBuf = '\0';
432             pSecondPart = pTempBuf+1;
433             break;
434         }
435 
436         pTempBuf++;
437     }
438 
439     /* found part separator */
440     if ( pSecondPart )
441     {
442         /* Search for the part tab */
443         const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
444         while ( pFirstPartData->mpCharsetStr )
445         {
446             if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
447             {
448                 /* Search for the charset in the second part tab */
449                 const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
450                 while ( pData->mpCharsetStr )
451                 {
452                     if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
453                     {
454                         break;
455                     }
456 
457                     pData++;
458                 }
459 
460                 /* use default encoding for first part */
461                 eEncoding = pData->meTextEncoding;
462                 break;
463             }
464 
465             pFirstPartData++;
466         }
467     }
468 
469     return eEncoding;
470 }
471 
472 /* ----------------------------------------------------------------------- */
473 
rtl_getTextEncodingFromMimeCharset(const char * pMimeCharset)474 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
475 {
476     /* All Identifiers are in lower case and contain only alphanumeric */
477     /* characters. The function search for the first equal string in */
478     /* the table. In this table are only the most used mime types. */
479     /* Sort order: important */
480     static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
481     {
482         { "usascii", RTL_TEXTENCODING_ASCII_US },
483         { "utf8", RTL_TEXTENCODING_UTF8 },
484         { "utf7", RTL_TEXTENCODING_UTF7 },
485         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
486         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
487         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
488         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
489         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
490         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
491         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
492         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
493         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
494         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
495         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
496         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
497         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
498         { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
499         { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
500         { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
501         { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
502         { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
503         { "eucjp", RTL_TEXTENCODING_EUC_JP },
504         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
505         { "mskanji", RTL_TEXTENCODING_MS_932 },
506         { "gb2312", RTL_TEXTENCODING_GB_2312 },
507         { "cngb", RTL_TEXTENCODING_GB_2312 },
508         { "big5", RTL_TEXTENCODING_BIG5 },
509         { "cnbig5", RTL_TEXTENCODING_BIG5 },
510         { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
511         { "euckr", RTL_TEXTENCODING_EUC_KR },
512         { "koi8r", RTL_TEXTENCODING_KOI8_R },
513         { "windows1252", RTL_TEXTENCODING_MS_1252 },
514         { "windows1250", RTL_TEXTENCODING_MS_1250 },
515         { "windows1251", RTL_TEXTENCODING_MS_1251 },
516         { "windows1253", RTL_TEXTENCODING_MS_1253 },
517         { "windows1254", RTL_TEXTENCODING_MS_1254 },
518         { "windows1255", RTL_TEXTENCODING_MS_1255 },
519         { "windows1256", RTL_TEXTENCODING_MS_1256 },
520         { "windows1257", RTL_TEXTENCODING_MS_1257 },
521         { "windows1258", RTL_TEXTENCODING_MS_1258 },
522         { nullptr, RTL_TEXTENCODING_DONTKNOW }
523     };
524 
525     /* All Identifiers are in lower case and contain only alphanumeric */
526     /* characters. The function search for the first matching string in */
527     /* the table. */
528     /* Sort order: unique (first iso885914, then iso88591), important */
529     static ImplStrCharsetDef const aMimeCharsetTab[] =
530     {
531         { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
532         { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
533         { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
534         { "iso88591win", RTL_TEXTENCODING_MS_1252 },
535         { "iso88592win", RTL_TEXTENCODING_MS_1250 },
536         { "iso88599win", RTL_TEXTENCODING_MS_1254 },
537         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
538         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
539         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
540         { "iso885911", RTL_TEXTENCODING_TIS_620 },
541             /* This is no official MIME character set name, but it might be in
542                use in Thailand. */
543         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
544         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
545         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
546         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
547         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
548         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
549         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
550         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
551         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
552         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
553         { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
554         { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
555         { "l1", RTL_TEXTENCODING_ISO_8859_1 },
556         { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
557         { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
558         { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
559         { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
560         { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
561         { "l2", RTL_TEXTENCODING_ISO_8859_2 },
562         { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
563         { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
564         { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
565         { "l3", RTL_TEXTENCODING_ISO_8859_3 },
566         { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
567         { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
568         { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
569         { "l4", RTL_TEXTENCODING_ISO_8859_4 },
570         { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
571         { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
572         { "cyrillicasian", RTL_TEXTENCODING_PT154 },
573         { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
574         { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
575         { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
576         { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
577         { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
578         { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
579         { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
580         { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
581         { "greek", RTL_TEXTENCODING_ISO_8859_7 },
582         { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
583         { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
584         { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
585         { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
586         { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
587         { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
588         { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
589         { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
590         { "l5", RTL_TEXTENCODING_ISO_8859_9 },
591         { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
592         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
593         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
594         { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
595         { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
596         { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
597         { "iso10646us", RTL_TEXTENCODING_ASCII_US },
598         { "iso646irv", RTL_TEXTENCODING_ASCII_US },
599         { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
600         { "ibm437", RTL_TEXTENCODING_IBM_437 },
601         { "cp437", RTL_TEXTENCODING_IBM_437 },
602         { "437", RTL_TEXTENCODING_IBM_437 },
603         { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
604         { "ansix34", RTL_TEXTENCODING_ASCII_US },
605         { "ibm367", RTL_TEXTENCODING_ASCII_US },
606         { "cp367", RTL_TEXTENCODING_ASCII_US },
607         { "csascii", RTL_TEXTENCODING_ASCII_US },
608         { "ibm775", RTL_TEXTENCODING_IBM_775 },
609         { "cp775", RTL_TEXTENCODING_IBM_775 },
610         { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
611         { "ibm850", RTL_TEXTENCODING_IBM_850 },
612         { "cp850", RTL_TEXTENCODING_IBM_850 },
613         { "850", RTL_TEXTENCODING_IBM_850 },
614         { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
615 /*        { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
616 /*        { "cp851", RTL_TEXTENCODING_IBM_851 }, */
617 /*        { "851", RTL_TEXTENCODING_IBM_851 }, */
618 /*        { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
619         { "ibm852", RTL_TEXTENCODING_IBM_852 },
620         { "cp852", RTL_TEXTENCODING_IBM_852 },
621         { "852", RTL_TEXTENCODING_IBM_852 },
622         { "cspcp852", RTL_TEXTENCODING_IBM_852 },
623         { "ibm855", RTL_TEXTENCODING_IBM_855 },
624         { "cp855", RTL_TEXTENCODING_IBM_855 },
625         { "855", RTL_TEXTENCODING_IBM_855 },
626         { "csibm855", RTL_TEXTENCODING_IBM_855 },
627         { "ibm857", RTL_TEXTENCODING_IBM_857 },
628         { "cp857", RTL_TEXTENCODING_IBM_857 },
629         { "857", RTL_TEXTENCODING_IBM_857 },
630         { "csibm857", RTL_TEXTENCODING_IBM_857 },
631         { "ibm860", RTL_TEXTENCODING_IBM_860 },
632         { "cp860", RTL_TEXTENCODING_IBM_860 },
633         { "860", RTL_TEXTENCODING_IBM_860 },
634         { "csibm860", RTL_TEXTENCODING_IBM_860 },
635         { "ibm861", RTL_TEXTENCODING_IBM_861 },
636         { "cp861", RTL_TEXTENCODING_IBM_861 },
637         { "861", RTL_TEXTENCODING_IBM_861 },
638         { "csis", RTL_TEXTENCODING_IBM_861 },
639         { "csibm861", RTL_TEXTENCODING_IBM_861 },
640         { "ibm862", RTL_TEXTENCODING_IBM_862 },
641         { "cp862", RTL_TEXTENCODING_IBM_862 },
642         { "862", RTL_TEXTENCODING_IBM_862 },
643         { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
644         { "ibm863", RTL_TEXTENCODING_IBM_863 },
645         { "cp863", RTL_TEXTENCODING_IBM_863 },
646         { "863", RTL_TEXTENCODING_IBM_863 },
647         { "csibm863", RTL_TEXTENCODING_IBM_863 },
648         { "ibm864", RTL_TEXTENCODING_IBM_864 },
649         { "cp864", RTL_TEXTENCODING_IBM_864 },
650         { "864", RTL_TEXTENCODING_IBM_864 },
651         { "csibm864", RTL_TEXTENCODING_IBM_864 },
652         { "ibm865", RTL_TEXTENCODING_IBM_865 },
653         { "cp865", RTL_TEXTENCODING_IBM_865 },
654         { "865", RTL_TEXTENCODING_IBM_865 },
655         { "csibm865", RTL_TEXTENCODING_IBM_865 },
656         { "ibm866", RTL_TEXTENCODING_IBM_866 },
657         { "cp866", RTL_TEXTENCODING_IBM_866 },
658         { "866", RTL_TEXTENCODING_IBM_866 },
659         { "csibm866", RTL_TEXTENCODING_IBM_866 },
660 /*        { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
661 /*        { "cp868", RTL_TEXTENCODING_IBM_868 }, */
662 /*        { "cpar", RTL_TEXTENCODING_IBM_868 }, */
663 /*        { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
664         { "ibm869", RTL_TEXTENCODING_IBM_869 },
665         { "cp869", RTL_TEXTENCODING_IBM_869 },
666         { "869", RTL_TEXTENCODING_IBM_869 },
667         { "cpgr", RTL_TEXTENCODING_IBM_869 },
668         { "csibm869", RTL_TEXTENCODING_IBM_869 },
669         { "ibm869", RTL_TEXTENCODING_IBM_869 },
670         { "cp869", RTL_TEXTENCODING_IBM_869 },
671         { "869", RTL_TEXTENCODING_IBM_869 },
672         { "cpgr", RTL_TEXTENCODING_IBM_869 },
673         { "csibm869", RTL_TEXTENCODING_IBM_869 },
674         { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
675         { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
676         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
677         { "mskanji", RTL_TEXTENCODING_MS_932 },
678         { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
679         { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
680         { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
681         { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
682         { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
683         { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
684         { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
685         { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
686         { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
687         { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
688         { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
689         { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
690         { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
691         { "isoir6", RTL_TEXTENCODING_ASCII_US },
692         { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
693         { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
694         { "ascii", RTL_TEXTENCODING_ASCII_US },
695         { "us", RTL_TEXTENCODING_ASCII_US },
696         { "gb180302000", RTL_TEXTENCODING_GB_18030 },
697             /* This is no actual MIME character set name, it is only in here
698                for backwards compatibility (before "GB18030" was officially
699                registered with IANA, this code contained some guesses of what
700                would become official names for GB18030). */
701         { "gb18030", RTL_TEXTENCODING_GB_18030 },
702         { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
703         { "tis620", RTL_TEXTENCODING_TIS_620 },
704         { "gbk", RTL_TEXTENCODING_GBK },
705         { "cp936", RTL_TEXTENCODING_GBK },
706         { "ms936", RTL_TEXTENCODING_GBK },
707         { "windows936", RTL_TEXTENCODING_GBK },
708         { "cp874", RTL_TEXTENCODING_MS_874 },
709             /* This is no official MIME character set name, but it might be in
710                use in Thailand. */
711         { "ms874", RTL_TEXTENCODING_MS_874 },
712             /* This is no official MIME character set name, but it might be in
713                use in Thailand. */
714         { "windows874", RTL_TEXTENCODING_MS_874 },
715             /* This is no official MIME character set name, but it might be in
716                use in Thailand. */
717         { "koi8u", RTL_TEXTENCODING_KOI8_U },
718         { "cpis", RTL_TEXTENCODING_IBM_861 },
719         { "ksc56011987", RTL_TEXTENCODING_MS_949 },
720         { "isoir149", RTL_TEXTENCODING_MS_949 },
721         { "ksc56011989", RTL_TEXTENCODING_MS_949 },
722         { "ksc5601", RTL_TEXTENCODING_MS_949 },
723         { "korean", RTL_TEXTENCODING_MS_949 },
724         { "csksc56011987", RTL_TEXTENCODING_MS_949 },
725             /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
726                this character set identifier seems to be prominently used by MS
727                to stand for KS C 5601 plus MS-949 extensions */
728         { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
729         { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
730         { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
731         { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
732         { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
733         { "ptcp154", RTL_TEXTENCODING_PT154 },
734         { "csptcp154", RTL_TEXTENCODING_PT154 },
735         { "pt154", RTL_TEXTENCODING_PT154 },
736         { "cp154", RTL_TEXTENCODING_PT154 },
737         { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
738             /* This is not an official MIME character set name, but is in use by
739                various windows APIs. */
740         { nullptr, RTL_TEXTENCODING_DONTKNOW }
741     };
742 
743     rtl_TextEncoding            eEncoding = RTL_TEXTENCODING_DONTKNOW;
744     const ImplStrCharsetDef*    pData = aVIPMimeCharsetTab;
745     sal_uInt32                  nBufLen = strlen( pMimeCharset )+1;
746 
747     /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
748     std::unique_ptr<char[]> pBuf(new char[nBufLen]);
749     Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf.get() );
750 
751     /* Search for equal in the VIP table */
752     while ( pData->mpCharsetStr )
753     {
754         if ( strcmp( pBuf.get(), pData->mpCharsetStr ) == 0 )
755         {
756             eEncoding = pData->meTextEncoding;
757             break;
758         }
759 
760         pData++;
761     }
762 
763     /* Search for matching in the mime table */
764     if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
765     {
766         pData = aMimeCharsetTab;
767         while ( pData->mpCharsetStr )
768         {
769             if ( Impl_matchString( pBuf.get(), pData->mpCharsetStr ) )
770             {
771                 eEncoding = pData->meTextEncoding;
772                 break;
773             }
774 
775             pData++;
776         }
777     }
778 
779     return eEncoding;
780 }
781 
782 /* ======================================================================= */
783 
rtl_getBestWindowsCharsetFromTextEncoding(rtl_TextEncoding eTextEncoding)784 sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
785 {
786     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
787     if ( pData )
788         return pData->mnBestWindowsCharset;
789     return 1;
790 }
791 
792 /* ----------------------------------------------------------------------- */
793 
rtl_getBestUnixCharsetFromTextEncoding(rtl_TextEncoding eTextEncoding)794 const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding  )
795 {
796     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
797     if ( pData )
798         return pData->mpBestUnixCharset;
799     if( eTextEncoding == RTL_TEXTENCODING_UNICODE )
800         return "iso10646-1";
801     return nullptr;
802 }
803 
804 /* ----------------------------------------------------------------------- */
805 
rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding nEncoding)806 char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
807                                                              nEncoding)
808 {
809     ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
810     return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ?
811                p->mpBestMimeCharset : nullptr;
812 }
813 
rtl_getBestMimeCharsetFromTextEncoding(rtl_TextEncoding eTextEncoding)814 const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
815 {
816     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
817     if ( pData )
818         return pData->mpBestMimeCharset;
819     return nullptr;
820 }
821 
/* The following two functions are based on
   <http://www.sharmahd.com/tm/codepages.html>,
   <http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset4.asp>,
   and <http://www.iana.org/assignments/character-sets>.
 */
826 
827 rtl_TextEncoding SAL_CALL
rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)828 rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
829 {
830     switch (nCodePage)
831     {
832     case 42: return RTL_TEXTENCODING_SYMBOL;
833     case 437: return RTL_TEXTENCODING_IBM_437;
834     case 708: return RTL_TEXTENCODING_ISO_8859_6;
835     case 737: return RTL_TEXTENCODING_IBM_737;
836     case 775: return RTL_TEXTENCODING_IBM_775;
837     case 850: return RTL_TEXTENCODING_IBM_850;
838     case 852: return RTL_TEXTENCODING_IBM_852;
839     case 855: return RTL_TEXTENCODING_IBM_855;
840     case 857: return RTL_TEXTENCODING_IBM_857;
841     case 860: return RTL_TEXTENCODING_IBM_860;
842     case 861: return RTL_TEXTENCODING_IBM_861;
843     case 862: return RTL_TEXTENCODING_IBM_862;
844     case 863: return RTL_TEXTENCODING_IBM_863;
845     case 864: return RTL_TEXTENCODING_IBM_864;
846     case 865: return RTL_TEXTENCODING_IBM_865;
847     case 866: return RTL_TEXTENCODING_IBM_866;
848     case 869: return RTL_TEXTENCODING_IBM_869;
849     case 874: return RTL_TEXTENCODING_MS_874;
850     case 932: return RTL_TEXTENCODING_MS_932;
851     case 936: return RTL_TEXTENCODING_MS_936;
852     case 949: return RTL_TEXTENCODING_MS_949;
853     case 950: return RTL_TEXTENCODING_MS_950;
854     case 1250: return RTL_TEXTENCODING_MS_1250;
855     case 1251: return RTL_TEXTENCODING_MS_1251;
856     case 1252: return RTL_TEXTENCODING_MS_1252;
857     case 1253: return RTL_TEXTENCODING_MS_1253;
858     case 1254: return RTL_TEXTENCODING_MS_1254;
859     case 1255: return RTL_TEXTENCODING_MS_1255;
860     case 1256: return RTL_TEXTENCODING_MS_1256;
861     case 1257: return RTL_TEXTENCODING_MS_1257;
862     case 1258: return RTL_TEXTENCODING_MS_1258;
863     case 1361: return RTL_TEXTENCODING_MS_1361;
864     case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
865     case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
866     case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
867     case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
868     case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
869     case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
870     case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
871     case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
872     case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
873     case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
874     case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
875     case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
876     case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
877     case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
878     case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
879     case 20127: return RTL_TEXTENCODING_ASCII_US;
880     case 20866: return RTL_TEXTENCODING_KOI8_R;
881     case 21866: return RTL_TEXTENCODING_KOI8_U;
882     case 28591: return RTL_TEXTENCODING_ISO_8859_1;
883     case 28592: return RTL_TEXTENCODING_ISO_8859_2;
884     case 28593: return RTL_TEXTENCODING_ISO_8859_3;
885     case 28594: return RTL_TEXTENCODING_ISO_8859_4;
886     case 28595: return RTL_TEXTENCODING_ISO_8859_5;
887     case 28596: return RTL_TEXTENCODING_ISO_8859_6;
888     case 28597: return RTL_TEXTENCODING_ISO_8859_7;
889     case 28598: return RTL_TEXTENCODING_ISO_8859_8;
890     case 28599: return RTL_TEXTENCODING_ISO_8859_9;
891     case 28605: return RTL_TEXTENCODING_ISO_8859_15;
892     case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
893     case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
894     case 51932: return RTL_TEXTENCODING_EUC_JP;
895     case 51936: return RTL_TEXTENCODING_EUC_CN;
896     case 51949: return RTL_TEXTENCODING_EUC_KR;
897     case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
898     case 65000: return RTL_TEXTENCODING_UTF7;
899     case 65001: return RTL_TEXTENCODING_UTF8;
900     default: return RTL_TEXTENCODING_DONTKNOW;
901     }
902 }
903 
904 sal_uInt32 SAL_CALL
rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)905 rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
906 {
907     switch (nEncoding)
908     {
909     case RTL_TEXTENCODING_SYMBOL: return 42;
910     case RTL_TEXTENCODING_IBM_437: return 437;
911  /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */
912     case RTL_TEXTENCODING_IBM_737: return 737;
913     case RTL_TEXTENCODING_IBM_775: return 775;
914     case RTL_TEXTENCODING_IBM_850: return 850;
915     case RTL_TEXTENCODING_IBM_852: return 852;
916     case RTL_TEXTENCODING_IBM_855: return 855;
917     case RTL_TEXTENCODING_IBM_857: return 857;
918     case RTL_TEXTENCODING_IBM_860: return 860;
919     case RTL_TEXTENCODING_IBM_861: return 861;
920     case RTL_TEXTENCODING_IBM_862: return 862;
921     case RTL_TEXTENCODING_IBM_863: return 863;
922     case RTL_TEXTENCODING_IBM_864: return 864;
923     case RTL_TEXTENCODING_IBM_865: return 865;
924     case RTL_TEXTENCODING_IBM_866: return 866;
925     case RTL_TEXTENCODING_IBM_869: return 869;
926     case RTL_TEXTENCODING_MS_874: return 874;
927     case RTL_TEXTENCODING_MS_932: return 932;
928     case RTL_TEXTENCODING_MS_936: return 936;
929     case RTL_TEXTENCODING_MS_949: return 949;
930     case RTL_TEXTENCODING_MS_950: return 950;
931     case RTL_TEXTENCODING_MS_1250: return 1250;
932     case RTL_TEXTENCODING_MS_1251: return 1251;
933     case RTL_TEXTENCODING_MS_1252: return 1252;
934     case RTL_TEXTENCODING_MS_1253: return 1253;
935     case RTL_TEXTENCODING_MS_1254: return 1254;
936     case RTL_TEXTENCODING_MS_1255: return 1255;
937     case RTL_TEXTENCODING_MS_1256: return 1256;
938     case RTL_TEXTENCODING_MS_1257: return 1257;
939     case RTL_TEXTENCODING_MS_1258: return 1258;
940     case RTL_TEXTENCODING_MS_1361: return 1361;
941     case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
942     case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
943     case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
944     case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
945     case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
946     case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
947     case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
948     case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
949     case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
950     case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
951     case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
952     case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
953     case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
954     case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
955     case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
956     case RTL_TEXTENCODING_ASCII_US: return 20127;
957     case RTL_TEXTENCODING_KOI8_R: return 20866;
958     case RTL_TEXTENCODING_KOI8_U: return 21866;
959     case RTL_TEXTENCODING_ISO_8859_1: return 28591;
960     case RTL_TEXTENCODING_ISO_8859_2: return 28592;
961     case RTL_TEXTENCODING_ISO_8859_3: return 28593;
962     case RTL_TEXTENCODING_ISO_8859_4: return 28594;
963     case RTL_TEXTENCODING_ISO_8859_5: return 28595;
964     case RTL_TEXTENCODING_ISO_8859_6: return 28596;
965     case RTL_TEXTENCODING_ISO_8859_7: return 28597;
966     case RTL_TEXTENCODING_ISO_8859_8: return 28598;
967     case RTL_TEXTENCODING_ISO_8859_9: return 28599;
968     case RTL_TEXTENCODING_ISO_8859_15: return 28605;
969     case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
970     case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
971     case RTL_TEXTENCODING_EUC_JP: return 51932;
972     case RTL_TEXTENCODING_EUC_CN: return 51936;
973     case RTL_TEXTENCODING_EUC_KR: return 51949;
974     case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
975     case RTL_TEXTENCODING_UTF7: return 65000;
976     case RTL_TEXTENCODING_UTF8: return 65001;
977     default: return 0;
978     }
979 }
980 
981 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
982