1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <cclass_unicode.hxx>
21 #include <com/sun/star/i18n/KCharacterType.hpp>
22 #include <com/sun/star/i18n/WordType.hpp>
23 #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
24 #include <unicode/uchar.h>
25 #include <cppuhelper/exc_hlp.hxx>
26 #include <cppuhelper/supportsservice.hxx>
27 #include <breakiteratorImpl.hxx>
28 #include <transliteration_body.hxx>
29 #include <rtl/ref.hxx>
30 #include <o3tl/string_view.hxx>
31 #include <utility>
32 
33 using namespace ::com::sun::star;
34 using namespace ::com::sun::star::uno;
35 using namespace ::com::sun::star::i18n;
36 using namespace ::com::sun::star::lang;
37 
38 namespace i18npool {
39 
40 //  class cclass_Unicode
41 //  ----------------------------------------------------;
42 
cclass_Unicode(uno::Reference<XComponentContext> xContext)43 cclass_Unicode::cclass_Unicode( uno::Reference < XComponentContext > xContext ) :
44         transToUpper( new Transliteration_casemapping() ),
45         transToLower( new Transliteration_casemapping() ),
46         transToTitle( new Transliteration_casemapping() ),
47         m_xContext(std::move( xContext )),
48         nStartTypes( 0 ),
49         nContTypes( 0 ),
50         cGroupSep( ',' ),
51         cDecimalSep( '.' ),
52         cDecimalSepAlt( 0 )
53 {
54     transToUpper->setMappingType(MappingType::ToUpper);
55     transToLower->setMappingType(MappingType::ToLower);
56     transToTitle->setMappingType(MappingType::ToTitle);
57 }
58 
~cclass_Unicode()59 cclass_Unicode::~cclass_Unicode() {
60     destroyParserTable();
61 }
62 
63 
64 OUString SAL_CALL
toUpper(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)65 cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
66     sal_Int32 len = Text.getLength();
67     if (nPos >= len)
68         return OUString();
69     if (nCount + nPos > len)
70         nCount = len - nPos;
71 
72     transToUpper->setLocale(rLocale);
73     return transToUpper->transliterateString2String(Text, nPos, nCount);
74 }
75 
76 OUString SAL_CALL
toLower(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)77 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
78     sal_Int32 len = Text.getLength();
79     if (nPos >= len)
80         return OUString();
81     if (nCount + nPos > len)
82         nCount = len - nPos;
83 
84     transToLower->setLocale(rLocale);
85     return transToLower->transliterateString2String(Text, nPos, nCount);
86 }
87 
88 OUString SAL_CALL
toTitle(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)89 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) {
90     try
91     {
92         sal_Int32 len = Text.getLength();
93         if (nPos >= len)
94             return OUString();
95         if (nCount + nPos > len)
96             nCount = len - nPos;
97 
98         transToTitle->setLocale(rLocale);
99         rtl_uString* pStr = rtl_uString_alloc(nCount);
100         sal_Unicode* out = pStr->buffer;
101         rtl::Reference< BreakIteratorImpl > xBrk(new BreakIteratorImpl(m_xContext));
102         Boundary bdy = xBrk->getWordBoundary(Text, nPos, rLocale,
103                     WordType::ANYWORD_IGNOREWHITESPACES, true);
104         for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
105             if (i >= bdy.endPos)
106                 bdy = xBrk->nextWord(Text, bdy.endPos, rLocale,
107                             WordType::ANYWORD_IGNOREWHITESPACES);
108             *out = (i == bdy.startPos) ?
109                 transToTitle->transliterateChar2Char(Text[i]) : Text[i];
110         }
111         *out = 0;
112         return OUString( pStr, SAL_NO_ACQUIRE );
113     }
114     catch (const RuntimeException&)
115     {
116         throw;
117     }
118     catch (const Exception& e)
119     {
120         uno::Any a(cppu::getCaughtException());
121         throw lang::WrappedTargetRuntimeException(
122             "wrapped " + a.getValueTypeName() + ": " + e.Message,
123             uno::Reference<uno::XInterface>(), a);
124     }
125 }
126 
127 sal_Int16 SAL_CALL
getType(const OUString & Text,sal_Int32 nPos)128 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) {
129     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
130     return static_cast<sal_Int16>(u_charType(Text.iterateCodePoints(&nPos, 0)));
131 }
132 
133 sal_Int16 SAL_CALL
getCharacterDirection(const OUString & Text,sal_Int32 nPos)134 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) {
135     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
136     return static_cast<sal_Int16>(u_charDirection(Text.iterateCodePoints(&nPos, 0)));
137 }
138 
139 
140 sal_Int16 SAL_CALL
getScript(const OUString & Text,sal_Int32 nPos)141 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) {
142     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
143     // ICU Unicode script type UBlockCode starts from 1 for Basic Latin,
144     // while OO.o enum UnicideScript starts from 0.
145     // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
146     return static_cast<sal_Int16>(ublock_getCode(Text.iterateCodePoints(&nPos, 0)))-1;
147 }
148 
149 
150 sal_Int32
getCharType(std::u16string_view Text,sal_Int32 * nPos,sal_Int32 increment)151 cclass_Unicode::getCharType( std::u16string_view Text, sal_Int32* nPos, sal_Int32 increment) {
152     using namespace ::com::sun::star::i18n::KCharacterType;
153 
154     sal_uInt32 ch = o3tl::iterateCodePoints(Text, nPos, increment);
155     switch ( u_charType(ch) ) {
156     // Upper
157     case U_UPPERCASE_LETTER :
158         return UPPER|LETTER|PRINTABLE|BASE_FORM;
159 
160     // Lower
161     case U_LOWERCASE_LETTER :
162         return LOWER|LETTER|PRINTABLE|BASE_FORM;
163 
164     // Title
165     case U_TITLECASE_LETTER :
166         return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
167 
168     // Letter
169     case U_MODIFIER_LETTER :
170     case U_OTHER_LETTER :
171         return LETTER|PRINTABLE|BASE_FORM;
172 
173     // Digit
174     case U_DECIMAL_DIGIT_NUMBER:
175     case U_LETTER_NUMBER:
176     case U_OTHER_NUMBER:
177         return DIGIT|PRINTABLE|BASE_FORM;
178 
179     // Base
180     case U_NON_SPACING_MARK:
181     case U_ENCLOSING_MARK:
182     case U_COMBINING_SPACING_MARK:
183         return BASE_FORM|PRINTABLE;
184 
185     // Print
186     case U_SPACE_SEPARATOR:
187 
188     case U_DASH_PUNCTUATION:
189     case U_INITIAL_PUNCTUATION:
190     case U_FINAL_PUNCTUATION:
191     case U_CONNECTOR_PUNCTUATION:
192     case U_OTHER_PUNCTUATION:
193 
194     case U_MATH_SYMBOL:
195     case U_CURRENCY_SYMBOL:
196     case U_MODIFIER_SYMBOL:
197     case U_OTHER_SYMBOL:
198         return PRINTABLE;
199 
200     // Control
201     case U_CONTROL_CHAR:
202     case U_FORMAT_CHAR:
203         return CONTROL;
204 
205     case U_LINE_SEPARATOR:
206     case U_PARAGRAPH_SEPARATOR:
207         return CONTROL|PRINTABLE;
208 
209     // for all others
210     default:
211         return U_GENERAL_OTHER_TYPES;
212     }
213 }
214 
215 sal_Int32 SAL_CALL
getCharacterType(const OUString & Text,sal_Int32 nPos,const Locale &)216 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) {
217     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
218     return getCharType(Text, &nPos, 0);
219 
220 }
221 
222 sal_Int32 SAL_CALL
getStringType(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale &)223 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) {
224     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
225 
226     sal_Int32 result = 0;
227 
228     while (nCount > 0 && nPos < Text.getLength())
229     {
230         sal_Int32 nOrigPos = nPos;
231         result |= getCharType(Text, &nPos, 1);
232         sal_Int32 nUtf16Units = nPos - nOrigPos;
233         nCount -= nUtf16Units;
234     }
235 
236     return result;
237 }
238 
parseAnyToken(const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)239 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
240             const OUString& Text,
241             sal_Int32 nPos,
242             const Locale& rLocale,
243             sal_Int32 startCharTokenType,
244             const OUString& userDefinedCharactersStart,
245             sal_Int32 contCharTokenType,
246             const OUString& userDefinedCharactersCont )
247 {
248     ParseResult r;
249     if ( Text.getLength() <= nPos )
250         return r;
251 
252     setupParserTable( rLocale,
253         startCharTokenType, userDefinedCharactersStart,
254         contCharTokenType, userDefinedCharactersCont );
255     parseText( r, Text, nPos );
256 
257     return r;
258 }
259 
260 
parsePredefinedToken(sal_Int32 nTokenType,const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)261 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
262             sal_Int32 nTokenType,
263             const OUString& Text,
264             sal_Int32 nPos,
265             const Locale& rLocale,
266             sal_Int32 startCharTokenType,
267             const OUString& userDefinedCharactersStart,
268             sal_Int32 contCharTokenType,
269             const OUString& userDefinedCharactersCont )
270 {
271     ParseResult r;
272     if ( Text.getLength() <= nPos )
273         return r;
274 
275     setupParserTable( rLocale,
276         startCharTokenType, userDefinedCharactersStart,
277         contCharTokenType, userDefinedCharactersCont );
278     parseText( r, Text, nPos, nTokenType );
279 
280     return r;
281 }
282 
getImplementationName()283 OUString SAL_CALL cclass_Unicode::getImplementationName()
284 {
285     return u"com.sun.star.i18n.CharacterClassification_Unicode"_ustr;
286 }
287 
supportsService(const OUString & rServiceName)288 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName)
289 {
290     return cppu::supportsService(this, rServiceName);
291 }
292 
getSupportedServiceNames()293 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames()
294 {
295     return { u"com.sun.star.i18n.CharacterClassification_Unicode"_ustr };
296 }
297 
298 }
299 
300 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(css::uno::XComponentContext * context,css::uno::Sequence<css::uno::Any> const &)301 com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(
302     css::uno::XComponentContext *context,
303     css::uno::Sequence<css::uno::Any> const &)
304 {
305     return cppu::acquire(new i18npool::cclass_Unicode(context));
306 }
307 
308 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
309