xref: /core/editeng/source/misc/svxacorr.cxx (revision ed0b12f4eadf1f2242f06cbd56804f75376274b1)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <utility>
22 #include <algorithm>
23 #include <string_view>
24 #include <sal/config.h>
25 
26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
27 #include <com/sun/star/embed/XStorage.hpp>
28 #include <com/sun/star/io/IOException.hpp>
29 #include <com/sun/star/io/XStream.hpp>
30 #include <tools/urlobj.hxx>
31 #include <i18nlangtag/mslangid.hxx>
32 #include <i18nutil/transliteration.hxx>
33 #include <sal/log.hxx>
34 #include <osl/diagnose.h>
35 #include <vcl/svapp.hxx>
36 #include <vcl/settings.hxx>
37 #include <svl/fstathelper.hxx>
38 #include <svl/urihelper.hxx>
39 #include <unotools/charclass.hxx>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <unotools/collatorwrapper.hxx>
42 #include <com/sun/star/i18n/UnicodeScript.hpp>
43 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
44 #include <unotools/localedatawrapper.hxx>
45 #include <unotools/transliterationwrapper.hxx>
46 #include <comphelper/processfactory.hxx>
47 #include <comphelper/sequence.hxx>
48 #include <comphelper/storagehelper.hxx>
49 #include <o3tl/string_view.hxx>
50 #include <editeng/editids.hrc>
51 #include <sot/storage.hxx>
52 #include <editeng/udlnitem.hxx>
53 #include <editeng/wghtitem.hxx>
54 #include <editeng/postitem.hxx>
55 #include <editeng/crossedoutitem.hxx>
56 #include <editeng/escapementitem.hxx>
57 #include <editeng/svxacorr.hxx>
58 #include <editeng/unolingu.hxx>
59 #include <vcl/window.hxx>
60 #include <com/sun/star/xml/sax/InputSource.hpp>
61 #include <com/sun/star/xml/sax/FastParser.hpp>
62 #include <com/sun/star/xml/sax/Writer.hpp>
63 #include <com/sun/star/xml/sax/SAXParseException.hpp>
64 #include <unotools/streamwrap.hxx>
65 #include "SvXMLAutoCorrectImport.hxx"
66 #include "SvXMLAutoCorrectExport.hxx"
67 #include "SvXMLAutoCorrectTokenHandler.hxx"
68 #include <ucbhelper/content.hxx>
69 #include <com/sun/star/ucb/ContentCreationException.hpp>
70 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
71 #include <com/sun/star/ucb/TransferInfo.hpp>
72 #include <com/sun/star/ucb/NameClash.hpp>
73 #include <comphelper/diagnose_ex.hxx>
74 #include <xmloff/xmltoken.hxx>
75 #include <unordered_map>
76 #include <rtl/character.hxx>
77 
78 using namespace ::com::sun::star::ucb;
79 using namespace ::com::sun::star::uno;
80 using namespace ::com::sun::star::xml::sax;
81 using namespace ::com::sun::star;
82 using namespace ::xmloff::token;
83 using namespace ::utl;
84 
namespace {

/// Bit flags, one bit per punctuation mark, so that several of them can be
/// combined in a single value.
enum class Flags {
    NONE            = 0,
    FullStop        = 1 << 0,
    ExclamationMark = 1 << 1,
    QuestionMark    = 1 << 2,
};

}
95 
96 namespace o3tl {
97     template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
98 }
99 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space
100 
101 constexpr OUString pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"_ustr;
102 constexpr OUString pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"_ustr;
103 constexpr OUString pXMLImplAutocorr_ListStr = u"DocumentList.xml"_ustr;
104 
// tdf#54409: besides the ASCII quotation marks, the typographical quotation
// marks are skipped as well.
// Open question: why are \u0083\u0084\u0089\u0091\u0092\u0093\u0094 handled as "begin characters"?

/* characters also skipped at the beginning - opening brackets and all kinds of begin characters */
constexpr std::u16string_view sImplSttSkipChars
    = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";

/* characters also skipped at the end - closing brackets and all kinds of end characters */
constexpr std::u16string_view sImplEndSkipChars
    = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";
112 
113 static OUString EncryptBlockName_Imp(std::u16string_view rName);
114 
NonFieldWordDelim(const sal_Unicode c)115 static bool NonFieldWordDelim( const sal_Unicode c )
116 {
117     return ' ' == c || '\t' == c || 0x0a == c ||
118             cNonBreakingSpace == c || 0x2011 == c;
119 }
120 
IsWordDelim(const sal_Unicode c)121 static bool IsWordDelim( const sal_Unicode c )
122 {
123     return c == 0x1 || NonFieldWordDelim(c);
124 }
125 
126 
IsLowerLetter(sal_Int32 nCharType)127 static bool IsLowerLetter( sal_Int32 nCharType )
128 {
129     return CharClass::isLetterType( nCharType ) &&
130            ( css::i18n::KCharacterType::LOWER & nCharType);
131 }
132 
IsUpperLetter(sal_Int32 nCharType)133 static bool IsUpperLetter( sal_Int32 nCharType )
134 {
135     return CharClass::isLetterType( nCharType ) &&
136             ( css::i18n::KCharacterType::UPPER & nCharType);
137 }
138 
lcl_IsUnsupportedUnicodeChar(CharClass const & rCC,const OUString & rTxt,sal_Int32 nStt,sal_Int32 nEnd)139 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
140                                    sal_Int32 nStt, sal_Int32 nEnd )
141 {
142     for( ; nStt < nEnd; ++nStt )
143     {
144         css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
145         switch( nScript )
146         {
147             case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
148             case css::i18n::UnicodeScript_kHangulJamo:
149             case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
150             case css::i18n::UnicodeScript_kHiragana:
151             case css::i18n::UnicodeScript_kKatakana:
152             case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
153             case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
154             case css::i18n::UnicodeScript_kCJKCompatibility:
155             case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA:
156             case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
157             case css::i18n::UnicodeScript_kHangulSyllable:
158             case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
159             case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
160                 return true;
161             default: ; //do nothing
162         }
163     }
164     return false;
165 }
166 
lcl_IsSymbolChar(CharClass const & rCC,const OUString & rTxt,sal_Int32 nStt,sal_Int32 nEnd)167 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
168                                   sal_Int32 nStt, sal_Int32 nEnd )
169 {
170     for( ; nStt < nEnd; ++nStt )
171     {
172         if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
173             return true;
174     }
175     return false;
176 }
177 
/// Returns true if the code c equals one of the UTF-16 code units in arr.
static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c)
{
    for (const auto cCandidate : arr)
    {
        if (cCandidate == c)
            return true;
    }
    return false;
}
182 
~SvxAutoCorrDoc()183 SvxAutoCorrDoc::~SvxAutoCorrDoc()
184 {
185 }
186 
187 // Called by the functions:
188 //  - FnCapitalStartWord
189 //  - FnCapitalStartSentence
190 // after the exchange of characters. Then the words, if necessary, can be inserted
191 // into the exception list.
SaveCpltSttWord(ACFlags,sal_Int32,const OUString &,sal_Unicode)192 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
193                                         sal_Unicode )
194 {
195 }
196 
GetLanguage(sal_Int32) const197 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
198 {
199     return LANGUAGE_SYSTEM;
200 }
201 
GetAppLang()202 static const LanguageTag& GetAppLang()
203 {
204     return Application::GetSettings().GetLanguageTag();
205 }
206 
207 /// Never use an unresolved LANGUAGE_SYSTEM.
GetDocLanguage(const SvxAutoCorrDoc & rDoc,sal_Int32 nPos)208 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
209 {
210     LanguageType eLang = rDoc.GetLanguage( nPos );
211     if (eLang == LANGUAGE_SYSTEM)
212         eLang = GetAppLang().getLanguageType();     // the current work locale
213     return eLang;
214 }
215 
GetLocaleDataWrapper(LanguageType nLang)216 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
217 {
218     static std::unique_ptr<LocaleDataWrapper> xLclDtWrp;
219     LanguageTag aLcl( nLang );
220     if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl)
221         xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl)));
222     return *xLclDtWrp;
223 }
GetIgnoreTranslWrapper()224 static TransliterationWrapper& GetIgnoreTranslWrapper()
225 {
226     static int bIsInit = 0;
227     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
228                 TransliterationFlags::IGNORE_KANA |
229                 TransliterationFlags::IGNORE_WIDTH );
230     if( !bIsInit )
231     {
232         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
233         bIsInit = 1;
234     }
235     return aWrp;
236 }
GetCollatorWrapper()237 static CollatorWrapper& GetCollatorWrapper()
238 {
239     static CollatorWrapper aCollWrp = []()
240     {
241         CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
242         tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
243         return tmp;
244     }();
245     return aCollWrp;
246 }
247 
IsAutoCorrectChar(sal_Unicode cChar)248 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
249 {
250     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
251             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
252             cChar == '*'  || cChar == '_'  || cChar == '%' ||
253             cChar == '.'  || cChar == ','  || cChar == ';' ||
254             cChar == ':'  || cChar == '?' || cChar == '!' ||
255             cChar == '<'  || cChar == '>' ||
256             cChar == '/'  || cChar == '-';
257 }
258 
259 namespace
260 {
IsCompoundWordDelimChar(sal_Unicode cChar)261     bool IsCompoundWordDelimChar(sal_Unicode cChar)
262     {
263         return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
264     }
265 }
266 
NeedsHardspaceAutocorr(sal_Unicode cChar)267 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
268 {
269     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
270         cChar == '/' /*case for the urls exception*/;
271 }
272 
GetDefaultFlags()273 ACFlags SvxAutoCorrect::GetDefaultFlags()
274 {
275     ACFlags nRet = ACFlags::Autocorrect
276                     | ACFlags::CapitalStartSentence
277                     | ACFlags::CapitalStartWord
278                     | ACFlags::ChgOrdinalNumber
279                     | ACFlags::ChgToEnEmDash
280                     | ACFlags::AddNonBrkSpace
281                     | ACFlags::TransliterateRTL
282                     | ACFlags::ChgAngleQuotes
283                     | ACFlags::ChgWeightUnderl
284                     | ACFlags::SetINetAttr
285                     | ACFlags::SetDOIAttr
286                     | ACFlags::ChgQuotes
287                     | ACFlags::SaveWordCplSttLst
288                     | ACFlags::SaveWordWordStartLst
289                     | ACFlags::CorrectCapsLock;
290     LanguageType eLang = GetAppLang().getLanguageType();
291     if( eLang.anyOf(
292         LANGUAGE_ENGLISH,
293         LANGUAGE_ENGLISH_US,
294         LANGUAGE_ENGLISH_UK,
295         LANGUAGE_ENGLISH_AUS,
296         LANGUAGE_ENGLISH_CAN,
297         LANGUAGE_ENGLISH_NZ,
298         LANGUAGE_ENGLISH_EIRE,
299         LANGUAGE_ENGLISH_SAFRICA,
300         LANGUAGE_ENGLISH_JAMAICA,
301         LANGUAGE_ENGLISH_CARIBBEAN))
302         nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
303     return nRet;
304 }
305 
306 constexpr sal_Unicode cEmDash = 0x2014;
307 constexpr sal_Unicode cEnDash = 0x2013;
308 constexpr OUString sEmDash(u"\u2014"_ustr);
309 constexpr OUString sEnDash(u"\u2013"_ustr);
310 constexpr sal_Unicode cApostrophe = 0x2019;
311 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
312 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
313 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
314 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
315 // stop characters for searching preceding quotes
316 // (the first character is also the opening quote we are looking for)
317 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
318 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
319 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
320 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
321 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
322 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
323 
SvxAutoCorrect(OUString aShareAutocorrFile,OUString aUserAutocorrFile)324 SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile,
325                                 OUString aUserAutocorrFile )
326     : sShareAutoCorrFile(std::move( aShareAutocorrFile ))
327     , sUserAutoCorrFile(std::move( aUserAutocorrFile ))
328     , eCharClassLang( LANGUAGE_DONTKNOW )
329     , nFlags(SvxAutoCorrect::GetDefaultFlags())
330     , cStartDQuote( 0 )
331     , cEndDQuote( 0 )
332     , cStartSQuote( 0 )
333     , cEndSQuote( 0 )
334 {
335 }
336 
// Copies file names, Writer flags, char-class language and quote characters
// from rCpy. The flags are copied with the three "list loaded" bits
// (ChgWordLstLoad, CplSttLstLoad, WordStartLstLoad) cleared.
SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
    : sShareAutoCorrFile( rCpy.sShareAutoCorrFile ),
      sUserAutoCorrFile( rCpy.sUserAutoCorrFile ),
      aSwFlags( rCpy.aSwFlags ),
      eCharClassLang( rCpy.eCharClassLang ),
      nFlags( rCpy.nFlags
              & ~ACFlags( ACFlags::ChgWordLstLoad
                          | ACFlags::CplSttLstLoad
                          | ACFlags::WordStartLstLoad ) ),
      cStartDQuote( rCpy.cStartDQuote ),
      cEndDQuote( rCpy.cEndDQuote ),
      cStartSQuote( rCpy.cStartSQuote ),
      cEndSQuote( rCpy.cEndSQuote )
{
}
349 
350 
~SvxAutoCorrect()351 SvxAutoCorrect::~SvxAutoCorrect()
352 {
353 }
354 
GetCharClass_(LanguageType eLang)355 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
356 {
357     moCharClass.emplace( LanguageTag( eLang) );
358     eCharClassLang = eLang;
359 }
360 
SetAutoCorrFlag(ACFlags nFlag,bool bOn)361 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
362 {
363     ACFlags nOld = nFlags;
364     nFlags = bOn ? nFlags | nFlag
365                  : nFlags & ~nFlag;
366 
367     if( !bOn )
368     {
369         if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
370             nFlags &= ~ACFlags::CplSttLstLoad;
371         if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
372             nFlags &= ~ACFlags::WordStartLstLoad;
373         if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
374             nFlags &= ~ACFlags::ChgWordLstLoad;
375     }
376 }
377 
378 
379 // Correct TWo INitial CApitals
FnCapitalStartWord(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)380 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
381                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
382                                     LanguageType eLang )
383 {
384     CharClass& rCC = GetCharClass( eLang );
385 
386     // Delete all non alphanumeric. Test the characters at the beginning/end of
387     // the word ( recognizes: "(min.", "/min.", and so on.)
388     for( ; nSttPos < nEndPos; ++nSttPos )
389         if( rCC.isLetterNumeric( rTxt, nSttPos ))
390             break;
391     for( ; nSttPos < nEndPos; --nEndPos )
392         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
393             break;
394 
395     // Is the word a compounded word separated by delimiters?
396     // If so, keep track of all delimiters so each constituent
397     // word can be checked for two initial capital letters.
398     std::deque<sal_Int32> aDelimiters;
399 
400     // Always check for two capitals at the beginning
401     // of the entire word, so start at nSttPos.
402     aDelimiters.push_back(nSttPos);
403 
404     // Find all compound word delimiters
405     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
406     {
407         if (IsCompoundWordDelimChar(rTxt[ n ]))
408         {
409             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
410         }
411     }
412 
413     // Decide where to put the terminating delimiter.
414     // If the last AutoCorrect char was a newline, then the AutoCorrect
415     // char will not be included in rTxt.
416     // If the last AutoCorrect char was not a newline, then the AutoCorrect
417     // character will be the last character in rTxt.
418     if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
419         aDelimiters.push_back(nEndPos);
420 
421     // Iterate through the word and all words that compose it.
422     // Two capital letters at the beginning of word?
423     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
424     {
425         nSttPos = aDelimiters[nI];
426         nEndPos = aDelimiters[nI + 1];
427 
428         if( nSttPos+2 < nEndPos &&
429             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
430             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
431             // Is the third character a lower case
432             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
433             // Do not replace special attributes
434             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
435         {
436             // test if the word is in an exception list
437             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
438             if( !FindInWordStartExceptList(eLang, sWord) )
439             {
440                 // Check that word isn't correctly spelt before correcting:
441                 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
442                     LinguMgr::GetSpellChecker();
443                 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
444                 {
445                     Sequence< css::beans::PropertyValue > aEmptySeq;
446                     if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
447                     {
448                         return;
449                     }
450                 }
451                 sal_Unicode cSave = rTxt[ nSttPos ];
452                 OUString sChar = rCC.lowercase( OUString(cSave) );
453                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
454                 {
455                     if( ACFlags::SaveWordWordStartLst & nFlags )
456                         rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
457                 }
458             }
459         }
460     }
461 }
462 
463 // Format ordinal numbers suffixes (1st -> 1^st)
FnChgOrdinalNumber(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)464 bool SvxAutoCorrect::FnChgOrdinalNumber(
465     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
466     sal_Int32 nSttPos, sal_Int32 nEndPos,
467     LanguageType eLang)
468 {
469     // 1st, 2nd, 3rd, 4 - 0th
470     // 201th or 201st
471     // 12th or 12nd
472     bool bChg = false;
473 
474     // In some languages ordinal suffixes should never be
475     // changed to superscript. Let's break for those languages.
476     if (!eLang.anyOf(
477          LANGUAGE_CATALAN,              // tdf#156792
478          LANGUAGE_CATALAN_VALENCIAN,
479          LANGUAGE_SWEDISH,
480          LANGUAGE_SWEDISH_FINLAND))
481     {
482         CharClass& rCC = GetCharClass(eLang);
483 
484         for (; nSttPos < nEndPos; ++nSttPos)
485             if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos]))
486                 break;
487         for (; nSttPos < nEndPos; --nEndPos)
488             if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
489                 break;
490 
491 
492         // Get the last number in the string to check
493         sal_Int32 nNumEnd = nEndPos;
494         bool bFoundEnd = false;
495         bool isValidNumber = true;
496         sal_Int32 i = nEndPos;
497         while (i > nSttPos)
498         {
499             i--;
500             bool isDigit = rCC.isDigit(rTxt, i);
501             if (bFoundEnd)
502                 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
503 
504             if (isDigit && !bFoundEnd)
505             {
506                 bFoundEnd = true;
507                 nNumEnd = i;
508             }
509         }
510 
511         if (bFoundEnd && isValidNumber) {
512             sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1));
513             std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1);
514 
515             // Check if the characters after that number correspond to the ordinal suffix
516             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
517                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
518 
519             uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
520 
521             // add extra suffixes for languages not handled by i18npool/ICU
522             if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) &&
523                             ( nEndPos == nNumEnd + 3 || nEndPos == nNumEnd + 4 ) &&
524                             ( sEnd[0] == 'a' || sEnd[0] == 'o' || sEnd[0] == 'r' ) )
525             {
526                auto aExtendedSuffixes = comphelper::sequenceToContainer< std::vector<OUString> >(aSuffixes);
527                aExtendedSuffixes.push_back(u"as"_ustr); // plural form of 'a'
528                aExtendedSuffixes.push_back(u"os"_ustr); // plural form of 'o'
529                aExtendedSuffixes.push_back(u"ra"_ustr); // alternative form of 'a'
530                aExtendedSuffixes.push_back(u"ro"_ustr); // alternative form of 'o'
531                aExtendedSuffixes.push_back(u"ras"_ustr); // alternative form of "as"
532                aExtendedSuffixes.push_back(u"ros"_ustr); // alternative form of "os"
533                aSuffixes = comphelper::containerToSequence(aExtendedSuffixes);
534             }
535 
536             for (OUString const & sSuffix : aSuffixes)
537             {
538                 if (sSuffix == sEnd)
539                 {
540                     // Check if the ordinal suffix has to be set as super script
541                     if (rCC.isLetter(sSuffix))
542                     {
543                         sal_Int32 nNumberChanged = 0;
544                         sal_Int32 nSuffixChanged = 0;
545                         // exceptions for Portuguese
546                         // add missing dot: 1a -> 1.ª
547                         // and remove optional 'r': 1ro -> 1.º
548                         if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) )
549                         {
550                             if ( sSuffix.startsWith("r") )
551                             {
552                                 rDoc.Delete( nNumEnd + 1, nNumEnd + 2 );
553                                 nSuffixChanged = -1;
554                             }
555                             rDoc.Insert( nNumEnd + 1, u"."_ustr );
556                             nNumberChanged = 1;
557                         }
558 
559                         // Do the change
560                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
561                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
562                         rDoc.SetAttr(nNumEnd + 1 + nNumberChanged,
563                             nEndPos + nNumberChanged + nSuffixChanged,
564                             SID_ATTR_CHAR_ESCAPEMENT,
565                             aSvxEscapementItem);
566                         bChg = true;
567                         break;
568                     }
569                 }
570             }
571         }
572     }
573     return bChg;
574 }
575 
576 // Replace dashes
FnChgToEnEmDash(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)577 bool SvxAutoCorrect::FnChgToEnEmDash(
578                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
579                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
580                                 LanguageType eLang )
581 {
582     bool bRet = false;
583     CharClass& rCC = GetCharClass( eLang );
584     if (eLang == LANGUAGE_SYSTEM)
585         eLang = GetAppLang().getLanguageType();
586     bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
587 
588     // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
589     // keep a local copy for later use
590     OUString aOrigTxt = rTxt;
591     sal_Int32 nFirstReplacementTextLengthChange = 0;
592 
593     // replace " - " or " --" with "enDash"
594     if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
595     {
596         sal_Unicode cCh = rTxt[ nSttPos ];
597         if( '-' == cCh )
598         {
599             if( 1 < nEndPos - nSttPos &&
600                 ' ' == rTxt[ nSttPos-1 ] &&
601                 '-' == rTxt[ nSttPos+1 ])
602             {
603                 sal_Int32 n;
604                 for( n = nSttPos+2; n < nEndPos && lcl_IsInArr(
605                             sImplSttSkipChars,(cCh = rTxt[ n ]));
606                         ++n )
607                     ;
608 
609                 // found: " --[<AnySttChars>][A-z0-9]
610                 if( rCC.isLetterNumeric( OUString(cCh) ) )
611                 {
612                     for( n = nSttPos-1; n && lcl_IsInArr(
613                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
614                         ;
615 
616                     // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
617                     if( rCC.isLetterNumeric( OUString(cCh) ))
618                     {
619                         rDoc.Delete( nSttPos, nSttPos + 2 );
620                         rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
621                         nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch
622                         bRet = true;
623                     }
624                 }
625             }
626         }
627         else if( 3 < nSttPos &&
628                  ' ' == rTxt[ nSttPos-1 ] &&
629                  '-' == rTxt[ nSttPos-2 ])
630         {
631             sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
632             if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
633             {
634                 --nTmpPos;
635                 ++nLen;
636                 cCh = rTxt[ nTmpPos-1 ];
637             }
638             if( ' ' == cCh )
639             {
640                 for( n = nSttPos; n < nEndPos && lcl_IsInArr(
641                             sImplSttSkipChars,(cCh = rTxt[ n ]));
642                         ++n )
643                     ;
644 
645                 // found: " - [<AnySttChars>][A-z0-9]
646                 if( rCC.isLetterNumeric( OUString(cCh) ) )
647                 {
648                     cCh = ' ';
649                     for( n = nTmpPos-1; n && lcl_IsInArr(
650                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
651                             ;
652                     // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
653                     if (rCC.isLetterNumeric(OUString(cCh)) || lcl_IsInArr(u".!?", cCh))
654                     {
655                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
656                         rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
657                         nFirstReplacementTextLengthChange = 1 - nLen; // nLen ch -> 1 ch
658                         bRet = true;
659                     }
660                 }
661             }
662         }
663     }
664 
665     // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
666     // [0-9]--[0-9] double dash always replaced with "enDash"
667     // Finnish and Hungarian use enDash instead of emDash.
668     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
669     if( 4 <= nEndPos - nSttPos )
670     {
671         std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
672         size_t nFndPos = sTmpView.find(u"--");
673         if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
674         {
675             // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
676             // uses the index *both* as code unit index (when checking it as ASCII), *and*
677             // as code point index (when passes to css::i18n::XCharacterClassification).
678             // Oh well... Anyway, single-codepoint strings will workaround it.
679             sal_Int32 nStart = nSttPos + nFndPos;
680             sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
681             OUString sStart(&chStart, 1);
682             // No idea why sImplEndSkipChars is checked at start
683             if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart))
684             {
685                 sal_Int32 nEnd = nSttPos + nFndPos + 2;
686                 sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
687                 OUString sEnd(&chEnd, 1);
688                 // No idea why sImplSttSkipChars is checked at end
689                 if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd))
690                 {
691                     nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange;
692                     rDoc.Delete(nSttPos, nSttPos + 2);
693                     rDoc.Insert(nSttPos,
694                                 (bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0))
695                                      ? sEnDash
696                                      : sEmDash));
697                     bRet = true;
698                 }
699             }
700         }
701     }
702     return bRet;
703 }
704 
705 // Add non-breaking space before specific punctuation marks in French text
FnAddNonBrkSpace(SvxAutoCorrDoc & rDoc,std::u16string_view rTxt,sal_Int32 nEndPos,LanguageType eLang,bool & io_bNbspRunNext)706 sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace(
707                                 SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
708                                 sal_Int32 nEndPos,
709                                 LanguageType eLang, bool& io_bNbspRunNext )
710 {
711     sal_Int32 nRet = -1;
712 
713     CharClass& rCC = GetCharClass( eLang );
714 
715     if ( rCC.getLanguageTag().getLanguage() == "fr" )
716     {
717         bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
718         OUString allChars = u":;?!%"_ustr;
719         OUString chars( allChars );
720         if ( bFrCA )
721             chars = ":";
722 
723         sal_Unicode cChar = rTxt[ nEndPos ];
724         bool bHasSpace = chars.indexOf( cChar ) != -1;
725         bool bIsSpecial = allChars.indexOf( cChar ) != -1;
726         if ( bIsSpecial )
727         {
728             // Get the last word delimiter position
729             sal_Int32 nSttWdPos = nEndPos;
730             bool bWasWordDelim = false;
731             while( nSttWdPos )
732             {
733                 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
734                 if (bWasWordDelim)
735                     break;
736             }
737 
738             //See if the text is the start of a protocol string, e.g. have text of
739             //"http" see if it is the start of "http:" and if so leave it alone
740             size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
741             size_t nProtocolLen = nEndPos - nSttWdPos + 1;
742             if (nIndex + nProtocolLen <= rTxt.size())
743             {
744                 if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid)
745                     return -1;
746             }
747 
748             // Check the presence of "://" in the word
749             size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 );
750             if ( nStrPos == std::u16string_view::npos && nEndPos > 0 )
751             {
752                 // Check the previous char
753                 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
754                 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
755                 {
756                     // Remove any previous normal space
757                     sal_Int32 nPos = nEndPos - 1;
758                     while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
759                     {
760                         if ( nPos == 0 ) break;
761                         nPos--;
762                         cPrevChar = rTxt[ nPos ];
763                     }
764 
765                     nPos++;
766                     if ( nEndPos - nPos > 0 )
767                         rDoc.Delete( nPos, nEndPos );
768 
769                     // Add the non-breaking space at the end pos
770                     if ( bHasSpace )
771                         rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
772                     io_bNbspRunNext = true;
773                     nRet = nPos;
774                 }
775                 else if ( chars.indexOf( cPrevChar ) != -1 )
776                     io_bNbspRunNext = true;
777             }
778         }
779         else if ( cChar == '/' && nEndPos > 1 && static_cast<sal_Int32>(rTxt.size()) > (nEndPos - 1) )
780         {
781             // Remove the hardspace right before to avoid formatting URLs
782             sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
783             sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
784             if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
785             {
786                 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
787                 nRet = nEndPos - 1;
788             }
789         }
790     }
791 
792     return nRet;
793 }
794 
795 // URL recognition
FnSetINetAttr(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)796 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
797                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
798                                     LanguageType eLang )
799 {
800     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
801                                                 GetCharClass( eLang ) ));
802     bool bRet = !sURL.isEmpty();
803     if( bRet )          // so, set attribute:
804         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
805     return bRet;
806 }
807 
808 // DOI citation recognition
FnSetDOIAttr(SvxAutoCorrDoc & rDoc,std::u16string_view rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)809 bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
810                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
811                                     LanguageType eLang )
812 {
813     OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
814     bool bRet = !sURL.isEmpty();
815     if( bRet )          // so, set attribute:
816         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
817     return bRet;
818 }
819 
820 // Automatic *bold*, /italic/, -strikeout- and _underline_
FnChgWeightUnderl(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nEndPos)821 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
822                                         sal_Int32 nEndPos )
823 {
824     // Condition:
825     //  at the beginning:   _, *, / or ~ after Space with the following !Space
826     //  at the end:         _, *, / or ~ before Space (word delimiter?)
827 
828     sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
829     if( ++nEndPos != rTxt.getLength() &&
830         !IsWordDelim( rTxt[ nEndPos ] ) )
831         return false;
832 
833     --nEndPos;
834 
835     bool bAlphaNum = false;
836     sal_Int32 nPos = nEndPos;
837     sal_Int32  nFndPos = -1;
838     CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
839 
840     while( nPos )
841     {
842         switch( sal_Unicode c = rTxt[ --nPos ] )
843         {
844         case '_':
845         case '-':
846         case '/':
847         case '*':
848             if( c == cInsChar )
849             {
850                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
851                     IsWordDelim( rTxt[ nPos-1 ])) &&
852                     !IsWordDelim( rTxt[ nPos+1 ]))
853                         nFndPos = nPos;
854                 else
855                     // Condition is not satisfied, so cancel
856                     nFndPos = -1;
857                 nPos = 0;
858             }
859             break;
860         default:
861             if( !bAlphaNum )
862                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
863         }
864     }
865 
866     if( -1 != nFndPos )
867     {
868         // first delete the Character at the end - this allows insertion
869         // of an empty hint in SetAttr which would be removed by Delete
870         // (fdo#62536, AUTOFMT in Writer)
871         rDoc.Delete( nEndPos, nEndPos + 1 );
872 
873         // Span the Attribute over the area
874         // the end.
875         if( '*' == cInsChar )           // Bold
876         {
877             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
878             rDoc.SetAttr( nFndPos + 1, nEndPos,
879                           SID_ATTR_CHAR_WEIGHT,
880                           aSvxWeightItem);
881         }
882         else if( '/' == cInsChar )           // Italic
883         {
884             SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
885             rDoc.SetAttr( nFndPos + 1, nEndPos,
886                           SID_ATTR_CHAR_POSTURE,
887                           aSvxPostureItem);
888         }
889         else if( '-' == cInsChar )           // Strikeout
890         {
891             SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
892             rDoc.SetAttr( nFndPos + 1, nEndPos,
893                           SID_ATTR_CHAR_STRIKEOUT,
894                           aSvxCrossedOutItem);
895         }
896         else                            // Underline
897         {
898             SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
899             rDoc.SetAttr( nFndPos + 1, nEndPos,
900                           SID_ATTR_CHAR_UNDERLINE,
901                           aSvxUnderlineItem);
902         }
903         rDoc.Delete( nFndPos, nFndPos + 1 );
904     }
905 
906     return -1 != nFndPos;
907 }
908 
909 // Capitalize first letter of every sentence
FnCapitalStartSentence(SvxAutoCorrDoc & rDoc,const OUString & rTxt,bool bNormalPos,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)910 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
911                                     const OUString& rTxt, bool bNormalPos,
912                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
913                                     LanguageType eLang )
914 {
915 
916     if( rTxt.isEmpty() || nEndPos <= nSttPos )
917         return;
918 
919     CharClass& rCC = GetCharClass( eLang );
920     OUString aText( rTxt );
921     const sal_Unicode *pStart = aText.getStr(),
922                       *pStr = pStart + nEndPos,
923                       *pWordStt = nullptr,
924                       *pDelim = nullptr;
925 
926     bool bAtStart = false;
927     do {
928         --pStr;
929         if (rCC.isLetter(aText, pStr - pStart))
930         {
931             if( !pWordStt )
932                 pDelim = pStr+1;
933             pWordStt = pStr;
934         }
935         else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
936         {
937             if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
938                 pWordStt - 1 == pStr &&
939                 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
940                 (pStart + 1) <= pStr &&
941                 rCC.isLetter(aText, pStr-1 - pStart))
942                 pWordStt = --pStr;
943             else
944                 break;
945         }
946         bAtStart = (pStart == pStr);
947     } while( !bAtStart );
948 
949     if (!pWordStt)
950         return;    // no character to be replaced
951 
952 
953     if (rCC.isDigit(aText, pStr - pStart))
954         return; // already ok
955 
956     if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
957         return; // already ok
958 
959     //See if the text is the start of a protocol string, e.g. have text of
960     //"http" see if it is the start of "http:" and if so leave it alone
961     sal_Int32 nIndex = pWordStt - pStart;
962     sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
963     if (nIndex + nProtocolLen <= rTxt.getLength())
964     {
965         if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
966             return; // already ok
967     }
968 
969     if (0x1 == *pWordStt || 0x2 == *pWordStt)
970         return; // already ok
971 
972     // Only capitalize, if string before specified characters is long enough
973     if( *pDelim && 2 >= pDelim - pWordStt &&
974         lcl_IsInArr( u".-)>", *pDelim ) )
975         return;
976 
977     // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
978     if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
979         return;
980 
981     if( !bAtStart ) // Still no beginning of a paragraph?
982     {
983         if (NonFieldWordDelim(*pStr))
984         {
985             for (;;)
986             {
987                 bAtStart = (pStart == pStr--);
988                 if (bAtStart || !NonFieldWordDelim(*pStr))
989                     break;
990             }
991         }
992         // Asian full stop, full width full stop, full width exclamation mark
993         // and full width question marks are treated as word delimiters
994         else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
995                   0xFF1F != *pStr )
996             return; // no valid separator -> no replacement
997     }
998 
999     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1000     if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
1001         return;
1002 
1003     if( bAtStart )  // at the beginning of a paragraph?
1004     {
1005         // Check out the previous paragraph, if it exists.
1006         // If so, then check to paragraph separator at the end.
1007         OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
1008         if (!pPrevPara)
1009         {
1010             // valid separator -> replace
1011             OUString sChar( *pWordStt );
1012             sChar = rCC.titlecase(sChar); //see fdo#56740
1013             if (sChar != OUStringChar(*pWordStt))
1014                rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
1015             return;
1016         }
1017 
1018         aText = *pPrevPara;
1019         bAtStart = false;
1020         pStart = aText.getStr();
1021         pStr = pStart + aText.getLength();
1022 
1023         do {            // overwrite all blanks
1024             --pStr;
1025             if (!NonFieldWordDelim(*pStr))
1026                 break;
1027             bAtStart = (pStart == pStr);
1028         } while( !bAtStart );
1029 
1030         if( bAtStart )
1031             return;  // no valid separator -> no replacement
1032     }
1033 
1034     // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
1035     // all three can happen, but not more than once!
1036     const sal_Unicode* pExceptStt = nullptr;
1037     bool bContinue = true;
1038     Flags nFlag = Flags::NONE;
1039     do
1040     {
1041         switch (*pStr)
1042         {
1043             // Western and Asian full stop
1044             case '.':
1045             case 0x3002:
1046             case 0xFF0E:
1047             {
1048                 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
1049                 {
1050                     //e.g. text "f.o.o. word": Now currently considering
1051                     //capitalizing word but second last character of
1052                     //previous word is a .  So probably last word is an
1053                     //anagram that ends in . and not truly the end of a
1054                     //previous sentence, so don't autocapitalize this word
1055                     return;
1056                 }
1057                 if (nFlag & Flags::FullStop)
1058                     return; // no valid separator -> no replacement
1059                 nFlag |= Flags::FullStop;
1060                 pExceptStt = pStr;
1061             }
1062             break;
1063             case '!':
1064             case 0xFF01:
1065             {
1066                 if (nFlag & Flags::ExclamationMark)
1067                     return; // no valid separator -> no replacement
1068                 nFlag |= Flags::ExclamationMark;
1069             }
1070             break;
1071             case '?':
1072             case 0xFF1F:
1073             {
1074                 if (nFlag & Flags::QuestionMark)
1075                     return; // no valid separator -> no replacement
1076                 nFlag |= Flags::QuestionMark;
1077             }
1078             break;
1079             default:
1080                 if (nFlag == Flags::NONE)
1081                     return; // no valid separator -> no replacement
1082                 else
1083                     bContinue = false;
1084                 break;
1085         }
1086 
1087         if (bContinue && pStr-- == pStart)
1088         {
1089             return; // no valid separator -> no replacement
1090         }
1091     } while (bContinue);
1092     if (Flags::FullStop != nFlag)
1093         pExceptStt = nullptr;
1094 
1095     // Only capitalize, if string is long enough
1096     if( 2 > ( pStr - pStart ) )
1097         return;
1098 
1099     if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1100     {
1101         bool bValid = false, bAlphaFnd = false;
1102         const sal_Unicode* pTmpStr = pStr;
1103         while( !bValid )
1104         {
1105             if( rCC.isDigit( aText, pTmpStr - pStart ) )
1106             {
1107                 bValid = true;
1108                 pStr = pTmpStr - 1;
1109             }
1110             else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1111             {
1112                 if( bAlphaFnd )
1113                 {
1114                     bValid = true;
1115                     pStr = pTmpStr;
1116                 }
1117                 else
1118                     bAlphaFnd = true;
1119             }
1120             else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1121                 break;
1122 
1123             if( pTmpStr == pStart )
1124                 break;
1125 
1126             --pTmpStr;
1127         }
1128 
1129         if( !bValid )
1130             return;       // no valid separator -> no replacement
1131     }
1132 
1133     bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1134 
1135     // Search for the beginning of the word
1136     while (!NonFieldWordDelim(*pStr))
1137     {
1138         if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1139             bNumericOnly = false;
1140 
1141         if( pStart == pStr )
1142             break;
1143 
1144         --pStr;
1145     }
1146 
1147     if( bNumericOnly )      // consists of only numbers, then not
1148         return;
1149 
1150     if (NonFieldWordDelim(*pStr))
1151         ++pStr;
1152 
1153     OUString sWord;
1154 
1155     // check on the basis of the exception list
1156     if( pExceptStt )
1157     {
1158         sWord = OUString(pStr, pExceptStt - pStr + 1);
1159         if( FindInCplSttExceptList(eLang, sWord) )
1160             return;
1161 
1162         // Delete all non alphanumeric. Test the characters at the
1163         // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1164         OUString sTmp( sWord );
1165         while( !sTmp.isEmpty() &&
1166                 !rCC.isLetterNumeric( sTmp, 0 ) )
1167             sTmp = sTmp.copy(1);
1168 
1169         // Remove all non alphanumeric characters towards the end up until
1170         // the last one.
1171         sal_Int32 nLen = sTmp.getLength();
1172         while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1173             --nLen;
1174         if( nLen + 1 < sTmp.getLength() )
1175             sTmp = sTmp.copy( 0, nLen + 1 );
1176 
1177         if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1178             FindInCplSttExceptList(eLang, sTmp))
1179             return;
1180 
1181         if(FindInCplSttExceptList(eLang, sWord, true))
1182             return;
1183     }
1184 
1185     // Ok, then replace
1186     sal_Unicode cSave = *pWordStt;
1187     nSttPos = pWordStt - rTxt.getStr();
1188     OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1189     bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1190 
1191     // Perhaps someone wants to have the word
1192     if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1193         rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1194 }
1195 
1196 // Correct accidental use of cAPS LOCK key
FnCorrectCapsLock(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)1197 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1198                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1199                                         LanguageType eLang )
1200 {
1201     if (nEndPos - nSttPos < 2)
1202         // string must be at least 2-character long.
1203         return false;
1204 
1205     CharClass& rCC = GetCharClass( eLang );
1206 
1207     // Check the first 2 letters.
1208     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1209         return false;
1210 
1211     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1212         return false;
1213 
1214     OUStringBuffer aConverted;
1215     aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1216     aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1217 
1218     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1219     if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1220         return false;
1221 
1222     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1223     {
1224         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1225             // A lowercase letter disqualifies the whole text.
1226             return false;
1227 
1228         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1229             // Another uppercase letter.  Convert it.
1230             aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1231         else
1232             // This is not an alphabetic letter.  Leave it as-is.
1233             aConverted.append( rTxt[i] );
1234     }
1235 
1236     // Replace the word.
1237     rDoc.Delete(nSttPos, nEndPos);
1238     rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1239 
1240     return true;
1241 }
1242 
1243 
GetQuote(sal_Unicode cInsChar,bool bSttQuote,LanguageType eLang) const1244 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1245                                         LanguageType eLang ) const
1246 {
1247     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1248                                     ? GetStartDoubleQuote()
1249                                     : GetStartSingleQuote() )
1250                                    : ( '\"' == cInsChar
1251                                     ? GetEndDoubleQuote()
1252                                     : GetEndSingleQuote() );
1253     if( !cRet )
1254     {
1255         // then through the Language find the right character
1256         if( LANGUAGE_NONE == eLang )
1257             cRet = cInsChar;
1258         else
1259         {
1260             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1261             OUString sRet( bSttQuote
1262                             ? ( '\"' == cInsChar
1263                                 ? rLcl.getDoubleQuotationMarkStart()
1264                                 : rLcl.getQuotationMarkStart() )
1265                             : ( '\"' == cInsChar
1266                                 ? rLcl.getDoubleQuotationMarkEnd()
1267                                 : rLcl.getQuotationMarkEnd() ));
1268             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1269         }
1270     }
1271     return cRet;
1272 }
1273 
InsertQuote(SvxAutoCorrDoc & rDoc,sal_Int32 nInsPos,sal_Unicode cInsChar,bool bSttQuote,bool bIns,LanguageType eLang,ACQuotes eType) const1274 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1275                                     sal_Unicode cInsChar, bool bSttQuote,
1276                                     bool bIns, LanguageType eLang, ACQuotes eType ) const
1277 {
1278     sal_Unicode cRet;
1279 
1280     if ( eType == ACQuotes::DoubleAngleQuote )
1281     {
1282         bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1283         // pressing " inside a quotation -> use second level angle quotes
1284         bool bLeftQuote = '\"' == cInsChar &&
1285                 // start position and Romanian OR
1286                 // not start position and Hungarian
1287                 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1288         cRet = ( '<' == cInsChar || bLeftQuote )
1289                 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1290                 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1291     }
1292     else if ( eType == ACQuotes::UseApostrophe )
1293         cRet = cApostrophe;
1294     else
1295         cRet = GetQuote( cInsChar, bSttQuote, eLang );
1296 
1297     OUString sChg( cInsChar );
1298     if( bIns )
1299         rDoc.Insert( nInsPos, sChg );
1300     else
1301         rDoc.Replace( nInsPos, sChg );
1302 
1303     sChg = OUString(cRet);
1304 
1305     if( eType == ACQuotes::NonBreakingSpace )
1306     {
1307         if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1308         {
1309             if( !bSttQuote )
1310                 ++nInsPos;
1311         }
1312     }
1313     else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1314     {
1315         rDoc.Delete( nInsPos-1, nInsPos);
1316         --nInsPos;
1317     }
1318 
1319     rDoc.Replace( nInsPos, sChg );
1320 
1321     // i' -> I' in English (last step for the Undo)
1322     if( eType == ACQuotes::CapitalizeIAm )
1323         rDoc.Replace( nInsPos-1, u"I"_ustr );
1324 }
1325 
GetQuote(SvxAutoCorrDoc const & rDoc,sal_Int32 nInsPos,sal_Unicode cInsChar,bool bSttQuote)1326 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1327                                 sal_Unicode cInsChar, bool bSttQuote )
1328 {
1329     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1330     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1331 
1332     OUString sRet(cRet);
1333 
1334     if( '\"' == cInsChar )
1335     {
1336         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1337         {
1338             if( bSttQuote )
1339                 sRet += " ";
1340             else
1341                 sRet = " " + sRet;
1342         }
1343     }
1344     return sRet;
1345 }
1346 
1347 // search preceding opening quote in the paragraph before the insert position
lcl_HasPrecedingChar(std::u16string_view rTxt,sal_Int32 nPos,const sal_Unicode sPrecedingChar,const sal_Unicode sStopChar,const sal_Unicode * aStopChars)1348 static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
1349                 const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars )
1350 {
1351     sal_Unicode cTmpChar;
1352 
1353     do {
1354         cTmpChar = rTxt[ --nPos ];
1355         if ( cTmpChar == sPrecedingChar )
1356             return true;
1357 
1358         if ( cTmpChar == sStopChar )
1359             return false;
1360 
1361         for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
1362             if ( cTmpChar == *pCh )
1363                 return false;
1364 
1365     } while ( nPos > 0 );
1366 
1367     return false;
1368 }
1369 
1370 // WARNING: rText may become invalid, see comment below
DoAutoCorrect(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nInsPos,sal_Unicode cChar,bool bInsert,bool & io_bNbspRunNext,vcl::Window const * pFrameWin)1371 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1372                                     sal_Int32 nInsPos, sal_Unicode cChar,
1373                                     bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1374 {
1375     bool bIsNextRun = io_bNbspRunNext;
1376     io_bNbspRunNext = false;  // if it was set, then it has to be turned off
1377 
1378     do{                                 // only for middle check loop !!
1379         if( cChar )
1380         {
1381             // Prevent double space
1382             if( nInsPos && ' ' == cChar &&
1383                 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1384                 ' ' == rTxt[ nInsPos - 1 ])
1385             {
1386                 break;
1387             }
1388 
1389             bool bSingle = '\'' == cChar;
1390             bool bIsReplaceQuote =
1391                         (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1392                         (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1393             if( bIsReplaceQuote )
1394             {
1395                 bool bSttQuote = !nInsPos;
1396                 ACQuotes eType = ACQuotes::NONE;
1397                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1398                 if (!bSttQuote)
1399                 {
1400                     sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1401                     bSttQuote = NonFieldWordDelim(cPrev) ||
1402                         lcl_IsInArr( u"([{", cPrev ) ||
1403                         ( cEmDash == cPrev ) ||
1404                         ( cEnDash == cPrev );
1405                     // tdf#38394 use opening quotation mark << in French l'<<word>>
1406                     if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1407                         primary(eLang) == primary(LANGUAGE_FRENCH) &&
1408                         ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1409                                // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1410                                u"cdjlnmtsCDJLNMTS"_ustr.indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1411                           ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1412                                // abbreviated form of que
1413                                ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1414                                ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1415                     {
1416                         bSttQuote = true;
1417                     }
1418                     // tdf#108423 for capitalization of English i'm
1419                     else if ( bSingle && ( cPrev == 'i' ) &&
1420                         primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1421                         ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1422                     {
1423                         eType = ACQuotes::CapitalizeIAm;
1424                     }
1425                     // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1426                     else if ( !bSingle && nInsPos &&
1427                         ( ( eLang == LANGUAGE_HUNGARIAN &&
1428                             lcl_HasPrecedingChar( rTxt, nInsPos,
1429                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1430                                 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEnd[1],
1431                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 2 ) ) ||
1432                           ( eLang.anyOf(
1433                                 LANGUAGE_ROMANIAN,
1434                                 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1435                             lcl_HasPrecedingChar( rTxt, nInsPos,
1436                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1437                                 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEndRo[1],
1438                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 2 ) ) ) )
1439                     {
1440                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1441                         // only if the opening double quotation mark is the default one
1442                         if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1443                             eType = ACQuotes::DoubleAngleQuote;
1444                     }
1445                     else if ( bSingle && nInsPos && !bSttQuote &&
1446                         // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1447                         // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1448                         // tdf#123786 the same for Russian and Ukrainian
1449                         ( eLang.anyOf (
1450                                  LANGUAGE_CZECH,
1451                                  LANGUAGE_GERMAN,
1452                                  LANGUAGE_GERMAN_SWISS,
1453                                  LANGUAGE_GERMAN_AUSTRIAN,
1454                                  LANGUAGE_GERMAN_LUXEMBOURG,
1455                                  LANGUAGE_GERMAN_LIECHTENSTEIN,
1456                                  LANGUAGE_ICELANDIC,
1457                                  LANGUAGE_SLOVAK,
1458                                  LANGUAGE_SLOVENIAN ) ) )
1459                     {
1460                         sal_Unicode sStartChar = GetStartSingleQuote();
1461                         sal_Unicode sEndChar = GetEndSingleQuote();
1462                         if ( !sStartChar || !sEndChar ) {
1463                             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1464                             if ( !sStartChar ) sStartChar = rLcl.getQuotationMarkStart()[0];
1465                             if ( !sEndChar ) sEndChar = rLcl.getQuotationMarkStart()[0];
1466                         }
1467                         if ( !lcl_HasPrecedingChar( rTxt, nInsPos, sStartChar, sEndChar, aStopSingleQuoteEnd + 1 ) )
1468                         {
1469                             CharClass& rCC = GetCharClass( eLang );
1470                             if ( rCC.isLetter(rTxt, nInsPos-1) )
1471                             {
1472                                 eType = ACQuotes::UseApostrophe;
1473                             }
1474                         }
1475                     }
1476                     else if ( bSingle && nInsPos && !bSttQuote &&
1477                           ( eLang.anyOf (
1478                                  LANGUAGE_RUSSIAN,
1479                                  LANGUAGE_UKRAINIAN ) &&
1480                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa[1],  aStopSingleQuoteEndRuUa + 2 ) ) )
1481                     {
1482                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1483                         CharClass& rCC = GetCharClass( eLang );
1484                         if ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) &&
1485                              // use apostrophe only after letters, not after digits or punctuation
1486                              rCC.isLetter(rTxt, nInsPos-1) )
1487                         {
1488                             eType = ACQuotes::UseApostrophe;
1489                         }
1490                     }
1491                 }
1492 
1493                 if ( eType == ACQuotes::NONE && !bSingle &&
1494                     ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1495                     eType = ACQuotes::NonBreakingSpace;
1496 
1497                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1498                 break;
1499             }
1500             // tdf#133524 change "<<" and ">>" to double angle quotation marks
1501             else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1502                 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1503                 ('<' == cChar || '>' == cChar) &&
1504                 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1505             {
1506                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1507                 if ( eLang.anyOf(
1508                         LANGUAGE_CATALAN,              // primary level
1509                         LANGUAGE_CATALAN_VALENCIAN,    // primary level
1510                         LANGUAGE_FINNISH,              // alternative primary level
1511                         LANGUAGE_FRENCH_SWISS,         // second level
1512                         LANGUAGE_GALICIAN,             // primary level
1513                         LANGUAGE_HUNGARIAN,            // second level
1514                         LANGUAGE_POLISH,               // second level
1515                         LANGUAGE_PORTUGUESE,           // primary level
1516                         LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1517                         LANGUAGE_ROMANIAN,             // second level
1518                         LANGUAGE_ROMANIAN_MOLDOVA,     // second level
1519                         LANGUAGE_SWEDISH,              // alternative primary level
1520                         LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
1521                         LANGUAGE_UKRAINIAN,            // primary level
1522                         LANGUAGE_USER_ARAGONESE,       // primary level
1523                         LANGUAGE_USER_ASTURIAN ) ||    // primary level
1524                     primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
1525                     primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
1526                 {
1527                     InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1528                     break;
1529                 }
1530             }
1531 
1532             if( bInsert )
1533                 rDoc.Insert( nInsPos, OUString(cChar) );
1534             else
1535                 rDoc.Replace( nInsPos, OUString(cChar) );
1536 
1537             // Hardspaces autocorrection
1538             if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1539             {
1540                 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1541                 // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
1542                 sal_Int32 nUpdatedPos = -1;
1543                 if (NeedsHardspaceAutocorr(cChar))
1544                     nUpdatedPos = FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext );
1545                 if (nUpdatedPos >= 0)
1546                 {
1547                     nInsPos = nUpdatedPos;
1548                 }
1549                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1550                 {
1551                     // Remove the NBSP if it wasn't an autocorrection
1552                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1553                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1554                     {
1555                         // Look for the last HARD_SPACE
1556                         sal_Int32 nPos = nInsPos - 1;
1557                         bool bContinue = true;
1558                         while ( bContinue )
1559                         {
1560                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1561                             if ( cTmpChar == cNonBreakingSpace )
1562                             {
1563                                 rDoc.Delete( nPos, nPos + 1 );
1564                                 bContinue = false;
1565                             }
1566                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1567                                 bContinue = false;
1568                             nPos--;
1569                         }
1570                     }
1571                 }
1572             }
1573         }
1574 
1575         if( !nInsPos )
1576             break;
1577 
1578         sal_Int32 nPos = nInsPos - 1;
1579 
1580         if( IsWordDelim( rTxt[ nPos ]))
1581             break;
1582 
1583         // Set bold or underline automatically?
1584         if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1585         {
1586             if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1587             {
1588                 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1589             }
1590             break;
1591         }
1592 
1593         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1594             ;
1595 
1596         // Found a Paragraph-start or a Blank, search for the word shortcut in
1597         // auto.
1598         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1599         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1600             --nCapLttrPos;          // begin of paragraph and no blank
1601 
1602         const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1603         CharClass& rCC = GetCharClass( eLang );
1604 
1605         // no symbol characters
1606         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1607             break;
1608 
1609         if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1610             // tdf#134940 fix regression of arrow "-->" resulted by premature
1611             // replacement of "--" since '>' was added to IsAutoCorrectChar()
1612             '>' != cChar )
1613         {
1614             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1615             // and becomes INVALID if ChgAutoCorrWord returns true!
1616             // => use aPara/pPara to create a valid copy of the string!
1617             OUString aPara;
1618             OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1619 
1620             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1621                                                     *this, pPara );
1622             if( !bChgWord )
1623             {
1624                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1625                 while( nCapLttrPos1 < nInsPos &&
1626                         lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1627                         )
1628                         ++nCapLttrPos1;
1629                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1630                         lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1631                         )
1632                         --nInsPos1;
1633 
1634                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1635                     nCapLttrPos1 < nInsPos1 &&
1636                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1637                 {
1638                     bChgWord = true;
1639                     nCapLttrPos = nCapLttrPos1;
1640                 }
1641             }
1642 
1643             if( bChgWord )
1644             {
1645                 if( !aPara.isEmpty() )
1646                 {
1647                     sal_Int32 nEnd = nCapLttrPos;
1648                     while( nEnd < aPara.getLength() &&
1649                             !IsWordDelim( aPara[ nEnd ]))
1650                         ++nEnd;
1651 
1652                     // Capital letter at beginning of paragraph?
1653                     if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1654                     {
1655                         FnCapitalStartSentence( rDoc, aPara, false,
1656                                                 nCapLttrPos, nEnd, eLang );
1657                     }
1658 
1659                     if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1660                     {
1661                         FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1662                     }
1663                 }
1664                 break;
1665             }
1666         }
1667 
1668         if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1669         {
1670             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1671             // and becomes INVALID if TransliterateRTLWord returns true!
1672             if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1673                 break;
1674         }
1675 
1676         if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1677                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1678                 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1679                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1680             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1681                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1682                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1683             ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
1684                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1685                 FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1686             ;
1687         else
1688         {
1689             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1690             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1691 
1692             if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1693                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1694             {
1695                 // Correct accidental use of cAPS LOCK key (do this only when
1696                 // the caps or shift lock key is pressed). Turn off the caps
1697                 // lock afterwards.
1698                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1699             }
1700 
1701             // Capital letter at beginning of paragraph ?
1702             if( !bUnsupported &&
1703                 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1704             {
1705                 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1706             }
1707 
1708             // Two capital letters at beginning of word ??
1709             if( !bUnsupported &&
1710                 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1711             {
1712                 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1713             }
1714 
1715             if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1716             {
1717                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1718             }
1719         }
1720 
1721     } while( false );
1722 }
1723 
GetLanguageList_(LanguageType eLang)1724 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1725                                                         LanguageType eLang )
1726 {
1727     LanguageTag aLanguageTag( eLang);
1728     if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1729         (void)CreateLanguageFile(aLanguageTag);
1730     const auto iter = m_aLangTable.find(aLanguageTag);
1731     assert(iter != m_aLangTable.end());
1732     return iter->second;
1733 }
1734 
SaveCplSttExceptList(LanguageType eLang)1735 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1736 {
1737     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1738     if (iter != m_aLangTable.end())
1739         iter->second.SaveCplSttExceptList();
1740     else
1741     {
1742         SAL_WARN("editeng", "Save an empty list? ");
1743     }
1744 }
1745 
SaveWordStartExceptList(LanguageType eLang)1746 void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang)
1747 {
1748     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1749     if (iter != m_aLangTable.end())
1750         iter->second.SaveWordStartExceptList();
1751     else
1752     {
1753         SAL_WARN("editeng", "Save an empty list? ");
1754     }
1755 }
1756 
1757 // Adds a single word. The list will immediately be written to the file!
AddCplSttException(const OUString & rNew,LanguageType eLang)1758 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1759                                         LanguageType eLang )
1760 {
1761     SvxAutoCorrectLanguageLists* pLists = nullptr;
1762     // either the right language is present or it will be this in the general list
1763     auto iter = m_aLangTable.find(LanguageTag(eLang));
1764     if (iter != m_aLangTable.end())
1765         pLists = &iter->second;
1766     else
1767     {
1768         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1769         iter = m_aLangTable.find(aLangTagUndetermined);
1770         if (iter != m_aLangTable.end())
1771             pLists = &iter->second;
1772         else if(CreateLanguageFile(aLangTagUndetermined))
1773         {
1774             iter = m_aLangTable.find(aLangTagUndetermined);
1775             assert(iter != m_aLangTable.end());
1776             pLists = &iter->second;
1777         }
1778     }
1779     OSL_ENSURE(pLists, "No auto correction data");
1780     return pLists && pLists->AddToCplSttExceptList(rNew);
1781 }
1782 
1783 // Adds a single word. The list will immediately be written to the file!
AddWordStartException(const OUString & rNew,LanguageType eLang)1784 bool SvxAutoCorrect::AddWordStartException( const OUString& rNew,
1785                                          LanguageType eLang )
1786 {
1787     SvxAutoCorrectLanguageLists* pLists = nullptr;
1788     //either the right language is present or it is set in the general list
1789     auto iter = m_aLangTable.find(LanguageTag(eLang));
1790     if (iter != m_aLangTable.end())
1791         pLists = &iter->second;
1792     else
1793     {
1794         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1795         iter = m_aLangTable.find(aLangTagUndetermined);
1796         if (iter != m_aLangTable.end())
1797             pLists = &iter->second;
1798         else if(CreateLanguageFile(aLangTagUndetermined))
1799         {
1800             iter = m_aLangTable.find(aLangTagUndetermined);
1801             assert(iter != m_aLangTable.end());
1802             pLists = &iter->second;
1803         }
1804     }
1805     OSL_ENSURE(pLists, "No auto correction file!");
1806     return pLists && pLists->AddToWordStartExceptList(rNew);
1807 }
1808 
GetPrevAutoCorrWord(SvxAutoCorrDoc const & rDoc,const OUString & rTxt,sal_Int32 nPos)1809 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1810                                              sal_Int32 nPos)
1811 {
1812     OUString sRet;
1813     if( !nPos )
1814         return sRet;
1815 
1816     sal_Int32 nEnd = nPos;
1817 
1818     // it must be followed by a blank or tab!
1819     if( ( nPos < rTxt.getLength() &&
1820         !IsWordDelim( rTxt[ nPos ])) ||
1821         IsWordDelim( rTxt[ --nPos ]))
1822         return sRet;
1823 
1824     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1825         ;
1826 
1827     // Found a Paragraph-start or a Blank, search for the word shortcut in
1828     // auto.
1829     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1830     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1831         --nCapLttrPos;          // Beginning of paragraph and no Blank!
1832 
1833     while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1834         if( ++nCapLttrPos >= nEnd )
1835             return sRet;
1836 
1837     if( 3 > nEnd - nCapLttrPos )
1838         return sRet;
1839 
1840     const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1841 
1842     CharClass& rCC = GetCharClass(eLang);
1843 
1844     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1845         return sRet;
1846 
1847     sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1848     return sRet;
1849 }
1850 
1851 // static
GetChunkForAutoText(std::u16string_view rTxt,const sal_Int32 nPos)1852 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt,
1853                                                           const sal_Int32 nPos)
1854 {
1855     constexpr sal_Int32 nMinLen = 3;
1856     constexpr sal_Int32 nMaxLen = 9;
1857     std::vector<OUString> aRes;
1858     if (nPos >= nMinLen)
1859     {
1860         sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1861         // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1862         if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1863         {
1864             while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1865                 ++nBegin;
1866         }
1867         if (nBegin + nMinLen <= nPos)
1868         {
1869             OUString sRes( rTxt.substr(nBegin, nPos - nBegin) );
1870             aRes.push_back(sRes);
1871             bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1872             for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1873             {
1874                 bool bAdd = bLastStartedWithDelim;
1875                 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1876                 bAdd = bAdd || bLastStartedWithDelim;
1877                 if (bAdd)
1878                     aRes.push_back(sRes.copy(i));
1879             }
1880         }
1881     }
1882     return aRes;
1883 }
1884 
CreateLanguageFile(const LanguageTag & rLanguageTag,bool bNewFile)1885 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1886 {
1887     OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1888 
1889     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1890     OUString sShareDirFile( sUserDirFile );
1891 
1892     SvxAutoCorrectLanguageLists* pLists = nullptr;
1893 
1894     tools::Time nAktTime(tools::Time::SYSTEM);
1895 
1896     auto nFndPos = aLastFileTable.find(rLanguageTag);
1897     bool lastCheckLessThan2MinutesAgo = nFndPos != aLastFileTable.end();
1898     if (lastCheckLessThan2MinutesAgo)
1899     {
1900         const tools::Time nLastCheckTime(tools::Time::fromEncodedTime(nFndPos->second));
1901         lastCheckLessThan2MinutesAgo
1902             = nLastCheckTime < nAktTime && nAktTime - nLastCheckTime < tools::Time(0, 2);
1903     }
1904     if (lastCheckLessThan2MinutesAgo)
1905     {
1906         // no need to test the file, because the last check is not older then
1907         // 2 minutes.
1908         if( bNewFile )
1909         {
1910             sShareDirFile = sUserDirFile;
1911             auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1912                             std::forward_as_tuple(rLanguageTag),
1913                             std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1914             pLists = &itBool.first->second;
1915             aLastFileTable.erase(nFndPos);
1916         }
1917     }
1918     else if(
1919              ( FStatHelper::IsDocument( sUserDirFile ) ||
1920                FStatHelper::IsDocument( sShareDirFile =
1921                    GetAutoCorrFileName( rLanguageTag ) ) ||
1922                FStatHelper::IsDocument( sShareDirFile =
1923                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1924              ) ||
1925         ( sShareDirFile = sUserDirFile, bNewFile )
1926           )
1927     {
1928         auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1929                         std::forward_as_tuple(rLanguageTag),
1930                         std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1931         pLists = &itBool.first->second;
1932         if (nFndPos != aLastFileTable.end())
1933             aLastFileTable.erase(nFndPos);
1934     }
1935     else if( !bNewFile )
1936     {
1937         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1938     }
1939     return pLists != nullptr;
1940 }
1941 
PutText(const OUString & rShort,const OUString & rLong,LanguageType eLang)1942 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1943                                 LanguageType eLang )
1944 {
1945     LanguageTag aLanguageTag( eLang);
1946     if (auto const iter = m_aLangTable.find(aLanguageTag); iter != m_aLangTable.end())
1947         return iter->second.PutText(rShort, rLong);
1948     if (CreateLanguageFile(aLanguageTag))
1949     {
1950         auto const iter = m_aLangTable.find(aLanguageTag);
1951         assert (iter != m_aLangTable.end());
1952         return iter->second.PutText(rShort, rLong);
1953     }
1954     return false;
1955 }
1956 
MakeCombinedChanges(std::vector<SvxAutocorrWord> & aNewEntries,std::vector<SvxAutocorrWord> & aDeleteEntries,LanguageType eLang)1957 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1958                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1959                                               LanguageType eLang )
1960 {
1961     LanguageTag aLanguageTag( eLang);
1962     auto iter = m_aLangTable.find(aLanguageTag);
1963     if (iter != m_aLangTable.end())
1964     {
1965         iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1966     }
1967     else if(CreateLanguageFile( aLanguageTag ))
1968     {
1969         iter = m_aLangTable.find(aLanguageTag);
1970         assert(iter != m_aLangTable.end());
1971         iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1972     }
1973 }
1974 
1975 //  - return the replacement text (only for SWG-Format, all other
1976 //    can be taken from the word list!)
GetLongText(const OUString &,OUString &)1977 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1978 {
1979     return false;
1980 }
1981 
refreshBlockList(const uno::Reference<embed::XStorage> &)1982 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1983 {
1984 }
1985 
1986 // Text with attribution (only the SWG - SWG format!)
PutText(const css::uno::Reference<css::embed::XStorage> &,const OUString &,const OUString &,SfxObjectShell &,OUString &)1987 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1988                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1989 {
1990     return false;
1991 }
1992 
EncryptBlockName_Imp(std::u16string_view rName)1993 OUString EncryptBlockName_Imp(std::u16string_view rName)
1994 {
1995     OUStringBuffer aName;
1996     aName.append('#').append(rName);
1997     for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos)
1998     {
1999         if (lcl_IsInArr( u"!/:.\\", aName[nPos]))
2000             aName[nPos] &= 0x0f;
2001     }
2002     return aName.makeStringAndClear();
2003 }
2004 
2005 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
GeneratePackageName(std::u16string_view rShort,OUString & rPackageName)2006 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
2007 {
2008     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
2009     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
2010 
2011     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
2012     {
2013         switch (aBuf[nPos])
2014         {
2015             case '!':
2016             case '/':
2017             case ':':
2018             case '.':
2019             case '\\':
2020             // tdf#156769 - escape the question mark in the storage name
2021             case '?':
2022                 aBuf[nPos] = '_';
2023                 break;
2024             default:
2025                 break;
2026         }
2027     }
2028 
2029     rPackageName = aBuf.makeStringAndClear();
2030 }
2031 
2032 static std::optional<SvxAutocorrWordList::WordSearchStatus>
lcl_SearchWordsInList(SvxAutoCorrectLanguageLists * pList,std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos)2033 lcl_SearchWordsInList( SvxAutoCorrectLanguageLists* pList,
2034                        std::u16string_view rTxt,
2035                        sal_Int32& rStt, sal_Int32 nEndPos )
2036 {
2037     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
2038     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
2039 }
2040 
2041 // the search for the words in the substitution table
2042 std::optional<SvxAutocorrWordList::WordSearchStatus>
SearchWordsInList(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos,SvxAutoCorrDoc &,LanguageTag & rLang)2043 SvxAutoCorrect::SearchWordsInList(
2044     std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
2045     SvxAutoCorrDoc&, LanguageTag& rLang )
2046 {
2047     LanguageTag aLanguageTag( rLang);
2048     if( aLanguageTag.isSystemLocale() )
2049         aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage());
2050 
2051     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
2052      * list instead? */
2053 
2054     // First search for eLang, then US-English -> English
2055     // and last in LANGUAGE_UNDETERMINED
2056     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2057     {
2058         //the language is available - so bring it on
2059         const auto iter = m_aLangTable.find(aLanguageTag);
2060         assert(iter != m_aLangTable.end());
2061         SvxAutoCorrectLanguageLists & rList = iter->second;
2062         auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2063         if( pRet )
2064         {
2065             rLang = aLanguageTag;
2066             return pRet;
2067         }
2068     }
2069 
2070     // If it still could not be found here, then keep on searching
2071     LanguageType eLang = aLanguageTag.getLanguageType();
2072     // the primary language for example EN
2073     aLanguageTag.reset(aLanguageTag.getLanguage());
2074     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2075     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2076                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2077                  CreateLanguageFile(aLanguageTag, false)))
2078     {
2079         //the language is available - so bring it on
2080         const auto iter = m_aLangTable.find(aLanguageTag);
2081         assert(iter != m_aLangTable.end());
2082         SvxAutoCorrectLanguageLists& rList = iter->second;
2083         auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2084         if( pRet )
2085         {
2086             rLang = aLanguageTag;
2087             return pRet;
2088         }
2089     }
2090 
2091     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2092             CreateLanguageFile(aLanguageTag, false))
2093     {
2094         //the language is available - so bring it on
2095         const auto iter = m_aLangTable.find(aLanguageTag);
2096         assert(iter != m_aLangTable.end());
2097         SvxAutoCorrectLanguageLists& rList = iter->second;
2098         auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2099         if( pRet )
2100         {
2101             rLang = std::move(aLanguageTag);
2102             return pRet;
2103         }
2104     }
2105     return std::nullopt;
2106 }
2107 
SearchWordsNext(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos,SvxAutocorrWordList::WordSearchStatus & rStatus)2108 bool SvxAutoCorrect::SearchWordsNext(
2109     std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
2110     SvxAutocorrWordList::WordSearchStatus& rStatus )
2111 {
2112     const SvxAutocorrWordList* pWordList = rStatus.GetAutocorrWordList();
2113     return pWordList->SearchWordsNext( rTxt, rStt, nEndPos, rStatus );
2114 }
2115 
FindInWordStartExceptList(LanguageType eLang,const OUString & sWord)2116 bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang,
2117                                              const OUString& sWord )
2118 {
2119     LanguageTag aLanguageTag( eLang);
2120 
2121     /* TODO-BCP47: again horrible ugliness */
2122 
2123     // First search for eLang, then primary language of eLang
2124     // and last in LANGUAGE_UNDETERMINED
2125 
2126     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2127     {
2128         //the language is available - so bring it on
2129         const auto iter = m_aLangTable.find(aLanguageTag);
2130         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2131         auto& rList = iter->second;
2132         if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2133             return true;
2134     }
2135 
2136     // If it still could not be found here, then keep on searching
2137     // the primary language for example EN
2138     aLanguageTag.reset(aLanguageTag.getLanguage());
2139     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2140     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2141                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2142                  CreateLanguageFile(aLanguageTag, false)))
2143     {
2144         //the language is available - so bring it on
2145         const auto iter = m_aLangTable.find(aLanguageTag);
2146         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2147         auto& rList = iter->second;
2148         if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2149             return true;
2150     }
2151 
2152     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2153             CreateLanguageFile(aLanguageTag, false))
2154     {
2155         //the language is available - so bring it on
2156         const auto iter = m_aLangTable.find(aLanguageTag);
2157         assert(iter != m_aLangTable.end());
2158         auto& rList = iter->second;
2159         if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2160             return true;
2161     }
2162     return false;
2163 }
2164 
lcl_FindAbbreviation(const SvStringsISortDtor * pList,const OUString & sWord)2165 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2166 {
2167     SvStringsISortDtor::const_iterator it = pList->find(u"~"_ustr);
2168     SvStringsISortDtor::size_type nPos = it - pList->begin();
2169     if( nPos < pList->size() )
2170     {
2171         OUString sLowerWord(sWord.toAsciiLowerCase());
2172         OUString sAbr;
2173         for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2174         {
2175             sAbr = (*pList)[ n ];
2176             if (sAbr[0] != '~')
2177                 break;
2178             // ~ and ~. are not allowed!
2179             if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2180             {
2181                 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2182                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2183                 {
2184                     if( !--i )      // agrees
2185                         return true;
2186 
2187                     if( sLowerAbk[i] != sLowerWord[--ii])
2188                         break;
2189                 }
2190             }
2191         }
2192     }
2193     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2194             "Wrongly sorted exception list?" );
2195     return false;
2196 }
2197 
FindInCplSttExceptList(LanguageType eLang,const OUString & sWord,bool bAbbreviation)2198 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2199                                 const OUString& sWord, bool bAbbreviation)
2200 {
2201     LanguageTag aLanguageTag( eLang);
2202 
2203     /* TODO-BCP47: did I mention terrible horrible ugliness? */
2204 
2205     // First search for eLang, then primary language of eLang
2206     // and last in LANGUAGE_UNDETERMINED
2207 
2208     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2209     {
2210         //the language is available - so bring it on
2211         const auto iter = m_aLangTable.find(aLanguageTag);
2212         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2213         const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2214         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2215             return true;
2216     }
2217 
2218     // If it still could not be found here, then keep on searching
2219     // the primary language for example EN
2220     aLanguageTag.reset(aLanguageTag.getLanguage());
2221     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2222     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2223                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2224                  CreateLanguageFile(aLanguageTag, false)))
2225     {
2226         //the language is available - so bring it on
2227         const auto iter = m_aLangTable.find(aLanguageTag);
2228         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2229         const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2230         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2231             return true;
2232     }
2233 
2234     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2235             CreateLanguageFile(aLanguageTag, false))
2236     {
2237         //the language is available - so bring it on
2238         const auto iter = m_aLangTable.find(aLanguageTag);
2239         assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2240         const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2241         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2242             return true;
2243     }
2244     return false;
2245 }
2246 
GetAutoCorrFileName(const LanguageTag & rLanguageTag,bool bNewFile,bool bTst,bool bUnlocalized) const2247 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2248                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
2249 {
2250     OUString sRet, sExt( rLanguageTag.getBcp47() );
2251     if (bUnlocalized)
2252     {
2253         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2254         std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2255         if (!vecFallBackStrings.empty())
2256            sExt = vecFallBackStrings[0];
2257     }
2258 
2259     sExt = "_" + sExt + ".dat";
2260     if( bNewFile )
2261         sRet = sUserAutoCorrFile + sExt;
2262     else if( !bTst )
2263         sRet = sShareAutoCorrFile + sExt;
2264     else
2265     {
2266         // test first in the user directory - if not exist, then
2267         sRet = sUserAutoCorrFile + sExt;
2268         if( !FStatHelper::IsDocument( sRet ))
2269             sRet = sShareAutoCorrFile + sExt;
2270     }
2271     return sRet;
2272 }
2273 
SvxAutoCorrectLanguageLists(SvxAutoCorrect & rParent,OUString aShareAutoCorrectFile,OUString aUserAutoCorrectFile)2274 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2275                 SvxAutoCorrect& rParent,
2276                 OUString aShareAutoCorrectFile,
2277                 OUString aUserAutoCorrectFile)
2278 :   sShareAutoCorrFile(std::move( aShareAutoCorrectFile )),
2279     sUserAutoCorrFile(std::move( aUserAutoCorrectFile )),
2280     aModifiedDate( Date::EMPTY ),
2281     aModifiedTime( tools::Time::EMPTY ),
2282     aLastCheckTime( tools::Time::EMPTY ),
2283     rAutoCorrect(rParent),
2284     nFlags(ACFlags::NONE)
2285 {
2286 }
2287 
~SvxAutoCorrectLanguageLists()2288 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2289 {
2290 }
2291 
IsFileChanged_Imp()2292 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2293 {
2294     // Access the file system only every 2 minutes to check the date stamp
2295     bool bRet = false;
2296 
2297     tools::Time nMinTime( 0, 2 );
2298     tools::Time nAktTime( tools::Time::SYSTEM );
2299     if( aLastCheckTime <= nAktTime) // overflow?
2300         return false;
2301     nAktTime -= aLastCheckTime;
2302     if( nAktTime > nMinTime )     // min time past
2303     {
2304         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2305         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2306                                             &aTstDate, &aTstTime ) &&
2307             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2308         {
2309             bRet = true;
2310             // then remove all the lists fast!
2311             if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2312             {
2313                 pCplStt_ExcptLst.reset();
2314             }
2315             if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst )
2316             {
2317                 pWordStart_ExcptLst.reset();
2318             }
2319             if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2320             {
2321                 pAutocorr_List.reset();
2322             }
2323             nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad );
2324         }
2325         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2326     }
2327     return bRet;
2328 }
2329 
LoadXMLExceptList_Imp(std::unique_ptr<SvStringsISortDtor> & rpLst,const OUString & sStrmName,rtl::Reference<SotStorage> & rStg)2330 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2331                                         std::unique_ptr<SvStringsISortDtor>& rpLst,
2332                                         const OUString& sStrmName,
2333                                         rtl::Reference<SotStorage>& rStg)
2334 {
2335     if( rpLst )
2336         rpLst->clear();
2337     else
2338         rpLst.reset( new SvStringsISortDtor );
2339 
2340     {
2341         if( rStg.is() && rStg->IsStream( sStrmName ) )
2342         {
2343             rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2344                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2345             if( ERRCODE_NONE != xStrm->GetError())
2346             {
2347                 xStrm.clear();
2348                 rStg.clear();
2349                 RemoveStream_Imp( sStrmName );
2350             }
2351             else
2352             {
2353                 const uno::Reference< uno::XComponentContext >& xContext =
2354                     comphelper::getProcessComponentContext();
2355 
2356                 xml::sax::InputSource aParserInput;
2357                 aParserInput.sSystemId = sStrmName;
2358 
2359                 xStrm->Seek( 0 );
2360                 xStrm->SetBufferSize( 8 * 1024 );
2361                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2362 
2363                 // get filter
2364                 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2365 
2366                 // connect parser and filter
2367                 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2368                 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2369                 xParser->setFastDocumentHandler( xFilter );
2370                 xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
2371                 xParser->setTokenHandler( xTokenHandler );
2372 
2373                 // parse
2374                 try
2375                 {
2376                     xParser->parseStream( aParserInput );
2377                 }
2378                 catch( const xml::sax::SAXParseException& )
2379                 {
2380                     // re throw ?
2381                 }
2382                 catch( const xml::sax::SAXException& )
2383                 {
2384                     // re throw ?
2385                 }
2386                 catch( const io::IOException& )
2387                 {
2388                     // re throw ?
2389                 }
2390             }
2391         }
2392 
2393         // Set time stamp
2394         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2395                                         &aModifiedDate, &aModifiedTime );
2396         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2397     }
2398 
2399 }
2400 
SaveExceptList_Imp(const SvStringsISortDtor & rLst,const OUString & sStrmName,rtl::Reference<SotStorage> const & rStg,bool bConvert)2401 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2402                             const SvStringsISortDtor& rLst,
2403                             const OUString& sStrmName,
2404                             rtl::Reference<SotStorage> const &rStg,
2405                             bool bConvert )
2406 {
2407     if( !rStg.is() )
2408         return;
2409 
2410     if( rLst.empty() )
2411     {
2412         rStg->Remove( sStrmName );
2413         rStg->Commit();
2414     }
2415     else
2416     {
2417         rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2418                 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2419         if( xStrm.is() )
2420         {
2421             xStrm->SetSize( 0 );
2422             xStrm->SetBufferSize( 8192 );
2423             xStrm->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );
2424 
2425 
2426             const uno::Reference< uno::XComponentContext >& xContext =
2427                 comphelper::getProcessComponentContext();
2428 
2429             uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2430             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2431             xWriter->setOutputStream(xOut);
2432 
2433             uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2434             rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2435 
2436             xExp->exportDoc( XML_BLOCK_LIST );
2437 
2438             xStrm->Commit();
2439             if( xStrm->GetError() == ERRCODE_NONE )
2440             {
2441                 xStrm.clear();
2442                 if (!bConvert)
2443                 {
2444                     rStg->Commit();
2445                     if( ERRCODE_NONE != rStg->GetError() )
2446                     {
2447                         rStg->Remove( sStrmName );
2448                         rStg->Commit();
2449                     }
2450                 }
2451             }
2452         }
2453     }
2454 }
2455 
LoadAutocorrWordList()2456 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2457 {
2458     if( pAutocorr_List )
2459         pAutocorr_List->DeleteAndDestroyAll();
2460     else
2461         pAutocorr_List.reset( new SvxAutocorrWordList() );
2462 
2463     try
2464     {
2465         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2466         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2467         const uno::Reference< uno::XComponentContext >& xContext = comphelper::getProcessComponentContext();
2468 
2469         xml::sax::InputSource aParserInput;
2470         aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2471         aParserInput.aInputStream = xStrm->getInputStream();
2472 
2473         // get parser
2474         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2475         SAL_INFO("editeng", "AutoCorrect Import" );
2476         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2477         uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2478 
2479         // connect parser and filter
2480         xParser->setFastDocumentHandler( xFilter );
2481         xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
2482         xParser->setTokenHandler(xTokenHandler);
2483 
2484         // parse
2485         xParser->parseStream( aParserInput );
2486     }
2487     catch ( const uno::Exception& )
2488     {
2489         TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2490     }
2491 
2492     // Set time stamp
2493     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2494                                     &aModifiedDate, &aModifiedTime );
2495     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2496 
2497     return pAutocorr_List.get();
2498 }
2499 
GetAutocorrWordList()2500 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2501 {
2502     if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2503     {
2504         LoadAutocorrWordList();
2505         if( !pAutocorr_List )
2506         {
2507             OSL_ENSURE( false, "No valid list" );
2508             pAutocorr_List.reset( new SvxAutocorrWordList() );
2509         }
2510         nFlags |= ACFlags::ChgWordLstLoad;
2511     }
2512     return pAutocorr_List.get();
2513 }
2514 
GetCplSttExceptList()2515 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2516 {
2517     if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2518     {
2519         LoadCplSttExceptList();
2520         if( !pCplStt_ExcptLst )
2521         {
2522             OSL_ENSURE( false, "No valid list" );
2523             pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2524         }
2525         nFlags |= ACFlags::CplSttLstLoad;
2526     }
2527     return pCplStt_ExcptLst.get();
2528 }
2529 
AddToCplSttExceptList(const OUString & rNew)2530 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2531 {
2532     bool bRet = false;
2533     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2534     {
2535         MakeUserStorage_Impl();
2536         rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2537 
2538         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2539 
2540         xStg = nullptr;
2541         // Set time stamp
2542         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2543                                             &aModifiedDate, &aModifiedTime );
2544         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2545         bRet = true;
2546     }
2547     return bRet;
2548 }
2549 
AddToWordStartExceptList(const OUString & rNew)2550 bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew)
2551 {
2552     bool bRet = false;
2553     if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second )
2554     {
2555         MakeUserStorage_Impl();
2556         rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2557 
2558         SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2559 
2560         xStg = nullptr;
2561         // Set time stamp
2562         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2563                                             &aModifiedDate, &aModifiedTime );
2564         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2565         bRet = true;
2566     }
2567     return bRet;
2568 }
2569 
LoadCplSttExceptList()2570 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2571 {
2572     try
2573     {
2574         rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2575         if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2576             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2577     }
2578     catch (const css::ucb::ContentCreationException&)
2579     {
2580     }
2581     return pCplStt_ExcptLst.get();
2582 }
2583 
SaveCplSttExceptList()2584 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2585 {
2586     MakeUserStorage_Impl();
2587     rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2588 
2589     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2590 
2591     xStg = nullptr;
2592 
2593     // Set time stamp
2594     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2595                                             &aModifiedDate, &aModifiedTime );
2596     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2597 }
2598 
LoadWordStartExceptList()2599 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
2600 {
2601     try
2602     {
2603         rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2604         if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2605             LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2606     }
2607     catch (const css::ucb::ContentCreationException &)
2608     {
2609         TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
2610     }
2611     return pWordStart_ExcptLst.get();
2612 }
2613 
SaveWordStartExceptList()2614 void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
2615 {
2616     MakeUserStorage_Impl();
2617     rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2618 
2619     SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2620 
2621     xStg = nullptr;
2622     // Set time stamp
2623     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2624                                             &aModifiedDate, &aModifiedTime );
2625     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2626 }
2627 
GetWordStartExceptList()2628 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
2629 {
2630     if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() )
2631     {
2632         LoadWordStartExceptList();
2633         if( !pWordStart_ExcptLst )
2634         {
2635             OSL_ENSURE( false, "No valid list" );
2636             pWordStart_ExcptLst.reset( new SvStringsISortDtor );
2637         }
2638         nFlags |= ACFlags::WordStartLstLoad;
2639     }
2640     return pWordStart_ExcptLst.get();
2641 }
2642 
RemoveStream_Imp(const OUString & rName)2643 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2644 {
2645     if( sShareAutoCorrFile != sUserAutoCorrFile )
2646     {
2647         rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2648         if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2649             xStg->IsStream( rName ) )
2650         {
2651             xStg->Remove( rName );
2652             xStg->Commit();
2653 
2654             xStg = nullptr;
2655         }
2656     }
2657 }
2658 
MakeUserStorage_Impl()2659 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2660 {
2661     // The conversion needs to happen if the file is already in the user
2662     // directory and is in the old format. Additionally it needs to
2663     // happen when the file is being copied from share to user.
2664 
2665     bool bError = false, bConvert = false, bCopy = false;
2666     INetURLObject aDest;
2667     INetURLObject aSource;
2668 
2669     if (sUserAutoCorrFile != sShareAutoCorrFile )
2670     {
2671         aSource = INetURLObject ( sShareAutoCorrFile );
2672         aDest = INetURLObject ( sUserAutoCorrFile );
2673         if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2674         {
2675             aDest.SetExtension ( u"bak" );
2676             bConvert = true;
2677         }
2678         bCopy = true;
2679     }
2680     else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2681     {
2682         aSource = INetURLObject ( sUserAutoCorrFile );
2683         aDest = INetURLObject ( sUserAutoCorrFile );
2684         aDest.SetExtension ( u"bak" );
2685         bCopy = bConvert = true;
2686     }
2687     if (bCopy)
2688     {
2689         try
2690         {
2691             OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2692             sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2693             sMain = sMain.copy(0, nSlashPos);
2694             ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2695             TransferInfo aInfo;
2696             aInfo.NameClash = NameClash::OVERWRITE;
2697             aInfo.NewTitle = aDest.GetLastName();
2698             aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2699             aInfo.MoveData  = false;
2700             aNewContent.executeCommand( u"transfer"_ustr, Any(aInfo));
2701         }
2702         catch (...)
2703         {
2704             bError = true;
2705         }
2706     }
2707     if (bConvert && !bError)
2708     {
2709         rtl::Reference<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2710         rtl::Reference<SotStorage> xDstStg = new SotStorage(sUserAutoCorrFile, StreamMode::WRITE);
2711 
2712         if( xSrcStg.is() && xDstStg.is() )
2713         {
2714             std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2715 
2716             if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2717                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg );
2718 
2719             if (pTmpWordList)
2720             {
2721                 SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true );
2722                 pTmpWordList.reset();
2723             }
2724 
2725 
2726             if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2727                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2728 
2729             if (pTmpWordList)
2730             {
2731                 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2732                 pTmpWordList->clear();
2733             }
2734 
2735             GetAutocorrWordList();
2736             MakeBlocklist_Imp( *xDstStg );
2737             sShareAutoCorrFile = sUserAutoCorrFile;
2738             xDstStg = nullptr;
2739             try
2740             {
2741                 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2742                 aContent.executeCommand ( u"delete"_ustr, Any ( true ) );
2743             }
2744             catch (...)
2745             {
2746             }
2747         }
2748     }
2749     else if( bCopy && !bError )
2750         sShareAutoCorrFile = sUserAutoCorrFile;
2751 }
2752 
MakeBlocklist_Imp(SotStorage & rStg)2753 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2754 {
2755     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2756     if( !bRemove )
2757     {
2758         rtl::Reference<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2759                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2760         if( refList.is() )
2761         {
2762             refList->SetSize( 0 );
2763             refList->SetBufferSize( 8192 );
2764             refList->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );
2765 
2766             const uno::Reference< uno::XComponentContext >& xContext =
2767                 comphelper::getProcessComponentContext();
2768 
2769             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2770             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2771             xWriter->setOutputStream(xOut);
2772 
2773             rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2774 
2775             xExp->exportDoc( XML_BLOCK_LIST );
2776 
2777             refList->Commit();
2778             bRet = ERRCODE_NONE == refList->GetError();
2779             if( bRet )
2780             {
2781                 refList.clear();
2782                 rStg.Commit();
2783                 if( ERRCODE_NONE != rStg.GetError() )
2784                 {
2785                     bRemove = true;
2786                     bRet = false;
2787                 }
2788             }
2789         }
2790         else
2791             bRet = false;
2792     }
2793 
2794     if( bRemove )
2795     {
2796         rStg.Remove( pXMLImplAutocorr_ListStr );
2797         rStg.Commit();
2798     }
2799 
2800     return bRet;
2801 }
2802 
MakeCombinedChanges(std::vector<SvxAutocorrWord> & aNewEntries,std::vector<SvxAutocorrWord> & aDeleteEntries)2803 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2804 {
2805     // First get the current list!
2806     GetAutocorrWordList();
2807 
2808     MakeUserStorage_Impl();
2809     rtl::Reference<SotStorage> xStorage = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2810 
2811     bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2812 
2813     if( bRet )
2814     {
2815         for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2816         {
2817             std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2818             if( xFoundEntry )
2819             {
2820                 if( !xFoundEntry->IsTextOnly() )
2821                 {
2822                     OUString aName( aWordToDelete.GetShort() );
2823                     if (xStorage->IsOLEStorage())
2824                         aName = EncryptBlockName_Imp(aName);
2825                     else
2826                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2827 
2828                     if( xStorage->IsContained( aName ) )
2829                     {
2830                         xStorage->Remove( aName );
2831                         bRet = xStorage->Commit();
2832                     }
2833                 }
2834             }
2835         }
2836 
2837         for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2838         {
2839             SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2840             std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2841             if( xRemoved )
2842             {
2843                 if( !xRemoved->IsTextOnly() )
2844                 {
2845                     // Still have to remove the Storage
2846                     OUString sStorageName( aWordToAdd.GetShort() );
2847                     if (xStorage->IsOLEStorage())
2848                         sStorageName = EncryptBlockName_Imp(sStorageName);
2849                     else
2850                         GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2851 
2852                     if( xStorage->IsContained( sStorageName ) )
2853                         xStorage->Remove( sStorageName );
2854                 }
2855             }
2856             bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2857 
2858             if ( !bRet )
2859             {
2860                 break;
2861             }
2862         }
2863 
2864         if ( bRet )
2865         {
2866             bRet = MakeBlocklist_Imp( *xStorage );
2867         }
2868     }
2869     return bRet;
2870 }
2871 
PutText(const OUString & rShort,const OUString & rLong)2872 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2873 {
2874     // First get the current list!
2875     GetAutocorrWordList();
2876 
2877     MakeUserStorage_Impl();
2878     rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2879 
2880     bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2881 
2882     // Update the word list
2883     if( bRet )
2884     {
2885         SvxAutocorrWord aNew(rShort, rLong, true );
2886         std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2887         if( xRemove )
2888         {
2889             if( !xRemove->IsTextOnly() )
2890             {
2891                 // Still have to remove the Storage
2892                 OUString sStgNm( rShort );
2893                 if (xStg->IsOLEStorage())
2894                     sStgNm = EncryptBlockName_Imp(sStgNm);
2895                 else
2896                     GeneratePackageName ( rShort, sStgNm);
2897 
2898                 if( xStg->IsContained( sStgNm ) )
2899                     xStg->Remove( sStgNm );
2900             }
2901         }
2902 
2903         if( pAutocorr_List->Insert( std::move(aNew) ) )
2904         {
2905             bRet = MakeBlocklist_Imp( *xStg );
2906             xStg = nullptr;
2907         }
2908         else
2909         {
2910             bRet = false;
2911         }
2912     }
2913     return bRet;
2914 }
2915 
PutText(const OUString & rShort,SfxObjectShell & rShell)2916 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2917                                                SfxObjectShell& rShell )
2918 {
2919     // First get the current list!
2920     GetAutocorrWordList();
2921 
2922     MakeUserStorage_Impl();
2923 
2924     try
2925     {
2926         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2927         OUString sLong;
2928         bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2929         xStg = nullptr;
2930 
2931         // Update the word list
2932         if( bRet )
2933         {
2934             if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2935             {
2936                 rtl::Reference<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2937                 MakeBlocklist_Imp( *xStor );
2938             }
2939         }
2940     }
2941     catch ( const uno::Exception& )
2942     {
2943     }
2944 }
2945 
2946 // Keep the list sorted ...
2947 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2948 {
operator ()SvxAutocorrWordList::CompareSvxAutocorrWordList2949     bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2950     {
2951         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2952         return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2953     }
2954 };
2955 
2956 namespace {
2957 
2958 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2959 
2960 }
2961 
2962 struct SvxAutocorrWordList::Impl
2963 {
2964 
2965     // only one of these contains the data
2966     // maSortedVector is manually sorted so we can optimise data movement
2967     mutable AutocorrWordSetType maSortedVector;
2968     mutable AutocorrWordHashType maHash; // key is 'Short'
2969 
DeleteAndDestroyAllSvxAutocorrWordList::Impl2970     void DeleteAndDestroyAll()
2971     {
2972         maHash.clear();
2973         maSortedVector.clear();
2974     }
2975 };
2976 
SvxAutocorrWordList()2977 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2978 
~SvxAutocorrWordList()2979 SvxAutocorrWordList::~SvxAutocorrWordList()
2980 {
2981 }
2982 
DeleteAndDestroyAll()2983 void SvxAutocorrWordList::DeleteAndDestroyAll()
2984 {
2985     mpImpl->DeleteAndDestroyAll();
2986 }
2987 
2988 struct SvxAutocorrWordList::Iterator::Impl {
2989     typedef SvxAutocorrWordList::AutocorrWordSetType::const_iterator VecIterType;
2990     typedef AutocorrWordHashType::const_iterator HashIterType;
2991 
2992     HashIterType  mHashIter, mHashEnd;
2993     VecIterType   mSortedVectorIter, mSortedVectorEnd;
2994 
ImplSvxAutocorrWordList::Iterator::Impl2995     Impl(const HashIterType& hashIter, const HashIterType& hashEnd,
2996          const VecIterType& vecIter, const VecIterType& vecEnd)
2997         : mHashIter(hashIter), mHashEnd(hashEnd),
2998           mSortedVectorIter(vecIter), mSortedVectorEnd(vecEnd) {}
2999 
StepSvxAutocorrWordList::Iterator::Impl3000     bool Step() {
3001         // Iterate hash table, followed by sorted vector
3002         if (mHashIter != mHashEnd) {
3003             return ++mHashIter != mHashEnd
3004                    || mSortedVectorIter != mSortedVectorEnd;
3005         }
3006         return ++mSortedVectorIter != mSortedVectorEnd;
3007     }
3008 
operator *SvxAutocorrWordList::Iterator::Impl3009     const SvxAutocorrWord& operator*() {
3010         return (mHashIter == mHashEnd) ? *mSortedVectorIter : mHashIter->second;
3011     }
operator ->SvxAutocorrWordList::Iterator::Impl3012     const SvxAutocorrWord* operator->() {
3013         return (mHashIter == mHashEnd) ? &*mSortedVectorIter : &mHashIter->second;
3014     }
3015 };
3016 
Iterator(std::unique_ptr<SvxAutocorrWordList::Iterator::Impl> pImpl)3017 SvxAutocorrWordList::Iterator::Iterator(
3018     std::unique_ptr<SvxAutocorrWordList::Iterator::Impl> pImpl
3019 ) : mpImpl(std::move(pImpl))
3020 {
3021 }
3022 
// Copy constructor: the new iterator gets its own copy of the source's state.
SvxAutocorrWordList::Iterator::Iterator(
    const SvxAutocorrWordList::Iterator& it)
    : mpImpl(std::make_unique<Impl>(*it.mpImpl))
{
}
3028 
~Iterator()3029 SvxAutocorrWordList::Iterator::~Iterator()
3030 {
3031 }
3032 
Step()3033 bool SvxAutocorrWordList::Iterator::Step()
3034 {
3035     return mpImpl->Step();
3036 }
3037 
operator *() const3038 const SvxAutocorrWord& SvxAutocorrWordList::Iterator::operator*() const
3039 {
3040     return **mpImpl;
3041 }
3042 
operator ->() const3043 const SvxAutocorrWord* SvxAutocorrWordList::Iterator::operator->() const
3044 {
3045     return mpImpl->operator->();
3046 }
3047 
ContainsPattern(const OUString & aShort) const3048 bool SvxAutocorrWordList::ContainsPattern(const OUString& aShort) const
3049 {
3050     // check hash table first
3051     if (mpImpl->maHash.contains(aShort)) {
3052         return true;
3053     }
3054 
3055     // then do binary search on sorted vector
3056     CollatorWrapper& rCmp = ::GetCollatorWrapper();
3057     auto it = std::lower_bound(mpImpl->maSortedVector.begin(),
3058                                mpImpl->maSortedVector.end(),
3059                                aShort,
3060                                [&](const SvxAutocorrWord& elm,
3061                                    const OUString& val) {
3062                                        return rCmp.compareString(elm.GetShort(),
3063                                                                  val) < 0;
3064                                    } );
3065     if (it != mpImpl->maSortedVector.end()
3066         && rCmp.compareString(aShort, it->GetShort()) == 0)
3067     {
3068         return true;
3069     }
3070 
3071     return false;
3072 }
3073 
3074 // returns true if inserted
Insert(SvxAutocorrWord aWord) const3075 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
3076 {
3077     if ( mpImpl->maSortedVector.empty() ) // use the hash
3078     {
3079         OUString aShort = aWord.GetShort();
3080         auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
3081         if (inserted)
3082             return &(it->second);
3083         return nullptr;
3084     }
3085     else
3086     {
3087         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
3088         CollatorWrapper& rCmp = ::GetCollatorWrapper();
3089         if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
3090         {
3091             it = mpImpl->maSortedVector.insert(it, std::move(aWord));
3092             return &*it;
3093         }
3094         return nullptr;
3095     }
3096 }
3097 
LoadEntry(const OUString & sWrong,const OUString & sRight,bool bOnlyTxt)3098 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
3099 {
3100     (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
3101 }
3102 
empty() const3103 bool SvxAutocorrWordList::empty() const
3104 {
3105     return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
3106 }
3107 
FindAndRemove(const SvxAutocorrWord * pWord)3108 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
3109 {
3110 
3111     if ( mpImpl->maSortedVector.empty() ) // use the hash
3112     {
3113         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
3114         if( it != mpImpl->maHash.end() )
3115         {
3116             SvxAutocorrWord pMatch = std::move(it->second);
3117             mpImpl->maHash.erase (it);
3118             return pMatch;
3119         }
3120     }
3121     else
3122     {
3123         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
3124         if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
3125         {
3126             SvxAutocorrWord pMatch = std::move(*it);
3127             mpImpl->maSortedVector.erase (it);
3128             return pMatch;
3129         }
3130     }
3131     return std::optional<SvxAutocorrWord>();
3132 }
3133 
3134 // return the sorted contents - defer sorting until we have to.
getSortedContent() const3135 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
3136 {
3137     // convert from hash to set permanently
3138     if ( mpImpl->maSortedVector.empty() )
3139     {
3140         std::vector<SvxAutocorrWord> tmp;
3141         tmp.reserve(mpImpl->maHash.size());
3142         for (auto & rPair : mpImpl->maHash)
3143             tmp.emplace_back(std::move(rPair.second));
3144         mpImpl->maHash.clear();
3145         // sort twice - this gets the list into mostly-sorted order, which
3146         // reduces the number of times we need to invoke the expensive ICU collate fn.
3147         std::sort(tmp.begin(), tmp.end(),
3148             [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
3149             {
3150                 return lhs.GetShort() < rhs.GetShort();
3151             });
3152         // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
3153         // stable_sort is twice as fast as sort in this situation because it does
3154         // fewer comparison operations.
3155         std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
3156         mpImpl->maSortedVector = std::move(tmp);
3157     }
3158     return mpImpl->maSortedVector;
3159 }
3160 
3161 std::optional<SvxAutocorrWord>
WordMatches(const SvxAutocorrWord * pFnd,std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos) const3162 SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
3163                                  std::u16string_view rTxt,
3164                                  sal_Int32 &rStt,
3165                                  sal_Int32 nEndPos) const
3166 {
3167     const OUString& rChk = pFnd->GetShort();
3168 
3169     sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
3170     sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
3171     assert(nEndPos >= 0);
3172     size_t nSttWdPos = nEndPos;
3173 
3174     // direct replacement of keywords surrounded by colons (for example, ":name:")
3175     bool bColonNameColon = static_cast<sal_Int32>(rTxt.size()) > nEndPos &&
3176         rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
3177     if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard )
3178     {
3179         return std::nullopt;
3180     }
3181 
3182     bool bWasWordDelim = false;
3183     sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
3184     if (bColonNameColon)
3185         nCalcStt++;
3186     if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
3187           ( nCalcStt < rStt &&
3188             IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
3189     {
3190         TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
3191         OUString sWord( rTxt.substr(nCalcStt, rChk.getLength() - left_wildcard) );
3192         if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
3193         {
3194             rStt = nCalcStt;
3195             if (!left_wildcard)
3196             {
3197                 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
3198                 if (static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
3199                 {
3200                     return std::nullopt;
3201                 }
3202                 return *pFnd;
3203             }
3204             // get the first word delimiter position before the matching ".*word" pattern
3205             while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
3206                 ;
3207             if (bWasWordDelim) rStt++;
3208 
3209             // don't let wildcard pattern override non-wildcard one
3210             OUString aShort(rTxt.substr(rStt, nEndPos - rStt));
3211             if (ContainsPattern(aShort)) {
3212                 return std::nullopt;
3213             }
3214 
3215             OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) );
3216             // avoid double spaces before simple "word" replacement
3217             left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong();
3218             return SvxAutocorrWord(aShort, left_pattern);
3219         }
3220     } else
3221     // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
3222     if ( right_wildcard )
3223     {
3224 
3225         OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
3226         // Get the last word delimiter position
3227         bool not_suffix;
3228 
3229         while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
3230             ;
3231         // search the first occurrence (with a left word delimitation, if needed)
3232         size_t nFndPos = rStt - 1;
3233         do {
3234             nFndPos = rTxt.find( sTmp, nFndPos + 1);
3235             if (nFndPos == std::u16string_view::npos)
3236                 break;
3237             not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
3238         } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
3239 
3240         if ( nFndPos != std::u16string_view::npos )
3241         {
3242             sal_Int32 extra_repl = static_cast<sal_Int32>(nFndPos) + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
3243 
3244             if ( left_wildcard )
3245             {
3246                 // get the first word delimiter position before the matching ".*word.*" pattern
3247                 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
3248                     ;
3249                 if (bWasWordDelim) nFndPos++;
3250             }
3251             if (nEndPos + extra_repl <= static_cast<sal_Int32>(nFndPos))
3252             {
3253                 return std::nullopt;
3254             }
3255             // return matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3256             OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) );
3257             // don't let wildcard pattern override non-wildcard one
3258             if (ContainsPattern(aShort)) {
3259                 return std::nullopt;
3260             }
3261 
3262             OUString aLong;
3263             rStt = nFndPos;
3264             if ( !left_wildcard )
3265             {
3266                 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
3267                 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.substr(nFndPos + sTmp.getLength(), siz) : u"");
3268             } else {
3269                 OUStringBuffer buf;
3270                 do {
3271                     nSttWdPos = rTxt.find( sTmp, nFndPos);
3272                     if (nSttWdPos != std::u16string_view::npos)
3273                     {
3274                         sal_Int32 nTmp(nFndPos);
3275                         while (nTmp < static_cast<sal_Int32>(nSttWdPos) && !IsWordDelim(rTxt[nTmp]))
3276                         {
3277                             nTmp++;
3278                         }
3279                         if (nTmp < static_cast<sal_Int32>(nSttWdPos)) {
3280                             break; // word delimiter found
3281                         }
3282                         buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
3283                         nFndPos = nSttWdPos + sTmp.getLength();
3284                     }
3285                 } while (nSttWdPos != std::u16string_view::npos);
3286                 if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl) {
3287                     buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos));
3288                 }
3289                 aLong = buf.makeStringAndClear();
3290             }
3291             if ( (static_cast<sal_Int32>(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos )
3292             {
3293                 return SvxAutocorrWord(aShort, aLong);
3294             }
3295         }
3296     }
3297     return std::nullopt;
3298 }
3299 
3300 std::optional<SvxAutocorrWordList::WordSearchStatus>
SearchWordsInList(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos) const3301 SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt,
3302                                        sal_Int32& rStt,
3303                                        sal_Int32 nEndPos) const
3304 {
3305     for (auto it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
3306     {
3307         if(auto pTmp = WordMatches(&it->second, rTxt, rStt, nEndPos))
3308         {
3309             return WordSearchStatus(
3310                 *pTmp, this,
3311                 Iterator(std::make_unique<Iterator::Impl>(
3312                     it, mpImpl->maHash.end(),
3313                     mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end()
3314                 ))
3315             );
3316         }
3317     }
3318 
3319     for (auto it = mpImpl->maSortedVector.begin();
3320          it != mpImpl->maSortedVector.end(); ++it)
3321     {
3322         if(auto pTmp = WordMatches(&*it, rTxt, rStt, nEndPos))
3323         {
3324             return WordSearchStatus(
3325                 *pTmp, this,
3326                 Iterator(std::make_unique<Iterator::Impl>(
3327                     mpImpl->maHash.end(), mpImpl->maHash.end(),
3328                     it, mpImpl->maSortedVector.end()
3329                 ))
3330             );
3331         }
3332     }
3333 
3334     return std::nullopt;
3335 }
3336 
3337 bool
SearchWordsNext(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos,SvxAutocorrWordList::WordSearchStatus & rStatus) const3338 SvxAutocorrWordList::SearchWordsNext(std::u16string_view rTxt,
3339                                      sal_Int32& rStt,
3340                                      sal_Int32 nEndPos,
3341                                      SvxAutocorrWordList::WordSearchStatus& rStatus) const
3342 {
3343     while(rStatus.StepIter())
3344     {
3345         if(auto pTmp = WordMatches(rStatus.GetWordAtIter(),
3346                                    rTxt, rStt, nEndPos))
3347         {
3348             rStatus.mFnd = *pTmp;
3349             return true;
3350         }
3351     }
3352 
3353     return false;
3354 }
3355 
3356 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
3357