xref: /core/editeng/source/misc/svxacorr.cxx (revision 94306083)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <string_view>
22 #include <sal/config.h>
23 
24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
25 #include <com/sun/star/embed/XStorage.hpp>
26 #include <com/sun/star/io/IOException.hpp>
27 #include <com/sun/star/io/XStream.hpp>
28 #include <tools/urlobj.hxx>
29 #include <i18nlangtag/mslangid.hxx>
30 #include <i18nutil/transliteration.hxx>
31 #include <sal/log.hxx>
32 #include <osl/diagnose.h>
33 #include <vcl/svapp.hxx>
34 #include <vcl/settings.hxx>
35 #include <svl/fstathelper.hxx>
36 #include <svl/urihelper.hxx>
37 #include <unotools/charclass.hxx>
38 #include <com/sun/star/i18n/UnicodeType.hpp>
39 #include <unotools/collatorwrapper.hxx>
40 #include <com/sun/star/i18n/UnicodeScript.hpp>
41 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
42 #include <unotools/localedatawrapper.hxx>
43 #include <unotools/transliterationwrapper.hxx>
44 #include <comphelper/processfactory.hxx>
45 #include <comphelper/storagehelper.hxx>
46 #include <comphelper/string.hxx>
47 #include <editeng/editids.hrc>
48 #include <sot/storage.hxx>
49 #include <editeng/udlnitem.hxx>
50 #include <editeng/wghtitem.hxx>
51 #include <editeng/postitem.hxx>
52 #include <editeng/crossedoutitem.hxx>
53 #include <editeng/escapementitem.hxx>
54 #include <editeng/svxacorr.hxx>
55 #include <editeng/unolingu.hxx>
56 #include <vcl/window.hxx>
57 #include <com/sun/star/xml/sax/InputSource.hpp>
58 #include <com/sun/star/xml/sax/FastParser.hpp>
59 #include <com/sun/star/xml/sax/Writer.hpp>
60 #include <com/sun/star/xml/sax/SAXParseException.hpp>
61 #include <unotools/streamwrap.hxx>
62 #include "SvXMLAutoCorrectImport.hxx"
63 #include "SvXMLAutoCorrectExport.hxx"
64 #include "SvXMLAutoCorrectTokenHandler.hxx"
65 #include <ucbhelper/content.hxx>
66 #include <com/sun/star/ucb/ContentCreationException.hpp>
67 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
68 #include <com/sun/star/ucb/TransferInfo.hpp>
69 #include <com/sun/star/ucb/NameClash.hpp>
70 #include <tools/diagnose_ex.h>
71 #include <xmloff/xmltoken.hxx>
72 #include <unordered_map>
73 #include <rtl/character.hxx>
74 
75 using namespace ::com::sun::star::ucb;
76 using namespace ::com::sun::star::uno;
77 using namespace ::com::sun::star::xml::sax;
78 using namespace ::com::sun::star;
79 using namespace ::xmloff::token;
80 using namespace ::utl;
81 
namespace {

/// Bit set with one bit each for a full stop, an exclamation mark and a
/// question mark; NONE is the empty set.
enum class Flags {
    NONE            = 0,
    FullStop        = 1 << 0,
    ExclamationMark = 1 << 1,
    QuestionMark    = 1 << 2,
};

}
92 
namespace o3tl {
    // Registers Flags with o3tl::typed_flags. The mask 0x07 is the union of
    // the three bits defined in Flags (0x01 | 0x02 | 0x04).
    template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
}
const sal_Unicode cNonBreakingSpace = 0xA0; // Unicode code point of the no-break space (U+00A0)

// Names of the XML parts: word exception list, sentence exception list and
// document (replacement) list.
constexpr OUStringLiteral pXMLImplWrdStt_ExcptLstStr = u"WordExceptList.xml";
constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml";
constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml";

// Both tables are NUL-terminated byte strings that are scanned by
// lcl_IsInAsciiArr(); that function additionally treats U+2018..U+201F as
// members of either table.
// NOTE(review): the bytes \x83..\x94 look like legacy 8-bit quotation mark
// codes rather than Unicode code points - confirm before relying on them.
const char
    /* also at these beginnings - Brackets and all kinds of begin characters */
    sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
    /* also at these ends - Brackets and all kinds of end characters */
    sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
107 
108 static OUString EncryptBlockName_Imp(const OUString& rName);
109 
110 static bool NonFieldWordDelim( const sal_Unicode c )
111 {
112     return ' ' == c || '\t' == c || 0x0a == c ||
113             cNonBreakingSpace == c || 0x2011 == c;
114 }
115 
116 static bool IsWordDelim( const sal_Unicode c )
117 {
118     return c == 0x1 || NonFieldWordDelim(c);
119 }
120 
121 
122 static bool IsLowerLetter( sal_Int32 nCharType )
123 {
124     return CharClass::isLetterType( nCharType ) &&
125            ( css::i18n::KCharacterType::LOWER & nCharType);
126 }
127 
128 static bool IsUpperLetter( sal_Int32 nCharType )
129 {
130     return CharClass::isLetterType( nCharType ) &&
131             ( css::i18n::KCharacterType::UPPER & nCharType);
132 }
133 
134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
135                                    sal_Int32 nStt, sal_Int32 nEnd )
136 {
137     for( ; nStt < nEnd; ++nStt )
138     {
139         css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
140         switch( nScript )
141         {
142             case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
143             case css::i18n::UnicodeScript_kHangulJamo:
144             case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
145             case css::i18n::UnicodeScript_kHiragana:
146             case css::i18n::UnicodeScript_kKatakana:
147             case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
148             case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
149             case css::i18n::UnicodeScript_kCJKCompatibility:
150             case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
151             case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
152             case css::i18n::UnicodeScript_kHangulSyllable:
153             case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
154             case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
155                 return true;
156             default: ; //do nothing
157         }
158     }
159     return false;
160 }
161 
162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
163                                   sal_Int32 nStt, sal_Int32 nEnd )
164 {
165     for( ; nStt < nEnd; ++nStt )
166     {
167         if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
168             return true;
169     }
170     return false;
171 }
172 
173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
174 {
175     // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
176     if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) )
177         return true;
178 
179     bool bRet = false;
180     for( ; *pArr; ++pArr )
181         if( *pArr == c )
182         {
183             bRet = true;
184             break;
185         }
186     return bRet;
187 }
188 
189 SvxAutoCorrDoc::~SvxAutoCorrDoc()
190 {
191 }
192 
193 // Called by the functions:
194 //  - FnCapitalStartWord
195 //  - FnCapitalStartSentence
196 // after the exchange of characters. Then the words, if necessary, can be inserted
197 // into the exception list.
198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
199                                         sal_Unicode )
200 {
201 }
202 
203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
204 {
205     return LANGUAGE_SYSTEM;
206 }
207 
208 static const LanguageTag& GetAppLang()
209 {
210     return Application::GetSettings().GetLanguageTag();
211 }
212 
213 /// Never use an unresolved LANGUAGE_SYSTEM.
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
215 {
216     LanguageType eLang = rDoc.GetLanguage( nPos );
217     if (eLang == LANGUAGE_SYSTEM)
218         eLang = GetAppLang().getLanguageType();     // the current work locale
219     return eLang;
220 }
221 
222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
223 {
224     static LocaleDataWrapper aLclDtWrp( GetAppLang() );
225     LanguageTag aLcl( nLang );
226     const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag();
227     if( aLcl != rLcl )
228         aLclDtWrp.setLanguageTag( aLcl );
229     return aLclDtWrp;
230 }
231 static TransliterationWrapper& GetIgnoreTranslWrapper()
232 {
233     static int bIsInit = 0;
234     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
235                 TransliterationFlags::IGNORE_KANA |
236                 TransliterationFlags::IGNORE_WIDTH );
237     if( !bIsInit )
238     {
239         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
240         bIsInit = 1;
241     }
242     return aWrp;
243 }
244 static CollatorWrapper& GetCollatorWrapper()
245 {
246     static CollatorWrapper aCollWrp = [&]()
247     {
248         CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
249         tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
250         return tmp;
251     }();
252     return aCollWrp;
253 }
254 
255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
256 {
257     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
258             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
259             cChar == '*'  || cChar == '_'  || cChar == '%' ||
260             cChar == '.'  || cChar == ','  || cChar == ';' ||
261             cChar == ':'  || cChar == '?' || cChar == '!' ||
262             cChar == '<'  || cChar == '>' ||
263             cChar == '/'  || cChar == '-';
264 }
265 
266 namespace
267 {
268     bool IsCompoundWordDelimChar(sal_Unicode cChar)
269     {
270         return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
271     }
272 }
273 
274 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
275 {
276     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
277         cChar == '/' /*case for the urls exception*/;
278 }
279 
280 ACFlags SvxAutoCorrect::GetDefaultFlags()
281 {
282     ACFlags nRet = ACFlags::Autocorrect
283                     | ACFlags::CapitalStartSentence
284                     | ACFlags::CapitalStartWord
285                     | ACFlags::ChgOrdinalNumber
286                     | ACFlags::ChgToEnEmDash
287                     | ACFlags::AddNonBrkSpace
288                     | ACFlags::TransliterateRTL
289                     | ACFlags::ChgAngleQuotes
290                     | ACFlags::ChgWeightUnderl
291                     | ACFlags::SetINetAttr
292                     | ACFlags::ChgQuotes
293                     | ACFlags::SaveWordCplSttLst
294                     | ACFlags::SaveWordWrdSttLst
295                     | ACFlags::CorrectCapsLock;
296     LanguageType eLang = GetAppLang().getLanguageType();
297     if( eLang.anyOf(
298         LANGUAGE_ENGLISH,
299         LANGUAGE_ENGLISH_US,
300         LANGUAGE_ENGLISH_UK,
301         LANGUAGE_ENGLISH_AUS,
302         LANGUAGE_ENGLISH_CAN,
303         LANGUAGE_ENGLISH_NZ,
304         LANGUAGE_ENGLISH_EIRE,
305         LANGUAGE_ENGLISH_SAFRICA,
306         LANGUAGE_ENGLISH_JAMAICA,
307         LANGUAGE_ENGLISH_CARIBBEAN))
308         nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
309     return nRet;
310 }
311 
// Unicode code points of the dashes, the typographic apostrophe and the
// angle quotation marks used by the autocorrection functions.
constexpr sal_Unicode cEmDash = 0x2014;
constexpr sal_Unicode cEnDash = 0x2013;
constexpr sal_Unicode cApostrophe = 0x2019;
constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
// stop characters for searching preceding quotes
// (the first character is also the opening quote we are looking for)
// Every array is terminated by a 0 entry.
const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
// preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
327 
328 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
329                                 const OUString& rUserAutocorrFile )
330     : sShareAutoCorrFile( rShareAutocorrFile )
331     , sUserAutoCorrFile( rUserAutocorrFile )
332     , eCharClassLang( LANGUAGE_DONTKNOW )
333     , nFlags(SvxAutoCorrect::GetDefaultFlags())
334     , cStartDQuote( 0 )
335     , cEndDQuote( 0 )
336     , cStartSQuote( 0 )
337     , cEndSQuote( 0 )
338 {
339 }
340 
341 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
342     : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
343     , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
344     , aSwFlags( rCpy.aSwFlags )
345     , eCharClassLang(rCpy.eCharClassLang)
346     , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad))
347     , cStartDQuote( rCpy.cStartDQuote )
348     , cEndDQuote( rCpy.cEndDQuote )
349     , cStartSQuote( rCpy.cStartSQuote )
350     , cEndSQuote( rCpy.cEndSQuote )
351 {
352 }
353 
354 
355 SvxAutoCorrect::~SvxAutoCorrect()
356 {
357 }
358 
359 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
360 {
361     pCharClass.reset( new CharClass( LanguageTag( eLang)) );
362     eCharClassLang = eLang;
363 }
364 
365 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
366 {
367     ACFlags nOld = nFlags;
368     nFlags = bOn ? nFlags | nFlag
369                  : nFlags & ~nFlag;
370 
371     if( !bOn )
372     {
373         if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
374             nFlags &= ~ACFlags::CplSttLstLoad;
375         if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
376             nFlags &= ~ACFlags::WrdSttLstLoad;
377         if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
378             nFlags &= ~ACFlags::ChgWordLstLoad;
379     }
380 }
381 
382 
383 // Correct TWo INitial CApitals
384 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
385                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
386                                     LanguageType eLang )
387 {
388     CharClass& rCC = GetCharClass( eLang );
389 
390     // Delete all non alphanumeric. Test the characters at the beginning/end of
391     // the word ( recognizes: "(min.", "/min.", and so on.)
392     for( ; nSttPos < nEndPos; ++nSttPos )
393         if( rCC.isLetterNumeric( rTxt, nSttPos ))
394             break;
395     for( ; nSttPos < nEndPos; --nEndPos )
396         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
397             break;
398 
399     // Is the word a compounded word separated by delimiters?
400     // If so, keep track of all delimiters so each constituent
401     // word can be checked for two initial capital letters.
402     std::deque<sal_Int32> aDelimiters;
403 
404     // Always check for two capitals at the beginning
405     // of the entire word, so start at nSttPos.
406     aDelimiters.push_back(nSttPos);
407 
408     // Find all compound word delimiters
409     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
410     {
411         if (IsCompoundWordDelimChar(rTxt[ n ]))
412         {
413             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
414         }
415     }
416 
417     // Decide where to put the terminating delimiter.
418     // If the last AutoCorrect char was a newline, then the AutoCorrect
419     // char will not be included in rTxt.
420     // If the last AutoCorrect char was not a newline, then the AutoCorrect
421     // character will be the last character in rTxt.
422     if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
423         aDelimiters.push_back(nEndPos);
424 
425     // Iterate through the word and all words that compose it.
426     // Two capital letters at the beginning of word?
427     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
428     {
429         nSttPos = aDelimiters[nI];
430         nEndPos = aDelimiters[nI + 1];
431 
432         if( nSttPos+2 < nEndPos &&
433             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
434             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
435             // Is the third character a lower case
436             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
437             // Do not replace special attributes
438             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
439         {
440             // test if the word is in an exception list
441             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
442             if( !FindInWrdSttExceptList(eLang, sWord) )
443             {
444                 // Check that word isn't correctly spelt before correcting:
445                 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
446                     LinguMgr::GetSpellChecker();
447                 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
448                 {
449                     Sequence< css::beans::PropertyValue > aEmptySeq;
450                     if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
451                     {
452                         return;
453                     }
454                 }
455                 sal_Unicode cSave = rTxt[ nSttPos ];
456                 OUString sChar = rCC.lowercase( OUString(cSave) );
457                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
458                 {
459                     if( ACFlags::SaveWordWrdSttLst & nFlags )
460                         rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
461                 }
462             }
463         }
464     }
465 }
466 
467 // Format ordinal numbers suffixes (1st -> 1^st)
468 bool SvxAutoCorrect::FnChgOrdinalNumber(
469     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
470     sal_Int32 nSttPos, sal_Int32 nEndPos,
471     LanguageType eLang)
472 {
473     // 1st, 2nd, 3rd, 4 - 0th
474     // 201th or 201st
475     // 12th or 12nd
476     bool bChg = false;
477 
478     // In some languages ordinal suffixes should never be
479     // changed to superscript. Let's break for those languages.
480     if (!eLang.anyOf(
481          LANGUAGE_SWEDISH,
482          LANGUAGE_SWEDISH_FINLAND))
483     {
484         CharClass& rCC = GetCharClass(eLang);
485 
486         for (; nSttPos < nEndPos; ++nSttPos)
487             if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
488                 break;
489         for (; nSttPos < nEndPos; --nEndPos)
490             if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
491                 break;
492 
493 
494         // Get the last number in the string to check
495         sal_Int32 nNumEnd = nEndPos;
496         bool bFoundEnd = false;
497         bool isValidNumber = true;
498         sal_Int32 i = nEndPos;
499         while (i > nSttPos)
500         {
501             i--;
502             bool isDigit = rCC.isDigit(rTxt, i);
503             if (bFoundEnd)
504                 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
505 
506             if (isDigit && !bFoundEnd)
507             {
508                 bFoundEnd = true;
509                 nNumEnd = i;
510             }
511         }
512 
513         if (bFoundEnd && isValidNumber) {
514             sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
515 
516             // Check if the characters after that number correspond to the ordinal suffix
517             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
518                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
519 
520             const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
521             for (OUString const & sSuffix : aSuffixes)
522             {
523                 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
524 
525                 if (sSuffix == sEnd)
526                 {
527                     // Check if the ordinal suffix has to be set as super script
528                     if (rCC.isLetter(sSuffix))
529                     {
530                         // Do the change
531                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
532                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
533                         rDoc.SetAttr(nNumEnd + 1, nEndPos,
534                             SID_ATTR_CHAR_ESCAPEMENT,
535                             aSvxEscapementItem);
536                         bChg = true;
537                     }
538                 }
539             }
540         }
541     }
542     return bChg;
543 }
544 
545 // Replace dashes
546 bool SvxAutoCorrect::FnChgToEnEmDash(
547                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
548                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
549                                 LanguageType eLang )
550 {
551     bool bRet = false;
552     CharClass& rCC = GetCharClass( eLang );
553     if (eLang == LANGUAGE_SYSTEM)
554         eLang = GetAppLang().getLanguageType();
555     bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
556 
557     // replace " - " or " --" with "enDash"
558     if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
559     {
560         sal_Unicode cCh = rTxt[ nSttPos ];
561         if( '-' == cCh )
562         {
563             if( 1 < nEndPos - nSttPos &&
564                 ' ' == rTxt[ nSttPos-1 ] &&
565                 '-' == rTxt[ nSttPos+1 ])
566             {
567                 sal_Int32 n;
568                 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
569                             sImplSttSkipChars,(cCh = rTxt[ n ]));
570                         ++n )
571                     ;
572 
573                 // found: " --[<AnySttChars>][A-z0-9]
574                 if( rCC.isLetterNumeric( OUString(cCh) ) )
575                 {
576                     for( n = nSttPos-1; n && lcl_IsInAsciiArr(
577                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
578                         ;
579 
580                     // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
581                     if( rCC.isLetterNumeric( OUString(cCh) ))
582                     {
583                         rDoc.Delete( nSttPos, nSttPos + 2 );
584                         rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
585                         bRet = true;
586                     }
587                 }
588             }
589         }
590         else if( 3 < nSttPos &&
591                  ' ' == rTxt[ nSttPos-1 ] &&
592                  '-' == rTxt[ nSttPos-2 ])
593         {
594             sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
595             if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
596             {
597                 --nTmpPos;
598                 ++nLen;
599                 cCh = rTxt[ nTmpPos-1 ];
600             }
601             if( ' ' == cCh )
602             {
603                 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
604                             sImplSttSkipChars,(cCh = rTxt[ n ]));
605                         ++n )
606                     ;
607 
608                 // found: " - [<AnySttChars>][A-z0-9]
609                 if( rCC.isLetterNumeric( OUString(cCh) ) )
610                 {
611                     cCh = ' ';
612                     for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
613                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
614                             ;
615                     // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
616                     if( rCC.isLetterNumeric( OUString(cCh) ))
617                     {
618                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
619                         rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
620                         bRet = true;
621                     }
622                 }
623             }
624         }
625     }
626 
627     // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
628     // [0-9]--[0-9] double dash always replaced with "enDash"
629     // Finnish and Hungarian use enDash instead of emDash.
630     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
631     if( 4 <= nEndPos - nSttPos )
632     {
633         OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
634         sal_Int32 nFndPos = sTmp.indexOf("--");
635         if( nFndPos != -1 && nFndPos &&
636             nFndPos + 2 < sTmp.getLength() &&
637             ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
638               lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
639             ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
640             lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
641         {
642             nSttPos = nSttPos + nFndPos;
643             rDoc.Delete( nSttPos, nSttPos + 2 );
644             rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
645                 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
646             bRet = true;
647         }
648     }
649     return bRet;
650 }
651 
652 // Add non-breaking space before specific punctuation marks in French text
653 bool SvxAutoCorrect::FnAddNonBrkSpace(
654                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
655                                 sal_Int32 nEndPos,
656                                 LanguageType eLang, bool& io_bNbspRunNext )
657 {
658     bool bRet = false;
659 
660     CharClass& rCC = GetCharClass( eLang );
661 
662     if ( rCC.getLanguageTag().getLanguage() == "fr" )
663     {
664         bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
665         OUString allChars = ":;?!%";
666         OUString chars( allChars );
667         if ( bFrCA )
668             chars = ":";
669 
670         sal_Unicode cChar = rTxt[ nEndPos ];
671         bool bHasSpace = chars.indexOf( cChar ) != -1;
672         bool bIsSpecial = allChars.indexOf( cChar ) != -1;
673         if ( bIsSpecial )
674         {
675             // Get the last word delimiter position
676             sal_Int32 nSttWdPos = nEndPos;
677             bool bWasWordDelim = false;
678             while( nSttWdPos )
679             {
680                 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
681                 if (bWasWordDelim)
682                     break;
683             }
684 
685             //See if the text is the start of a protocol string, e.g. have text of
686             //"http" see if it is the start of "http:" and if so leave it alone
687             sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
688             sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
689             if (nIndex + nProtocolLen <= rTxt.getLength())
690             {
691                 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
692                     return false;
693             }
694 
695             // Check the presence of "://" in the word
696             sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
697             if ( nStrPos == -1 && nEndPos > 0 )
698             {
699                 // Check the previous char
700                 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
701                 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
702                 {
703                     // Remove any previous normal space
704                     sal_Int32 nPos = nEndPos - 1;
705                     while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
706                     {
707                         if ( nPos == 0 ) break;
708                         nPos--;
709                         cPrevChar = rTxt[ nPos ];
710                     }
711 
712                     nPos++;
713                     if ( nEndPos - nPos > 0 )
714                         rDoc.Delete( nPos, nEndPos );
715 
716                     // Add the non-breaking space at the end pos
717                     if ( bHasSpace )
718                         rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
719                     io_bNbspRunNext = true;
720                     bRet = true;
721                 }
722                 else if ( chars.indexOf( cPrevChar ) != -1 )
723                     io_bNbspRunNext = true;
724             }
725         }
726         else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
727         {
728             // Remove the hardspace right before to avoid formatting URLs
729             sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
730             sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
731             if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
732             {
733                 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
734                 bRet = true;
735             }
736         }
737     }
738 
739     return bRet;
740 }
741 
742 // URL recognition
743 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
744                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
745                                     LanguageType eLang )
746 {
747     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
748                                                 GetCharClass( eLang ) ));
749     bool bRet = !sURL.isEmpty();
750     if( bRet )          // so, set attribute:
751         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
752     return bRet;
753 }
754 
755 // Automatic *bold*, /italic/, -strikeout- and _underline_
756 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
757                                         sal_Int32 nEndPos )
758 {
759     // Condition:
760     //  at the beginning:   _, *, / or ~ after Space with the following !Space
761     //  at the end:         _, *, / or ~ before Space (word delimiter?)
762 
763     sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
764     if( ++nEndPos != rTxt.getLength() &&
765         !IsWordDelim( rTxt[ nEndPos ] ) )
766         return false;
767 
768     --nEndPos;
769 
770     bool bAlphaNum = false;
771     sal_Int32 nPos = nEndPos;
772     sal_Int32  nFndPos = -1;
773     CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
774 
775     while( nPos )
776     {
777         switch( sal_Unicode c = rTxt[ --nPos ] )
778         {
779         case '_':
780         case '-':
781         case '/':
782         case '*':
783             if( c == cInsChar )
784             {
785                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
786                     IsWordDelim( rTxt[ nPos-1 ])) &&
787                     !IsWordDelim( rTxt[ nPos+1 ]))
788                         nFndPos = nPos;
789                 else
790                     // Condition is not satisfied, so cancel
791                     nFndPos = -1;
792                 nPos = 0;
793             }
794             break;
795         default:
796             if( !bAlphaNum )
797                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
798         }
799     }
800 
801     if( -1 != nFndPos )
802     {
803         // first delete the Character at the end - this allows insertion
804         // of an empty hint in SetAttr which would be removed by Delete
805         // (fdo#62536, AUTOFMT in Writer)
806         rDoc.Delete( nEndPos, nEndPos + 1 );
807         rDoc.Delete( nFndPos, nFndPos + 1 );
808         // Span the Attribute over the area
809         // the end.
810         if( '*' == cInsChar )           // Bold
811         {
812             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
813             rDoc.SetAttr( nFndPos, nEndPos - 1,
814                           SID_ATTR_CHAR_WEIGHT,
815                           aSvxWeightItem);
816         }
817         else if( '/' == cInsChar )           // Italic
818         {
819             SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
820             rDoc.SetAttr( nFndPos, nEndPos - 1,
821                           SID_ATTR_CHAR_POSTURE,
822                           aSvxPostureItem);
823         }
824         else if( '-' == cInsChar )           // Strikeout
825         {
826             SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
827             rDoc.SetAttr( nFndPos, nEndPos - 1,
828                           SID_ATTR_CHAR_STRIKEOUT,
829                           aSvxCrossedOutItem);
830         }
831         else                            // Underline
832         {
833             SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
834             rDoc.SetAttr( nFndPos, nEndPos - 1,
835                           SID_ATTR_CHAR_UNDERLINE,
836                           aSvxUnderlineItem);
837         }
838       }
839 
840     return -1 != nFndPos;
841 }
842 
843 // Capitalize first letter of every sentence
844 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
845                                     const OUString& rTxt, bool bNormalPos,
846                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
847                                     LanguageType eLang )
848 {
849 
850     if( rTxt.isEmpty() || nEndPos <= nSttPos )
851         return;
852 
853     CharClass& rCC = GetCharClass( eLang );
854     OUString aText( rTxt );
855     const sal_Unicode *pStart = aText.getStr(),
856                       *pStr = pStart + nEndPos,
857                       *pWordStt = nullptr,
858                       *pDelim = nullptr;
859 
860     bool bAtStart = false;
861     do {
862         --pStr;
863         if (rCC.isLetter(aText, pStr - pStart))
864         {
865             if( !pWordStt )
866                 pDelim = pStr+1;
867             pWordStt = pStr;
868         }
869         else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
870         {
871             if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
872                 pWordStt - 1 == pStr &&
873                 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
874                 (pStart + 1) <= pStr &&
875                 rCC.isLetter(aText, pStr-1 - pStart))
876                 pWordStt = --pStr;
877             else
878                 break;
879         }
880         bAtStart = (pStart == pStr);
881     } while( !bAtStart );
882 
883     if (!pWordStt)
884         return;    // no character to be replaced
885 
886 
887     if (rCC.isDigit(aText, pStr - pStart))
888         return; // already ok
889 
890     if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
891         return; // already ok
892 
893     //See if the text is the start of a protocol string, e.g. have text of
894     //"http" see if it is the start of "http:" and if so leave it alone
895     sal_Int32 nIndex = pWordStt - pStart;
896     sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
897     if (nIndex + nProtocolLen <= rTxt.getLength())
898     {
899         if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
900             return; // already ok
901     }
902 
903     if (0x1 == *pWordStt || 0x2 == *pWordStt)
904         return; // already ok
905 
906     // Only capitalize, if string before specified characters is long enough
907     if( *pDelim && 2 >= pDelim - pWordStt &&
908         lcl_IsInAsciiArr( ".-)>", *pDelim ) )
909         return;
910 
911     // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
912     if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
913         return;
914 
915     if( !bAtStart ) // Still no beginning of a paragraph?
916     {
917         if (NonFieldWordDelim(*pStr))
918         {
919             for (;;)
920             {
921                 bAtStart = (pStart == pStr--);
922                 if (bAtStart || !NonFieldWordDelim(*pStr))
923                     break;
924             }
925         }
926         // Asian full stop, full width full stop, full width exclamation mark
927         // and full width question marks are treated as word delimiters
928         else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
929                   0xFF1F != *pStr )
930             return; // no valid separator -> no replacement
931     }
932 
933     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
934     if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
935         return;
936 
937     if( bAtStart )  // at the beginning of a paragraph?
938     {
939         // Check out the previous paragraph, if it exists.
940         // If so, then check to paragraph separator at the end.
941         OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
942         if (!pPrevPara)
943         {
944             // valid separator -> replace
945             OUString sChar( *pWordStt );
946             sChar = rCC.titlecase(sChar); //see fdo#56740
947             if (sChar != OUStringChar(*pWordStt))
948                rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
949             return;
950         }
951 
952         aText = *pPrevPara;
953         bAtStart = false;
954         pStart = aText.getStr();
955         pStr = pStart + aText.getLength();
956 
957         do {            // overwrite all blanks
958             --pStr;
959             if (!NonFieldWordDelim(*pStr))
960                 break;
961             bAtStart = (pStart == pStr);
962         } while( !bAtStart );
963 
964         if( bAtStart )
965             return;  // no valid separator -> no replacement
966     }
967 
968     // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
969     // all three can happen, but not more than once!
970     const sal_Unicode* pExceptStt = nullptr;
971     bool bContinue = true;
972     Flags nFlag = Flags::NONE;
973     do
974     {
975         switch (*pStr)
976         {
977             // Western and Asian full stop
978             case '.':
979             case 0x3002:
980             case 0xFF0E:
981             {
982                 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
983                 {
984                     //e.g. text "f.o.o. word": Now currently considering
985                     //capitalizing word but second last character of
986                     //previous word is a .  So probably last word is an
987                     //anagram that ends in . and not truly the end of a
988                     //previous sentence, so don't autocapitalize this word
989                     return;
990                 }
991                 if (nFlag & Flags::FullStop)
992                     return; // no valid separator -> no replacement
993                 nFlag |= Flags::FullStop;
994                 pExceptStt = pStr;
995             }
996             break;
997             case '!':
998             case 0xFF01:
999             {
1000                 if (nFlag & Flags::ExclamationMark)
1001                     return; // no valid separator -> no replacement
1002                 nFlag |= Flags::ExclamationMark;
1003             }
1004             break;
1005             case '?':
1006             case 0xFF1F:
1007             {
1008                 if (nFlag & Flags::QuestionMark)
1009                     return; // no valid separator -> no replacement
1010                 nFlag |= Flags::QuestionMark;
1011             }
1012             break;
1013             default:
1014                 if (nFlag == Flags::NONE)
1015                     return; // no valid separator -> no replacement
1016                 else
1017                     bContinue = false;
1018                 break;
1019         }
1020 
1021         if (bContinue && pStr-- == pStart)
1022         {
1023             return; // no valid separator -> no replacement
1024         }
1025     } while (bContinue);
1026     if (Flags::FullStop != nFlag)
1027         pExceptStt = nullptr;
1028 
1029     // Only capitalize, if string is long enough
1030     if( 2 > ( pStr - pStart ) )
1031         return;
1032 
1033     if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1034     {
1035         bool bValid = false, bAlphaFnd = false;
1036         const sal_Unicode* pTmpStr = pStr;
1037         while( !bValid )
1038         {
1039             if( rCC.isDigit( aText, pTmpStr - pStart ) )
1040             {
1041                 bValid = true;
1042                 pStr = pTmpStr - 1;
1043             }
1044             else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1045             {
1046                 if( bAlphaFnd )
1047                 {
1048                     bValid = true;
1049                     pStr = pTmpStr;
1050                 }
1051                 else
1052                     bAlphaFnd = true;
1053             }
1054             else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1055                 break;
1056 
1057             if( pTmpStr == pStart )
1058                 break;
1059 
1060             --pTmpStr;
1061         }
1062 
1063         if( !bValid )
1064             return;       // no valid separator -> no replacement
1065     }
1066 
1067     bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1068 
1069     // Search for the beginning of the word
1070     while (!NonFieldWordDelim(*pStr))
1071     {
1072         if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1073             bNumericOnly = false;
1074 
1075         if( pStart == pStr )
1076             break;
1077 
1078         --pStr;
1079     }
1080 
1081     if( bNumericOnly )      // consists of only numbers, then not
1082         return;
1083 
1084     if (NonFieldWordDelim(*pStr))
1085         ++pStr;
1086 
1087     OUString sWord;
1088 
1089     // check on the basis of the exception list
1090     if( pExceptStt )
1091     {
1092         sWord = OUString(pStr, pExceptStt - pStr + 1);
1093         if( FindInCplSttExceptList(eLang, sWord) )
1094             return;
1095 
1096         // Delete all non alphanumeric. Test the characters at the
1097         // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1098         OUString sTmp( sWord );
1099         while( !sTmp.isEmpty() &&
1100                 !rCC.isLetterNumeric( sTmp, 0 ) )
1101             sTmp = sTmp.copy(1);
1102 
1103         // Remove all non alphanumeric characters towards the end up until
1104         // the last one.
1105         sal_Int32 nLen = sTmp.getLength();
1106         while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1107             --nLen;
1108         if( nLen + 1 < sTmp.getLength() )
1109             sTmp = sTmp.copy( 0, nLen + 1 );
1110 
1111         if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1112             FindInCplSttExceptList(eLang, sTmp))
1113             return;
1114 
1115         if(FindInCplSttExceptList(eLang, sWord, true))
1116             return;
1117     }
1118 
1119     // Ok, then replace
1120     sal_Unicode cSave = *pWordStt;
1121     nSttPos = pWordStt - rTxt.getStr();
1122     OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1123     bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1124 
1125     // Perhaps someone wants to have the word
1126     if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1127         rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1128 }
1129 
1130 // Correct accidental use of cAPS LOCK key
1131 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1132                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1133                                         LanguageType eLang )
1134 {
1135     if (nEndPos - nSttPos < 2)
1136         // string must be at least 2-character long.
1137         return false;
1138 
1139     CharClass& rCC = GetCharClass( eLang );
1140 
1141     // Check the first 2 letters.
1142     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1143         return false;
1144 
1145     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1146         return false;
1147 
1148     OUStringBuffer aConverted;
1149     aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1150     aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1151 
1152     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1153     if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1154         return false;
1155 
1156     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1157     {
1158         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1159             // A lowercase letter disqualifies the whole text.
1160             return false;
1161 
1162         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1163             // Another uppercase letter.  Convert it.
1164             aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1165         else
1166             // This is not an alphabetic letter.  Leave it as-is.
1167             aConverted.append( rTxt[i] );
1168     }
1169 
1170     // Replace the word.
1171     rDoc.Delete(nSttPos, nEndPos);
1172     rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1173 
1174     return true;
1175 }
1176 
1177 
1178 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1179                                         LanguageType eLang ) const
1180 {
1181     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1182                                     ? GetStartDoubleQuote()
1183                                     : GetStartSingleQuote() )
1184                                    : ( '\"' == cInsChar
1185                                     ? GetEndDoubleQuote()
1186                                     : GetEndSingleQuote() );
1187     if( !cRet )
1188     {
1189         // then through the Language find the right character
1190         if( LANGUAGE_NONE == eLang )
1191             cRet = cInsChar;
1192         else
1193         {
1194             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1195             OUString sRet( bSttQuote
1196                             ? ( '\"' == cInsChar
1197                                 ? rLcl.getDoubleQuotationMarkStart()
1198                                 : rLcl.getQuotationMarkStart() )
1199                             : ( '\"' == cInsChar
1200                                 ? rLcl.getDoubleQuotationMarkEnd()
1201                                 : rLcl.getQuotationMarkEnd() ));
1202             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1203         }
1204     }
1205     return cRet;
1206 }
1207 
1208 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1209                                     sal_Unicode cInsChar, bool bSttQuote,
1210                                     bool bIns, LanguageType eLang, ACQuotes eType ) const
1211 {
1212     sal_Unicode cRet;
1213 
1214     if ( eType == ACQuotes::DoubleAngleQuote )
1215     {
1216         bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1217         // pressing " inside a quotation -> use second level angle quotes
1218         bool bLeftQuote = '\"' == cInsChar &&
1219                 // start position and Romanian OR
1220                 // not start position and Hungarian
1221                 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1222         cRet = ( '<' == cInsChar || bLeftQuote )
1223                 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1224                 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1225     }
1226     else if ( eType == ACQuotes::UseApostrophe )
1227         cRet = cApostrophe;
1228     else
1229         cRet = GetQuote( cInsChar, bSttQuote, eLang );
1230 
1231     OUString sChg( cInsChar );
1232     if( bIns )
1233         rDoc.Insert( nInsPos, sChg );
1234     else
1235         rDoc.Replace( nInsPos, sChg );
1236 
1237     sChg = OUString(cRet);
1238 
1239     if( eType == ACQuotes::NonBreakingSpace )
1240     {
1241         if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1242         {
1243             if( !bSttQuote )
1244                 ++nInsPos;
1245         }
1246     }
1247     else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1248     {
1249         rDoc.Delete( nInsPos-1, nInsPos);
1250         --nInsPos;
1251     }
1252 
1253     rDoc.Replace( nInsPos, sChg );
1254 
1255     // i' -> I' in English (last step for the Undo)
1256     if( eType == ACQuotes::CapitalizeIAm )
1257         rDoc.Replace( nInsPos-1, "I" );
1258 }
1259 
1260 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1261                                 sal_Unicode cInsChar, bool bSttQuote )
1262 {
1263     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1264     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1265 
1266     OUString sRet(cRet);
1267 
1268     if( '\"' == cInsChar )
1269     {
1270         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1271         {
1272             if( bSttQuote )
1273                 sRet += " ";
1274             else
1275                 sRet = " " + sRet;
1276         }
1277     }
1278     return sRet;
1279 }
1280 
// Search backwards from the insert position for a preceding character,
// typically the opening quote of the current quotation.
//
//  rTxt            text of the paragraph
//  nPos            insert position; only characters in front of it are
//                  examined. Values <= 0 yield false, values beyond the end
//                  of rTxt are treated as the end of rTxt.
//  sPrecedingChar  the character searched for
//  aStopChars      0-terminated array of characters that end the search
//                  unsuccessfully
//
// Returns true if sPrecedingChar is found before any of the stop characters
// and before the start of the text, false otherwise.
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars )
{
    // never read behind the end of the text
    if ( nPos > 0 && static_cast<std::u16string_view::size_type>(nPos) > rTxt.size() )
        nPos = static_cast<sal_Int32>(rTxt.size());

    // Test the position before decrementing, so that nPos == 0 (nothing in
    // front of the insert position) does not read in front of the text.
    while ( nPos > 0 )
    {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;
    }

    return false;
}
1300 
// WARNING: rTxt may become invalid, see comment below
1302 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1303                                     sal_Int32 nInsPos, sal_Unicode cChar,
1304                                     bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1305 {
1306     bool bIsNextRun = io_bNbspRunNext;
1307     io_bNbspRunNext = false;  // if it was set, then it has to be turned off
1308 
1309     do{                                 // only for middle check loop !!
1310         if( cChar )
1311         {
1312             // Prevent double space
1313             if( nInsPos && ' ' == cChar &&
1314                 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1315                 ' ' == rTxt[ nInsPos - 1 ])
1316             {
1317                 break;
1318             }
1319 
1320             bool bSingle = '\'' == cChar;
1321             bool bIsReplaceQuote =
1322                         (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1323                         (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1324             if( bIsReplaceQuote )
1325             {
1326                 bool bSttQuote = !nInsPos;
1327                 ACQuotes eType = ACQuotes::NONE;
1328                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1329                 if (!bSttQuote)
1330                 {
1331                     sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1332                     bSttQuote = NonFieldWordDelim(cPrev) ||
1333                         lcl_IsInAsciiArr( "([{", cPrev ) ||
1334                         ( cEmDash == cPrev ) ||
1335                         ( cEnDash == cPrev );
1336                     // tdf#38394 use opening quotation mark << in French l'<<word>>
1337                     if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1338                         primary(eLang) == primary(LANGUAGE_FRENCH) &&
1339                         ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1340                                // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1341                                OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1342                           ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1343                                // abbreviated form of que
1344                                ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1345                                ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1346                     {
1347                         bSttQuote = true;
1348                     }
1349                     // tdf#108423 for capitalization of English i'm
1350                     else if ( bSingle && ( cPrev == 'i' ) &&
1351                         primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1352                         ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1353                     {
1354                         eType = ACQuotes::CapitalizeIAm;
1355                     }
1356                     // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1357                     else if ( !bSingle && nInsPos &&
1358                         ( ( eLang == LANGUAGE_HUNGARIAN &&
1359                             lcl_HasPrecedingChar( rTxt, nInsPos,
1360                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1361                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) ||
1362                           ( eLang.anyOf(
1363                                 LANGUAGE_ROMANIAN,
1364                                 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1365                             lcl_HasPrecedingChar( rTxt, nInsPos,
1366                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1367                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) )
1368                     {
1369                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1370                         // only if the opening double quotation mark is the default one
1371                         if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1372                             eType = ACQuotes::DoubleAngleQuote;
1373                     }
1374                     else if ( bSingle && nInsPos && !bSttQuote &&
1375                         // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1376                         // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1377                         // tdf#123786 the same for Russian and Ukrainian
1378                         ( ( eLang.anyOf (
1379                                  LANGUAGE_CZECH,
1380                                  LANGUAGE_GERMAN,
1381                                  LANGUAGE_GERMAN_SWISS,
1382                                  LANGUAGE_GERMAN_AUSTRIAN,
1383                                  LANGUAGE_GERMAN_LUXEMBOURG,
1384                                  LANGUAGE_GERMAN_LIECHTENSTEIN,
1385                                  LANGUAGE_ICELANDIC,
1386                                  LANGUAGE_SLOVAK,
1387                                  LANGUAGE_SLOVENIAN ) &&
1388                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0],  aStopSingleQuoteEnd + 1 ) ) ||
1389                           ( eLang.anyOf (
1390                                  LANGUAGE_RUSSIAN,
1391                                  LANGUAGE_UKRAINIAN ) &&
1392                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0],  aStopSingleQuoteEndRuUa + 1 ) ) ) )
1393                     {
1394                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1395                         CharClass& rCC = GetCharClass( eLang );
1396                         if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) ||
1397                              rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) &&
1398                              // use apostrophe only after letters, not after digits or punctuation
1399                              rCC.isLetter(rTxt, nInsPos-1) )
1400                         {
1401                             eType = ACQuotes::UseApostrophe;
1402                         }
1403                     }
1404                 }
1405 
1406                 if ( eType == ACQuotes::NONE && !bSingle &&
1407                     ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1408                     eType = ACQuotes::NonBreakingSpace;
1409 
1410                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1411                 break;
1412             }
1413             // tdf#133524 change "<<" and ">>" to double angle quotation marks
1414             else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1415                 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1416                 ('<' == cChar || '>' == cChar) &&
1417                 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1418             {
1419                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1420                 if ( eLang.anyOf(
1421                         LANGUAGE_CATALAN,              // primary level
1422                         LANGUAGE_CATALAN_VALENCIAN,    // primary level
1423                         LANGUAGE_FINNISH,              // alternative primary level
1424                         LANGUAGE_FRENCH_SWISS,         // second level
1425                         LANGUAGE_GALICIAN,             // primary level
1426                         LANGUAGE_HUNGARIAN,            // second level
1427                         LANGUAGE_POLISH,               // second level
1428                         LANGUAGE_PORTUGUESE,           // primary level
1429                         LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1430                         LANGUAGE_ROMANIAN,             // second level
1431                         LANGUAGE_ROMANIAN_MOLDOVA,     // second level
1432                         LANGUAGE_SWEDISH,              // alternative primary level
1433                         LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
1434                         LANGUAGE_UKRAINIAN,            // primary level
1435                         LANGUAGE_USER_ARAGONESE,       // primary level
1436                         LANGUAGE_USER_ASTURIAN ) ||    // primary level
1437                     primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
1438                     primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
1439                 {
1440                     InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1441                     break;
1442                 }
1443             }
1444 
1445             if( bInsert )
1446                 rDoc.Insert( nInsPos, OUString(cChar) );
1447             else
1448                 rDoc.Replace( nInsPos, OUString(cChar) );
1449 
1450             // Hardspaces autocorrection
1451             if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1452             {
1453                 if ( NeedsHardspaceAutocorr( cChar ) &&
1454                     FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
1455                 {
1456                     ;
1457                 }
1458                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1459                 {
1460                     // Remove the NBSP if it wasn't an autocorrection
1461                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1462                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1463                     {
1464                         // Look for the last HARD_SPACE
1465                         sal_Int32 nPos = nInsPos - 1;
1466                         bool bContinue = true;
1467                         while ( bContinue )
1468                         {
1469                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1470                             if ( cTmpChar == cNonBreakingSpace )
1471                             {
1472                                 rDoc.Delete( nPos, nPos + 1 );
1473                                 bContinue = false;
1474                             }
1475                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1476                                 bContinue = false;
1477                             nPos--;
1478                         }
1479                     }
1480                 }
1481             }
1482         }
1483 
1484         if( !nInsPos )
1485             break;
1486 
1487         sal_Int32 nPos = nInsPos - 1;
1488 
1489         if( IsWordDelim( rTxt[ nPos ]))
1490             break;
1491 
1492         // Set bold or underline automatically?
1493         if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1494         {
1495             if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1496             {
1497                 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1498             }
1499             break;
1500         }
1501 
1502         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1503             ;
1504 
1505         // Found a Paragraph-start or a Blank, search for the word shortcut in
1506         // auto.
1507         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1508         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1509             --nCapLttrPos;          // begin of paragraph and no blank
1510 
1511         const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1512         CharClass& rCC = GetCharClass( eLang );
1513 
1514         // no symbol characters
1515         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1516             break;
1517 
1518         if( IsAutoCorrFlag( ACFlags::Autocorrect ) )
1519         {
1520             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1521             // and becomes INVALID if ChgAutoCorrWord returns true!
1522             // => use aPara/pPara to create a valid copy of the string!
1523             OUString aPara;
1524             OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1525 
1526             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1527                                                     *this, pPara );
1528             if( !bChgWord )
1529             {
1530                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1531                 while( nCapLttrPos1 < nInsPos &&
1532                         lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1533                         )
1534                         ++nCapLttrPos1;
1535                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1536                         lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1537                         )
1538                         --nInsPos1;
1539 
1540                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1541                     nCapLttrPos1 < nInsPos1 &&
1542                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1543                 {
1544                     bChgWord = true;
1545                     nCapLttrPos = nCapLttrPos1;
1546                 }
1547             }
1548 
1549             if( bChgWord )
1550             {
1551                 if( !aPara.isEmpty() )
1552                 {
1553                     sal_Int32 nEnd = nCapLttrPos;
1554                     while( nEnd < aPara.getLength() &&
1555                             !IsWordDelim( aPara[ nEnd ]))
1556                         ++nEnd;
1557 
1558                     // Capital letter at beginning of paragraph?
1559                     if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1560                     {
1561                         FnCapitalStartSentence( rDoc, aPara, false,
1562                                                 nCapLttrPos, nEnd, eLang );
1563                     }
1564 
1565                     if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1566                     {
1567                         FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1568                     }
1569                 }
1570                 break;
1571             }
1572         }
1573 
1574         if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1575         {
1576             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1577             // and becomes INVALID if TransliterateRTLWord returns true!
1578             if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1579                 break;
1580         }
1581 
1582         if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1583                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1584                 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1585                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1586             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1587                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1588                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1589             ;
1590         else
1591         {
1592             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1593             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1594 
1595             if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1596                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1597             {
1598                 // Correct accidental use of cAPS LOCK key (do this only when
1599                 // the caps or shift lock key is pressed). Turn off the caps
1600                 // lock afterwards.
1601                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1602             }
1603 
1604             // Capital letter at beginning of paragraph ?
1605             if( !bUnsupported &&
1606                 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1607             {
1608                 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1609             }
1610 
1611             // Two capital letters at beginning of word ??
1612             if( !bUnsupported &&
1613                 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1614             {
1615                 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1616             }
1617 
1618             if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1619             {
1620                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1621             }
1622         }
1623 
1624     } while( false );
1625 }
1626 
1627 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1628                                                         LanguageType eLang )
1629 {
1630     LanguageTag aLanguageTag( eLang);
1631     if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1632         (void)CreateLanguageFile(aLanguageTag);
1633     return *(m_aLangTable.find(aLanguageTag)->second);
1634 }
1635 
1636 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1637 {
1638     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1639     if (iter != m_aLangTable.end() && iter->second)
1640         iter->second->SaveCplSttExceptList();
1641     else
1642     {
1643         SAL_WARN("editeng", "Save an empty list? ");
1644     }
1645 }
1646 
1647 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1648 {
1649     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1650     if (iter != m_aLangTable.end() && iter->second)
1651         iter->second->SaveWrdSttExceptList();
1652     else
1653     {
1654         SAL_WARN("editeng", "Save an empty list? ");
1655     }
1656 }
1657 
1658 // Adds a single word. The list will immediately be written to the file!
1659 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1660                                         LanguageType eLang )
1661 {
1662     SvxAutoCorrectLanguageLists* pLists = nullptr;
1663     // either the right language is present or it will be this in the general list
1664     auto iter = m_aLangTable.find(LanguageTag(eLang));
1665     if (iter != m_aLangTable.end())
1666         pLists = iter->second.get();
1667     else
1668     {
1669         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1670         iter = m_aLangTable.find(aLangTagUndetermined);
1671         if (iter != m_aLangTable.end())
1672             pLists = iter->second.get();
1673         else if(CreateLanguageFile(aLangTagUndetermined))
1674             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1675     }
1676     OSL_ENSURE(pLists, "No auto correction data");
1677     return pLists && pLists->AddToCplSttExceptList(rNew);
1678 }
1679 
1680 // Adds a single word. The list will immediately be written to the file!
1681 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1682                                          LanguageType eLang )
1683 {
1684     SvxAutoCorrectLanguageLists* pLists = nullptr;
1685     //either the right language is present or it is set in the general list
1686     auto iter = m_aLangTable.find(LanguageTag(eLang));
1687     if (iter != m_aLangTable.end())
1688         pLists = iter->second.get();
1689     else
1690     {
1691         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1692         iter = m_aLangTable.find(aLangTagUndetermined);
1693         if (iter != m_aLangTable.end())
1694             pLists = iter->second.get();
1695         else if(CreateLanguageFile(aLangTagUndetermined))
1696             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1697     }
1698     OSL_ENSURE(pLists, "No auto correction file!");
1699     return pLists && pLists->AddToWrdSttExceptList(rNew);
1700 }
1701 
1702 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1703                                              sal_Int32 nPos)
1704 {
1705     OUString sRet;
1706     if( !nPos )
1707         return sRet;
1708 
1709     sal_Int32 nEnd = nPos;
1710 
1711     // it must be followed by a blank or tab!
1712     if( ( nPos < rTxt.getLength() &&
1713         !IsWordDelim( rTxt[ nPos ])) ||
1714         IsWordDelim( rTxt[ --nPos ]))
1715         return sRet;
1716 
1717     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1718         ;
1719 
1720     // Found a Paragraph-start or a Blank, search for the word shortcut in
1721     // auto.
1722     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1723     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1724         --nCapLttrPos;          // Beginning of paragraph and no Blank!
1725 
1726     while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1727         if( ++nCapLttrPos >= nEnd )
1728             return sRet;
1729 
1730     if( 3 > nEnd - nCapLttrPos )
1731         return sRet;
1732 
1733     const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1734 
1735     CharClass& rCC = GetCharClass(eLang);
1736 
1737     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1738         return sRet;
1739 
1740     sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1741     return sRet;
1742 }
1743 
1744 // static
1745 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt,
1746                                                           const sal_Int32 nPos)
1747 {
1748     constexpr sal_Int32 nMinLen = 3;
1749     constexpr sal_Int32 nMaxLen = 9;
1750     std::vector<OUString> aRes;
1751     if (nPos >= nMinLen)
1752     {
1753         sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1754         // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1755         if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1756         {
1757             while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1758                 ++nBegin;
1759         }
1760         if (nBegin + nMinLen <= nPos)
1761         {
1762             OUString sRes = rTxt.copy(nBegin, nPos - nBegin);
1763             aRes.push_back(sRes);
1764             bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1765             for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1766             {
1767                 bool bAdd = bLastStartedWithDelim;
1768                 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1769                 bAdd = bAdd || bLastStartedWithDelim;
1770                 if (bAdd)
1771                     aRes.push_back(sRes.copy(i));
1772             }
1773         }
1774     }
1775     return aRes;
1776 }
1777 
1778 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1779 {
1780     OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1781 
1782     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1783     OUString sShareDirFile( sUserDirFile );
1784 
1785     SvxAutoCorrectLanguageLists* pLists = nullptr;
1786 
1787     tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1788 
1789     auto nFndPos = aLastFileTable.find(rLanguageTag);
1790     if(nFndPos != aLastFileTable.end() &&
1791        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1792        nAktTime - nLastCheckTime < nMinTime)
1793     {
1794         // no need to test the file, because the last check is not older then
1795         // 2 minutes.
1796         if( bNewFile )
1797         {
1798             sShareDirFile = sUserDirFile;
1799             pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1800             LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1801             m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1802             aLastFileTable.erase(nFndPos);
1803         }
1804     }
1805     else if(
1806              ( FStatHelper::IsDocument( sUserDirFile ) ||
1807                FStatHelper::IsDocument( sShareDirFile =
1808                    GetAutoCorrFileName( rLanguageTag ) ) ||
1809                FStatHelper::IsDocument( sShareDirFile =
1810                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1811              ) ||
1812         ( sShareDirFile = sUserDirFile, bNewFile )
1813           )
1814     {
1815         pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1816         LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1817         m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1818         if (nFndPos != aLastFileTable.end())
1819             aLastFileTable.erase(nFndPos);
1820     }
1821     else if( !bNewFile )
1822     {
1823         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1824     }
1825     return pLists != nullptr;
1826 }
1827 
1828 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1829                                 LanguageType eLang )
1830 {
1831     LanguageTag aLanguageTag( eLang);
1832     auto const iter = m_aLangTable.find(aLanguageTag);
1833     if (iter != m_aLangTable.end())
1834         return iter->second->PutText(rShort, rLong);
1835     if(CreateLanguageFile(aLanguageTag))
1836         return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong);
1837     return false;
1838 }
1839 
1840 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1841                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1842                                               LanguageType eLang )
1843 {
1844     LanguageTag aLanguageTag( eLang);
1845     auto const iter = m_aLangTable.find(aLanguageTag);
1846     if (iter != m_aLangTable.end())
1847     {
1848         iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1849     }
1850     else if(CreateLanguageFile( aLanguageTag ))
1851     {
1852         m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1853     }
1854 }
1855 
1856 //  - return the replacement text (only for SWG-Format, all other
1857 //    can be taken from the word list!)
1858 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1859 {
1860     return false;
1861 }
1862 
1863 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1864 {
1865 }
1866 
1867 // Text with attribution (only the SWG - SWG format!)
1868 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1869                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1870 {
1871     return false;
1872 }
1873 
1874 OUString EncryptBlockName_Imp(const OUString& rName)
1875 {
1876     OUStringBuffer aName;
1877     aName.append('#').append(rName);
1878     for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1879     {
1880         if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1881             aName[nPos] &= 0x0f;
1882     }
1883     return aName.makeStringAndClear();
1884 }
1885 
1886 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1887 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
1888 {
1889     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1890     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1891 
1892     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1893     {
1894         switch (aBuf[nPos])
1895         {
1896             case '!':
1897             case '/':
1898             case ':':
1899             case '.':
1900             case '\\':
1901                 aBuf[nPos] = '_';
1902                 break;
1903             default:
1904                 break;
1905         }
1906     }
1907 
1908     rPackageName = aBuf.makeStringAndClear();
1909 }
1910 
1911 static const SvxAutocorrWord* lcl_SearchWordsInList(
1912                 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt,
1913                 sal_Int32& rStt, sal_Int32 nEndPos)
1914 {
1915     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1916     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1917 }
1918 
1919 // the search for the words in the substitution table
1920 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1921                 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1922                 SvxAutoCorrDoc&, LanguageTag& rLang )
1923 {
1924     const SvxAutocorrWord* pRet = nullptr;
1925     LanguageTag aLanguageTag( rLang);
1926     if( aLanguageTag.isSystemLocale() )
1927         aLanguageTag.reset( MsLangId::getSystemLanguage());
1928 
1929     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1930      * list instead? */
1931 
1932     // First search for eLang, then US-English -> English
1933     // and last in LANGUAGE_UNDETERMINED
1934     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1935     {
1936         //the language is available - so bring it on
1937         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1938         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1939         if( pRet )
1940         {
1941             rLang = aLanguageTag;
1942             return pRet;
1943         }
1944         else
1945             return nullptr;
1946     }
1947 
1948     // If it still could not be found here, then keep on searching
1949     LanguageType eLang = aLanguageTag.getLanguageType();
1950     // the primary language for example EN
1951     aLanguageTag.reset(aLanguageTag.getLanguage());
1952     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1953     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1954                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1955                  CreateLanguageFile(aLanguageTag, false)))
1956     {
1957         //the language is available - so bring it on
1958         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1959         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1960         if( pRet )
1961         {
1962             rLang = aLanguageTag;
1963             return pRet;
1964         }
1965     }
1966 
1967     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1968             CreateLanguageFile(aLanguageTag, false))
1969     {
1970         //the language is available - so bring it on
1971         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1972         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1973         if( pRet )
1974         {
1975             rLang = aLanguageTag;
1976             return pRet;
1977         }
1978     }
1979     return nullptr;
1980 }
1981 
1982 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1983                                              const OUString& sWord )
1984 {
1985     LanguageTag aLanguageTag( eLang);
1986 
1987     /* TODO-BCP47: again horrible ugliness */
1988 
1989     // First search for eLang, then primary language of eLang
1990     // and last in LANGUAGE_UNDETERMINED
1991 
1992     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1993     {
1994         //the language is available - so bring it on
1995         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1996         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
1997             return true;
1998     }
1999 
2000     // If it still could not be found here, then keep on searching
2001     // the primary language for example EN
2002     aLanguageTag.reset(aLanguageTag.getLanguage());
2003     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2004     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2005                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2006                  CreateLanguageFile(aLanguageTag, false)))
2007     {
2008         //the language is available - so bring it on
2009         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2010         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2011             return true;
2012     }
2013 
2014     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2015             CreateLanguageFile(aLanguageTag, false))
2016     {
2017         //the language is available - so bring it on
2018         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2019         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2020             return true;
2021     }
2022     return false;
2023 }
2024 
2025 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2026 {
2027     SvStringsISortDtor::const_iterator it = pList->find( "~" );
2028     SvStringsISortDtor::size_type nPos = it - pList->begin();
2029     if( nPos < pList->size() )
2030     {
2031         OUString sLowerWord(sWord.toAsciiLowerCase());
2032         OUString sAbr;
2033         for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2034         {
2035             sAbr = (*pList)[ n ];
2036             if (sAbr[0] != '~')
2037                 break;
2038             // ~ and ~. are not allowed!
2039             if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2040             {
2041                 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2042                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2043                 {
2044                     if( !--i )      // agrees
2045                         return true;
2046 
2047                     if( sLowerAbk[i] != sLowerWord[--ii])
2048                         break;
2049                 }
2050             }
2051         }
2052     }
2053     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2054             "Wrongly sorted exception list?" );
2055     return false;
2056 }
2057 
2058 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2059                                 const OUString& sWord, bool bAbbreviation)
2060 {
2061     LanguageTag aLanguageTag( eLang);
2062 
2063     /* TODO-BCP47: did I mention terrible horrible ugliness? */
2064 
2065     // First search for eLang, then primary language of eLang
2066     // and last in LANGUAGE_UNDETERMINED
2067 
2068     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2069     {
2070         //the language is available - so bring it on
2071         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2072         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2073             return true;
2074     }
2075 
2076     // If it still could not be found here, then keep on searching
2077     // the primary language for example EN
2078     aLanguageTag.reset(aLanguageTag.getLanguage());
2079     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2080     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2081                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2082                  CreateLanguageFile(aLanguageTag, false)))
2083     {
2084         //the language is available - so bring it on
2085         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2086         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2087             return true;
2088     }
2089 
2090     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2091             CreateLanguageFile(aLanguageTag, false))
2092     {
2093         //the language is available - so bring it on
2094         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2095         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2096             return true;
2097     }
2098     return false;
2099 }
2100 
2101 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2102                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
2103 {
2104     OUString sRet, sExt( rLanguageTag.getBcp47() );
2105     if (bUnlocalized)
2106     {
2107         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2108         std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2109         if (!vecFallBackStrings.empty())
2110            sExt = vecFallBackStrings[0];
2111     }
2112 
2113     sExt = "_" + sExt + ".dat";
2114     if( bNewFile )
2115         sRet = sUserAutoCorrFile + sExt;
2116     else if( !bTst )
2117         sRet = sShareAutoCorrFile + sExt;
2118     else
2119     {
2120         // test first in the user directory - if not exist, then
2121         sRet = sUserAutoCorrFile + sExt;
2122         if( !FStatHelper::IsDocument( sRet ))
2123             sRet = sShareAutoCorrFile + sExt;
2124     }
2125     return sRet;
2126 }
2127 
2128 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2129                 SvxAutoCorrect& rParent,
2130                 const OUString& rShareAutoCorrectFile,
2131                 const OUString& rUserAutoCorrectFile)
2132 :   sShareAutoCorrFile( rShareAutoCorrectFile ),
2133     sUserAutoCorrFile( rUserAutoCorrectFile ),
2134     aModifiedDate( Date::EMPTY ),
2135     aModifiedTime( tools::Time::EMPTY ),
2136     aLastCheckTime( tools::Time::EMPTY ),
2137     rAutoCorrect(rParent),
2138     nFlags(ACFlags::NONE)
2139 {
2140 }
2141 
2142 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2143 {
2144 }
2145 
2146 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2147 {
2148     // Access the file system only every 2 minutes to check the date stamp
2149     bool bRet = false;
2150 
2151     tools::Time nMinTime( 0, 2 );
2152     tools::Time nAktTime( tools::Time::SYSTEM );
2153     if( aLastCheckTime <= nAktTime) // overflow?
2154         return false;
2155     nAktTime -= aLastCheckTime;
2156     if( nAktTime > nMinTime )     // min time past
2157     {
2158         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2159         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2160                                             &aTstDate, &aTstTime ) &&
2161             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2162         {
2163             bRet = true;
2164             // then remove all the lists fast!
2165             if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2166             {
2167                 pCplStt_ExcptLst.reset();
2168             }
2169             if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst )
2170             {
2171                 pWrdStt_ExcptLst.reset();
2172             }
2173             if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2174             {
2175                 pAutocorr_List.reset();
2176             }
2177             nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad );
2178         }
2179         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2180     }
2181     return bRet;
2182 }
2183 
2184 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2185                                         std::unique_ptr<SvStringsISortDtor>& rpLst,
2186                                         const OUString& sStrmName,
2187                                         tools::SvRef<SotStorage>& rStg)
2188 {
2189     if( rpLst )
2190         rpLst->clear();
2191     else
2192         rpLst.reset( new SvStringsISortDtor );
2193 
2194     {
2195         if( rStg.is() && rStg->IsStream( sStrmName ) )
2196         {
2197             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2198                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2199             if( ERRCODE_NONE != xStrm->GetError())
2200             {
2201                 xStrm.clear();
2202                 rStg.clear();
2203                 RemoveStream_Imp( sStrmName );
2204             }
2205             else
2206             {
2207                 uno::Reference< uno::XComponentContext > xContext =
2208                     comphelper::getProcessComponentContext();
2209 
2210                 xml::sax::InputSource aParserInput;
2211                 aParserInput.sSystemId = sStrmName;
2212 
2213                 xStrm->Seek( 0 );
2214                 xStrm->SetBufferSize( 8 * 1024 );
2215                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2216 
2217                 // get filter
2218                 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2219 
2220                 // connect parser and filter
2221                 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2222                 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2223                 xParser->setFastDocumentHandler( xFilter );
2224                 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2225                 xParser->setTokenHandler( xTokenHandler );
2226 
2227                 // parse
2228                 try
2229                 {
2230                     xParser->parseStream( aParserInput );
2231                 }
2232                 catch( const xml::sax::SAXParseException& )
2233                 {
2234                     // re throw ?
2235                 }
2236                 catch( const xml::sax::SAXException& )
2237                 {
2238                     // re throw ?
2239                 }
2240                 catch( const io::IOException& )
2241                 {
2242                     // re throw ?
2243                 }
2244             }
2245         }
2246 
2247         // Set time stamp
2248         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2249                                         &aModifiedDate, &aModifiedTime );
2250         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2251     }
2252 
2253 }
2254 
2255 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2256                             const SvStringsISortDtor& rLst,
2257                             const OUString& sStrmName,
2258                             tools::SvRef<SotStorage> const &rStg,
2259                             bool bConvert )
2260 {
2261     if( !rStg.is() )
2262         return;
2263 
2264     if( rLst.empty() )
2265     {
2266         rStg->Remove( sStrmName );
2267         rStg->Commit();
2268     }
2269     else
2270     {
2271         tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2272                 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2273         if( xStrm.is() )
2274         {
2275             xStrm->SetSize( 0 );
2276             xStrm->SetBufferSize( 8192 );
2277             xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2278 
2279 
2280             uno::Reference< uno::XComponentContext > xContext =
2281                 comphelper::getProcessComponentContext();
2282 
2283             uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2284             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2285             xWriter->setOutputStream(xOut);
2286 
2287             uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2288             rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2289 
2290             xExp->exportDoc( XML_BLOCK_LIST );
2291 
2292             xStrm->Commit();
2293             if( xStrm->GetError() == ERRCODE_NONE )
2294             {
2295                 xStrm.clear();
2296                 if (!bConvert)
2297                 {
2298                     rStg->Commit();
2299                     if( ERRCODE_NONE != rStg->GetError() )
2300                     {
2301                         rStg->Remove( sStrmName );
2302                         rStg->Commit();
2303                     }
2304                 }
2305             }
2306         }
2307     }
2308 }
2309 
2310 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2311 {
2312     if( pAutocorr_List )
2313         pAutocorr_List->DeleteAndDestroyAll();
2314     else
2315         pAutocorr_List.reset( new SvxAutocorrWordList() );
2316 
2317     try
2318     {
2319         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2320         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2321         uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2322 
2323         xml::sax::InputSource aParserInput;
2324         aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2325         aParserInput.aInputStream = xStrm->getInputStream();
2326 
2327         // get parser
2328         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2329         SAL_INFO("editeng", "AutoCorrect Import" );
2330         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2331         uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2332 
2333         // connect parser and filter
2334         xParser->setFastDocumentHandler( xFilter );
2335         xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2336         xParser->setTokenHandler(xTokenHandler);
2337 
2338         // parse
2339         xParser->parseStream( aParserInput );
2340     }
2341     catch ( const uno::Exception& )
2342     {
2343         TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2344     }
2345 
2346     // Set time stamp
2347     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2348                                     &aModifiedDate, &aModifiedTime );
2349     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2350 
2351     return pAutocorr_List.get();
2352 }
2353 
2354 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2355 {
2356     if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2357     {
2358         LoadAutocorrWordList();
2359         if( !pAutocorr_List )
2360         {
2361             OSL_ENSURE( false, "No valid list" );
2362             pAutocorr_List.reset( new SvxAutocorrWordList() );
2363         }
2364         nFlags |= ACFlags::ChgWordLstLoad;
2365     }
2366     return pAutocorr_List.get();
2367 }
2368 
2369 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2370 {
2371     if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2372     {
2373         LoadCplSttExceptList();
2374         if( !pCplStt_ExcptLst )
2375         {
2376             OSL_ENSURE( false, "No valid list" );
2377             pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2378         }
2379         nFlags |= ACFlags::CplSttLstLoad;
2380     }
2381     return pCplStt_ExcptLst.get();
2382 }
2383 
2384 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2385 {
2386     bool bRet = false;
2387     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2388     {
2389         MakeUserStorage_Impl();
2390         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2391 
2392         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2393 
2394         xStg = nullptr;
2395         // Set time stamp
2396         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2397                                             &aModifiedDate, &aModifiedTime );
2398         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2399         bRet = true;
2400     }
2401     return bRet;
2402 }
2403 
2404 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2405 {
2406     bool bRet = false;
2407     SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2408     if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2409     {
2410         MakeUserStorage_Impl();
2411         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2412 
2413         SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2414 
2415         xStg = nullptr;
2416         // Set time stamp
2417         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2418                                             &aModifiedDate, &aModifiedTime );
2419         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2420         bRet = true;
2421     }
2422     return bRet;
2423 }
2424 
2425 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2426 {
2427     try
2428     {
2429         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2430         if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2431             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2432     }
2433     catch (const css::ucb::ContentCreationException&)
2434     {
2435     }
2436     return pCplStt_ExcptLst.get();
2437 }
2438 
2439 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2440 {
2441     MakeUserStorage_Impl();
2442     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2443 
2444     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2445 
2446     xStg = nullptr;
2447 
2448     // Set time stamp
2449     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2450                                             &aModifiedDate, &aModifiedTime );
2451     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2452 }
2453 
2454 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2455 {
2456     try
2457     {
2458         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2459         if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2460             LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2461     }
2462     catch (const css::ucb::ContentCreationException &)
2463     {
2464         TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList");
2465     }
2466     return pWrdStt_ExcptLst.get();
2467 }
2468 
2469 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2470 {
2471     MakeUserStorage_Impl();
2472     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2473 
2474     SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2475 
2476     xStg = nullptr;
2477     // Set time stamp
2478     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2479                                             &aModifiedDate, &aModifiedTime );
2480     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2481 }
2482 
2483 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2484 {
2485     if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2486     {
2487         LoadWrdSttExceptList();
2488         if( !pWrdStt_ExcptLst )
2489         {
2490             OSL_ENSURE( false, "No valid list" );
2491             pWrdStt_ExcptLst.reset( new SvStringsISortDtor );
2492         }
2493         nFlags |= ACFlags::WrdSttLstLoad;
2494     }
2495     return pWrdStt_ExcptLst.get();
2496 }
2497 
2498 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2499 {
2500     if( sShareAutoCorrFile != sUserAutoCorrFile )
2501     {
2502         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2503         if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2504             xStg->IsStream( rName ) )
2505         {
2506             xStg->Remove( rName );
2507             xStg->Commit();
2508 
2509             xStg = nullptr;
2510         }
2511     }
2512 }
2513 
2514 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2515 {
2516     // The conversion needs to happen if the file is already in the user
2517     // directory and is in the old format. Additionally it needs to
2518     // happen when the file is being copied from share to user.
2519 
2520     bool bError = false, bConvert = false, bCopy = false;
2521     INetURLObject aDest;
2522     INetURLObject aSource;
2523 
2524     if (sUserAutoCorrFile != sShareAutoCorrFile )
2525     {
2526         aSource = INetURLObject ( sShareAutoCorrFile );
2527         aDest = INetURLObject ( sUserAutoCorrFile );
2528         if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2529         {
2530             aDest.SetExtension ( "bak" );
2531             bConvert = true;
2532         }
2533         bCopy = true;
2534     }
2535     else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2536     {
2537         aSource = INetURLObject ( sUserAutoCorrFile );
2538         aDest = INetURLObject ( sUserAutoCorrFile );
2539         aDest.SetExtension ( "bak" );
2540         bCopy = bConvert = true;
2541     }
2542     if (bCopy)
2543     {
2544         try
2545         {
2546             OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2547             sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2548             sMain = sMain.copy(0, nSlashPos);
2549             ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2550             TransferInfo aInfo;
2551             aInfo.NameClash = NameClash::OVERWRITE;
2552             aInfo.NewTitle = aDest.GetLastName();
2553             aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2554             aInfo.MoveData  = false;
2555             aNewContent.executeCommand( "transfer", Any(aInfo));
2556         }
2557         catch (...)
2558         {
2559             bError = true;
2560         }
2561     }
2562     if (bConvert && !bError)
2563     {
2564         tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2565         tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2566 
2567         if( xSrcStg.is() && xDstStg.is() )
2568         {
2569             std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2570 
2571             if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2572                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );
2573 
2574             if (pTmpWordList)
2575             {
2576                 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
2577                 pTmpWordList.reset();
2578             }
2579 
2580 
2581             if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2582                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2583 
2584             if (pTmpWordList)
2585             {
2586                 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2587                 pTmpWordList->clear();
2588             }
2589 
2590             GetAutocorrWordList();
2591             MakeBlocklist_Imp( *xDstStg );
2592             sShareAutoCorrFile = sUserAutoCorrFile;
2593             xDstStg = nullptr;
2594             try
2595             {
2596                 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2597                 aContent.executeCommand ( "delete", makeAny ( true ) );
2598             }
2599             catch (...)
2600             {
2601             }
2602         }
2603     }
2604     else if( bCopy && !bError )
2605         sShareAutoCorrFile = sUserAutoCorrFile;
2606 }
2607 
2608 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2609 {
2610     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2611     if( !bRemove )
2612     {
2613         tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2614                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2615         if( refList.is() )
2616         {
2617             refList->SetSize( 0 );
2618             refList->SetBufferSize( 8192 );
2619             refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2620 
2621             uno::Reference< uno::XComponentContext > xContext =
2622                 comphelper::getProcessComponentContext();
2623 
2624             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2625             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2626             xWriter->setOutputStream(xOut);
2627 
2628             rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2629 
2630             xExp->exportDoc( XML_BLOCK_LIST );
2631 
2632             refList->Commit();
2633             bRet = ERRCODE_NONE == refList->GetError();
2634             if( bRet )
2635             {
2636                 refList.clear();
2637                 rStg.Commit();
2638                 if( ERRCODE_NONE != rStg.GetError() )
2639                 {
2640                     bRemove = true;
2641                     bRet = false;
2642                 }
2643             }
2644         }
2645         else
2646             bRet = false;
2647     }
2648 
2649     if( bRemove )
2650     {
2651         rStg.Remove( pXMLImplAutocorr_ListStr );
2652         rStg.Commit();
2653     }
2654 
2655     return bRet;
2656 }
2657 
2658 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2659 {
2660     // First get the current list!
2661     GetAutocorrWordList();
2662 
2663     MakeUserStorage_Impl();
2664     tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2665 
2666     bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2667 
2668     if( bRet )
2669     {
2670         for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2671         {
2672             std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2673             if( xFoundEntry )
2674             {
2675                 if( !xFoundEntry->IsTextOnly() )
2676                 {
2677                     OUString aName( aWordToDelete.GetShort() );
2678                     if (xStorage->IsOLEStorage())
2679                         aName = EncryptBlockName_Imp(aName);
2680                     else
2681                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2682 
2683                     if( xStorage->IsContained( aName ) )
2684                     {
2685                         xStorage->Remove( aName );
2686                         bRet = xStorage->Commit();
2687                     }
2688                 }
2689             }
2690         }
2691 
2692         for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2693         {
2694             SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2695             std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2696             if( xRemoved )
2697             {
2698                 if( !xRemoved->IsTextOnly() )
2699                 {
2700                     // Still have to remove the Storage
2701                     OUString sStorageName( aWordToAdd.GetShort() );
2702                     if (xStorage->IsOLEStorage())
2703                         sStorageName = EncryptBlockName_Imp(sStorageName);
2704                     else
2705                         GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2706 
2707                     if( xStorage->IsContained( sStorageName ) )
2708                         xStorage->Remove( sStorageName );
2709                 }
2710             }
2711             bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2712 
2713             if ( !bRet )
2714             {
2715                 break;
2716             }
2717         }
2718 
2719         if ( bRet )
2720         {
2721             bRet = MakeBlocklist_Imp( *xStorage );
2722         }
2723     }
2724     return bRet;
2725 }
2726 
2727 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2728 {
2729     // First get the current list!
2730     GetAutocorrWordList();
2731 
2732     MakeUserStorage_Impl();
2733     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2734 
2735     bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2736 
2737     // Update the word list
2738     if( bRet )
2739     {
2740         SvxAutocorrWord aNew(rShort, rLong, true );
2741         std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2742         if( xRemove )
2743         {
2744             if( !xRemove->IsTextOnly() )
2745             {
2746                 // Still have to remove the Storage
2747                 OUString sStgNm( rShort );
2748                 if (xStg->IsOLEStorage())
2749                     sStgNm = EncryptBlockName_Imp(sStgNm);
2750                 else
2751                     GeneratePackageName ( rShort, sStgNm);
2752 
2753                 if( xStg->IsContained( sStgNm ) )
2754                     xStg->Remove( sStgNm );
2755             }
2756         }
2757 
2758         if( pAutocorr_List->Insert( std::move(aNew) ) )
2759         {
2760             bRet = MakeBlocklist_Imp( *xStg );
2761             xStg = nullptr;
2762         }
2763         else
2764         {
2765             bRet = false;
2766         }
2767     }
2768     return bRet;
2769 }
2770 
2771 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2772                                                SfxObjectShell& rShell )
2773 {
2774     // First get the current list!
2775     GetAutocorrWordList();
2776 
2777     MakeUserStorage_Impl();
2778 
2779     try
2780     {
2781         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2782         OUString sLong;
2783         bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2784         xStg = nullptr;
2785 
2786         // Update the word list
2787         if( bRet )
2788         {
2789             if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2790             {
2791                 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2792                 MakeBlocklist_Imp( *xStor );
2793             }
2794         }
2795     }
2796     catch ( const uno::Exception& )
2797     {
2798     }
2799 }
2800 
2801 // Keep the list sorted ...
2802 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2803 {
2804     bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2805     {
2806         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2807         return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2808     }
2809 };
2810 
2811 namespace {
2812 
2813 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2814 
2815 }
2816 
2817 struct SvxAutocorrWordList::Impl
2818 {
2819 
2820     // only one of these contains the data
2821     // maSortedVector is manually sorted so we can optimise data movement
2822     mutable AutocorrWordSetType maSortedVector;
2823     mutable AutocorrWordHashType maHash; // key is 'Short'
2824 
2825     void DeleteAndDestroyAll()
2826     {
2827         maHash.clear();
2828         maSortedVector.clear();
2829     }
2830 };
2831 
2832 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2833 
2834 SvxAutocorrWordList::~SvxAutocorrWordList()
2835 {
2836 }
2837 
2838 void SvxAutocorrWordList::DeleteAndDestroyAll()
2839 {
2840     mpImpl->DeleteAndDestroyAll();
2841 }
2842 
2843 // returns true if inserted
2844 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2845 {
2846     if ( mpImpl->maSortedVector.empty() ) // use the hash
2847     {
2848         OUString aShort = aWord.GetShort();
2849         auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2850         if (inserted)
2851             return &(it->second);
2852         return nullptr;
2853     }
2854     else
2855     {
2856         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2857         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2858         if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2859         {
2860             it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2861             return &*it;
2862         }
2863         return nullptr;
2864     }
2865 }
2866 
2867 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2868 {
2869     (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2870 }
2871 
2872 bool SvxAutocorrWordList::empty() const
2873 {
2874     return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2875 }
2876 
2877 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2878 {
2879 
2880     if ( mpImpl->maSortedVector.empty() ) // use the hash
2881     {
2882         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2883         if( it != mpImpl->maHash.end() )
2884         {
2885             SvxAutocorrWord pMatch = std::move(it->second);
2886             mpImpl->maHash.erase (it);
2887             return pMatch;
2888         }
2889     }
2890     else
2891     {
2892         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2893         if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2894         {
2895             SvxAutocorrWord pMatch = std::move(*it);
2896             mpImpl->maSortedVector.erase (it);
2897             return pMatch;
2898         }
2899     }
2900     return std::optional<SvxAutocorrWord>();
2901 }
2902 
2903 // return the sorted contents - defer sorting until we have to.
2904 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
2905 {
2906     // convert from hash to set permanently
2907     if ( mpImpl->maSortedVector.empty() )
2908     {
2909         std::vector<SvxAutocorrWord> tmp;
2910         tmp.reserve(mpImpl->maHash.size());
2911         for (auto & rPair : mpImpl->maHash)
2912             tmp.emplace_back(std::move(rPair.second));
2913         mpImpl->maHash.clear();
2914         // sort twice - this gets the list into mostly-sorted order, which
2915         // reduces the number of times we need to invoke the expensive ICU collate fn.
2916         std::sort(tmp.begin(), tmp.end(),
2917             [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
2918             {
2919                 return lhs.GetShort() < rhs.GetShort();
2920             });
2921         // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
2922         // stable_sort is twice as fast as sort in this situation because it does
2923         // fewer comparison operations.
2924         std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
2925         mpImpl->maSortedVector = std::move(tmp);
2926     }
2927     return mpImpl->maSortedVector;
2928 }
2929 
2930 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
2931                                       const OUString &rTxt,
2932                                       sal_Int32 &rStt,
2933                                       sal_Int32 nEndPos) const
2934 {
2935     const OUString& rChk = pFnd->GetShort();
2936 
2937     sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
2938     sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
2939     sal_Int32 nSttWdPos = nEndPos;
2940 
2941     // direct replacement of keywords surrounded by colons (for example, ":name:")
2942     bool bColonNameColon = rTxt.getLength() > nEndPos &&
2943         rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
2944     if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
2945     {
2946 
2947         bool bWasWordDelim = false;
2948         sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
2949         if (bColonNameColon)
2950             nCalcStt++;
2951         if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
2952               ( nCalcStt < rStt &&
2953                 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
2954         {
2955             TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
2956             OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
2957             if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
2958             {
2959                 rStt = nCalcStt;
2960                 if (!left_wildcard)
2961                 {
2962                     // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
2963                     if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
2964                         return nullptr;
2965                     return pFnd;
2966                 }
2967                 // get the first word delimiter position before the matching ".*word" pattern
2968                 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
2969                     ;
2970                 if (bWasWordDelim) rStt++;
2971                 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
2972                 // avoid double spaces before simple "word" replacement
2973                 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
2974                 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
2975                     return pNew;
2976             }
2977         } else
2978         // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
2979         if ( right_wildcard )
2980         {
2981 
2982             OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
2983             // Get the last word delimiter position
2984             bool not_suffix;
2985 
2986             while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
2987                 ;
2988             // search the first occurrence (with a left word delimitation, if needed)
2989             sal_Int32 nFndPos = -1;
2990             do {
2991                 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
2992                 if (nFndPos == -1)
2993                     break;
2994                 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
2995             } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
2996 
2997             if ( nFndPos != -1 )
2998             {
2999                 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
3000 
3001                 if ( left_wildcard )
3002                 {
3003                     // get the first word delimiter position before the matching ".*word.*" pattern
3004                     while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
3005                         ;
3006                     if (bWasWordDelim) nFndPos++;
3007                 }
3008                 if (nEndPos + extra_repl <= nFndPos)
3009                 {
3010                     return nullptr;
3011                 }
3012                 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3013                 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);
3014 
3015                 OUString aLong;
3016                 rStt = nFndPos;
3017                 if ( !left_wildcard )
3018                 {
3019                     sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
3020                     aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
3021                 } else {
3022                     OUStringBuffer buf;
3023                     do {
3024                         nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
3025                         if (nSttWdPos != -1)
3026                         {
3027                             sal_Int32 nTmp(nFndPos);
3028                             while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp]))
3029                                 nTmp++;
3030                             if (nTmp < nSttWdPos)
3031                                 break; // word delimiter found
3032                             buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
3033                             nFndPos = nSttWdPos + sTmp.getLength();
3034                         }
3035                     } while (nSttWdPos != -1);
3036                     if (nEndPos - nFndPos > extra_repl)
3037                         buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos));
3038                     aLong = buf.makeStringAndClear();
3039                 }
3040                 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
3041                 {
3042                     if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos )
3043                         return pNew;
3044                 }
3045             }
3046         }
3047     }
3048     return nullptr;
3049 }
3050 
3051 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
3052                                                               sal_Int32 nEndPos) const
3053 {
3054     for (auto const& elem : mpImpl->maHash)
3055     {
3056         if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
3057             return pTmp;
3058     }
3059 
3060     for (auto const& elem : mpImpl->maSortedVector)
3061     {
3062         if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
3063             return pTmp;
3064     }
3065     return nullptr;
3066 }
3067 
3068 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
3069