xref: /core/editeng/source/misc/svxacorr.cxx (revision bfc1600c)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <string_view>
22 #include <sal/config.h>
23 
24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
25 #include <com/sun/star/embed/XStorage.hpp>
26 #include <com/sun/star/io/IOException.hpp>
27 #include <com/sun/star/io/XStream.hpp>
28 #include <tools/urlobj.hxx>
29 #include <i18nlangtag/mslangid.hxx>
30 #include <i18nutil/transliteration.hxx>
31 #include <sal/log.hxx>
32 #include <osl/diagnose.h>
33 #include <vcl/svapp.hxx>
34 #include <vcl/settings.hxx>
35 #include <svl/fstathelper.hxx>
36 #include <svl/urihelper.hxx>
37 #include <unotools/charclass.hxx>
38 #include <com/sun/star/i18n/UnicodeType.hpp>
39 #include <unotools/collatorwrapper.hxx>
40 #include <com/sun/star/i18n/UnicodeScript.hpp>
41 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
42 #include <unotools/localedatawrapper.hxx>
43 #include <unotools/transliterationwrapper.hxx>
44 #include <comphelper/processfactory.hxx>
45 #include <comphelper/storagehelper.hxx>
46 #include <comphelper/string.hxx>
47 #include <editeng/editids.hrc>
48 #include <sot/storage.hxx>
49 #include <editeng/udlnitem.hxx>
50 #include <editeng/wghtitem.hxx>
51 #include <editeng/postitem.hxx>
52 #include <editeng/crossedoutitem.hxx>
53 #include <editeng/escapementitem.hxx>
54 #include <editeng/svxacorr.hxx>
55 #include <editeng/unolingu.hxx>
56 #include <vcl/window.hxx>
57 #include <com/sun/star/xml/sax/InputSource.hpp>
58 #include <com/sun/star/xml/sax/FastParser.hpp>
59 #include <com/sun/star/xml/sax/Writer.hpp>
60 #include <com/sun/star/xml/sax/SAXParseException.hpp>
61 #include <unotools/streamwrap.hxx>
62 #include "SvXMLAutoCorrectImport.hxx"
63 #include "SvXMLAutoCorrectExport.hxx"
64 #include "SvXMLAutoCorrectTokenHandler.hxx"
65 #include <ucbhelper/content.hxx>
66 #include <com/sun/star/ucb/ContentCreationException.hpp>
67 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
68 #include <com/sun/star/ucb/TransferInfo.hpp>
69 #include <com/sun/star/ucb/NameClash.hpp>
70 #include <tools/diagnose_ex.h>
71 #include <xmloff/xmltoken.hxx>
72 #include <unordered_map>
73 #include <rtl/character.hxx>
74 
75 using namespace ::com::sun::star::ucb;
76 using namespace ::com::sun::star::uno;
77 using namespace ::com::sun::star::xml::sax;
78 using namespace ::com::sun::star;
79 using namespace ::xmloff::token;
80 using namespace ::utl;
81 
namespace {

/// File-local bit flags, one bit per punctuation mark (full stop,
/// exclamation mark, question mark). NONE is the empty set.
enum class Flags {
    NONE            = 0x00,
    FullStop        = 0x01,
    ExclamationMark = 0x02,
    QuestionMark    = 0x04,
};

}
92 
namespace o3tl {
    // Registers Flags as a typed flag set; the mask 0x07 is the union of the
    // three bits defined in the enum (0x01 | 0x02 | 0x04).
    template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
}
const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space

// Names of the three autocorrection XML lists.
// NOTE(review): presumably stream names inside the autocorrection storage;
// their use is outside this part of the file - confirm.
constexpr OUStringLiteral pXMLImplWrdStt_ExcptLstStr = u"WordExceptList.xml";
constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml";
constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml";

// Characters that are skipped at the start / end of a word before the
// autocorrection rules look at it (searched with lcl_IsInAsciiArr).
// NOTE(review): the \x83..\x94 bytes are stored as plain char; where char is
// signed they promote to negative values when compared against a
// sal_Unicode, so they may never match - confirm the intent.
const char
    /* also at these beginnings - Brackets and all kinds of begin characters */
    sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
    /* also at these ends - Brackets and all kinds of end characters */
    sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
107 
108 static OUString EncryptBlockName_Imp(const OUString& rName);
109 
110 static bool NonFieldWordDelim( const sal_Unicode c )
111 {
112     return ' ' == c || '\t' == c || 0x0a == c ||
113             cNonBreakingSpace == c || 0x2011 == c;
114 }
115 
116 static bool IsWordDelim( const sal_Unicode c )
117 {
118     return c == 0x1 || NonFieldWordDelim(c);
119 }
120 
121 
122 static bool IsLowerLetter( sal_Int32 nCharType )
123 {
124     return CharClass::isLetterType( nCharType ) &&
125            ( css::i18n::KCharacterType::LOWER & nCharType);
126 }
127 
128 static bool IsUpperLetter( sal_Int32 nCharType )
129 {
130     return CharClass::isLetterType( nCharType ) &&
131             ( css::i18n::KCharacterType::UPPER & nCharType);
132 }
133 
134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
135                                    sal_Int32 nStt, sal_Int32 nEnd )
136 {
137     for( ; nStt < nEnd; ++nStt )
138     {
139         css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
140         switch( nScript )
141         {
142             case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
143             case css::i18n::UnicodeScript_kHangulJamo:
144             case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
145             case css::i18n::UnicodeScript_kHiragana:
146             case css::i18n::UnicodeScript_kKatakana:
147             case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
148             case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
149             case css::i18n::UnicodeScript_kCJKCompatibility:
150             case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
151             case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
152             case css::i18n::UnicodeScript_kHangulSyllable:
153             case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
154             case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
155                 return true;
156             default: ; //do nothing
157         }
158     }
159     return false;
160 }
161 
162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
163                                   sal_Int32 nStt, sal_Int32 nEnd )
164 {
165     for( ; nStt < nEnd; ++nStt )
166     {
167         if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
168             return true;
169     }
170     return false;
171 }
172 
173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
174 {
175     // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
176     if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) )
177         return true;
178 
179     bool bRet = false;
180     for( ; *pArr; ++pArr )
181         if( *pArr == c )
182         {
183             bRet = true;
184             break;
185         }
186     return bRet;
187 }
188 
// Out-of-line destructor; there is nothing to release here.
SvxAutoCorrDoc::~SvxAutoCorrDoc()
{
}
192 
// Called by the functions:
//  - FnCapitalStartWord
//  - FnCapitalStartSentence
// after the exchange of characters. Then the words, if necessary, can be inserted
// into the exception list.
// This implementation ignores all of its arguments and does nothing.
void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
                                        sal_Unicode )
{
}
202 
// Returns LANGUAGE_SYSTEM regardless of the position passed in.
LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
{
    return LANGUAGE_SYSTEM;
}
207 
208 static const LanguageTag& GetAppLang()
209 {
210     return Application::GetSettings().GetLanguageTag();
211 }
212 
213 /// Never use an unresolved LANGUAGE_SYSTEM.
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
215 {
216     LanguageType eLang = rDoc.GetLanguage( nPos );
217     if (eLang == LANGUAGE_SYSTEM)
218         eLang = GetAppLang().getLanguageType();     // the current work locale
219     return eLang;
220 }
221 
222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
223 {
224     static std::unique_ptr<LocaleDataWrapper> xLclDtWrp;
225     LanguageTag aLcl( nLang );
226     if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl)
227         xLclDtWrp.reset(new LocaleDataWrapper(aLcl));
228     return *xLclDtWrp;
229 }
230 static TransliterationWrapper& GetIgnoreTranslWrapper()
231 {
232     static int bIsInit = 0;
233     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
234                 TransliterationFlags::IGNORE_KANA |
235                 TransliterationFlags::IGNORE_WIDTH );
236     if( !bIsInit )
237     {
238         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
239         bIsInit = 1;
240     }
241     return aWrp;
242 }
243 static CollatorWrapper& GetCollatorWrapper()
244 {
245     static CollatorWrapper aCollWrp = [&]()
246     {
247         CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
248         tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
249         return tmp;
250     }();
251     return aCollWrp;
252 }
253 
254 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
255 {
256     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
257             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
258             cChar == '*'  || cChar == '_'  || cChar == '%' ||
259             cChar == '.'  || cChar == ','  || cChar == ';' ||
260             cChar == ':'  || cChar == '?' || cChar == '!' ||
261             cChar == '<'  || cChar == '>' ||
262             cChar == '/'  || cChar == '-';
263 }
264 
265 namespace
266 {
267     bool IsCompoundWordDelimChar(sal_Unicode cChar)
268     {
269         return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
270     }
271 }
272 
273 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
274 {
275     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
276         cChar == '/' /*case for the urls exception*/;
277 }
278 
279 ACFlags SvxAutoCorrect::GetDefaultFlags()
280 {
281     ACFlags nRet = ACFlags::Autocorrect
282                     | ACFlags::CapitalStartSentence
283                     | ACFlags::CapitalStartWord
284                     | ACFlags::ChgOrdinalNumber
285                     | ACFlags::ChgToEnEmDash
286                     | ACFlags::AddNonBrkSpace
287                     | ACFlags::TransliterateRTL
288                     | ACFlags::ChgAngleQuotes
289                     | ACFlags::ChgWeightUnderl
290                     | ACFlags::SetINetAttr
291                     | ACFlags::ChgQuotes
292                     | ACFlags::SaveWordCplSttLst
293                     | ACFlags::SaveWordWrdSttLst
294                     | ACFlags::CorrectCapsLock;
295     LanguageType eLang = GetAppLang().getLanguageType();
296     if( eLang.anyOf(
297         LANGUAGE_ENGLISH,
298         LANGUAGE_ENGLISH_US,
299         LANGUAGE_ENGLISH_UK,
300         LANGUAGE_ENGLISH_AUS,
301         LANGUAGE_ENGLISH_CAN,
302         LANGUAGE_ENGLISH_NZ,
303         LANGUAGE_ENGLISH_EIRE,
304         LANGUAGE_ENGLISH_SAFRICA,
305         LANGUAGE_ENGLISH_JAMAICA,
306         LANGUAGE_ENGLISH_CARIBBEAN))
307         nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
308     return nRet;
309 }
310 
// Code points of the dashes, the typographic apostrophe and the angle
// quotation marks used by the replacement rules in this file.
constexpr sal_Unicode cEmDash = 0x2014;
constexpr sal_Unicode cEnDash = 0x2013;
constexpr sal_Unicode cApostrophe = 0x2019;
constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
// stop characters for searching preceding quotes
// (the first character is also the opening quote we are looking for)
// Every table is terminated by a 0 entry.
const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
// preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
326 
// Stores the two autocorrection file names, starts with the default flag
// set and no known character-class language; all four quote characters
// start out as 0.
SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
                                const OUString& rUserAutocorrFile )
    : sShareAutoCorrFile( rShareAutocorrFile )
    , sUserAutoCorrFile( rUserAutocorrFile )
    , eCharClassLang( LANGUAGE_DONTKNOW )
    , nFlags(SvxAutoCorrect::GetDefaultFlags())
    , cStartDQuote( 0 )
    , cEndDQuote( 0 )
    , cStartSQuote( 0 )
    , cEndSQuote( 0 )
{
}
339 
// Copies file names, Writer flags, character-class language and quote
// characters from rCpy. The flags are copied with the three "list loaded"
// bits (ChgWordLstLoad, CplSttLstLoad, WrdSttLstLoad) cleared.
SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
    : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
    , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
    , aSwFlags( rCpy.aSwFlags )
    , eCharClassLang(rCpy.eCharClassLang)
    , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad))
    , cStartDQuote( rCpy.cStartDQuote )
    , cEndDQuote( rCpy.cEndDQuote )
    , cStartSQuote( rCpy.cStartSQuote )
    , cEndSQuote( rCpy.cEndSQuote )
{
}
352 
353 
// Out-of-line destructor with an empty body.
SvxAutoCorrect::~SvxAutoCorrect()
{
}
357 
358 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
359 {
360     pCharClass.reset( new CharClass( LanguageTag( eLang)) );
361     eCharClassLang = eLang;
362 }
363 
364 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
365 {
366     ACFlags nOld = nFlags;
367     nFlags = bOn ? nFlags | nFlag
368                  : nFlags & ~nFlag;
369 
370     if( !bOn )
371     {
372         if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
373             nFlags &= ~ACFlags::CplSttLstLoad;
374         if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
375             nFlags &= ~ACFlags::WrdSttLstLoad;
376         if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
377             nFlags &= ~ACFlags::ChgWordLstLoad;
378     }
379 }
380 
381 
382 // Correct TWo INitial CApitals
383 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
384                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
385                                     LanguageType eLang )
386 {
387     CharClass& rCC = GetCharClass( eLang );
388 
389     // Delete all non alphanumeric. Test the characters at the beginning/end of
390     // the word ( recognizes: "(min.", "/min.", and so on.)
391     for( ; nSttPos < nEndPos; ++nSttPos )
392         if( rCC.isLetterNumeric( rTxt, nSttPos ))
393             break;
394     for( ; nSttPos < nEndPos; --nEndPos )
395         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
396             break;
397 
398     // Is the word a compounded word separated by delimiters?
399     // If so, keep track of all delimiters so each constituent
400     // word can be checked for two initial capital letters.
401     std::deque<sal_Int32> aDelimiters;
402 
403     // Always check for two capitals at the beginning
404     // of the entire word, so start at nSttPos.
405     aDelimiters.push_back(nSttPos);
406 
407     // Find all compound word delimiters
408     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
409     {
410         if (IsCompoundWordDelimChar(rTxt[ n ]))
411         {
412             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
413         }
414     }
415 
416     // Decide where to put the terminating delimiter.
417     // If the last AutoCorrect char was a newline, then the AutoCorrect
418     // char will not be included in rTxt.
419     // If the last AutoCorrect char was not a newline, then the AutoCorrect
420     // character will be the last character in rTxt.
421     if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
422         aDelimiters.push_back(nEndPos);
423 
424     // Iterate through the word and all words that compose it.
425     // Two capital letters at the beginning of word?
426     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
427     {
428         nSttPos = aDelimiters[nI];
429         nEndPos = aDelimiters[nI + 1];
430 
431         if( nSttPos+2 < nEndPos &&
432             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
433             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
434             // Is the third character a lower case
435             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
436             // Do not replace special attributes
437             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
438         {
439             // test if the word is in an exception list
440             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
441             if( !FindInWrdSttExceptList(eLang, sWord) )
442             {
443                 // Check that word isn't correctly spelt before correcting:
444                 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
445                     LinguMgr::GetSpellChecker();
446                 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
447                 {
448                     Sequence< css::beans::PropertyValue > aEmptySeq;
449                     if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
450                     {
451                         return;
452                     }
453                 }
454                 sal_Unicode cSave = rTxt[ nSttPos ];
455                 OUString sChar = rCC.lowercase( OUString(cSave) );
456                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
457                 {
458                     if( ACFlags::SaveWordWrdSttLst & nFlags )
459                         rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
460                 }
461             }
462         }
463     }
464 }
465 
466 // Format ordinal numbers suffixes (1st -> 1^st)
467 bool SvxAutoCorrect::FnChgOrdinalNumber(
468     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
469     sal_Int32 nSttPos, sal_Int32 nEndPos,
470     LanguageType eLang)
471 {
472     // 1st, 2nd, 3rd, 4 - 0th
473     // 201th or 201st
474     // 12th or 12nd
475     bool bChg = false;
476 
477     // In some languages ordinal suffixes should never be
478     // changed to superscript. Let's break for those languages.
479     if (!eLang.anyOf(
480          LANGUAGE_SWEDISH,
481          LANGUAGE_SWEDISH_FINLAND))
482     {
483         CharClass& rCC = GetCharClass(eLang);
484 
485         for (; nSttPos < nEndPos; ++nSttPos)
486             if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
487                 break;
488         for (; nSttPos < nEndPos; --nEndPos)
489             if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
490                 break;
491 
492 
493         // Get the last number in the string to check
494         sal_Int32 nNumEnd = nEndPos;
495         bool bFoundEnd = false;
496         bool isValidNumber = true;
497         sal_Int32 i = nEndPos;
498         while (i > nSttPos)
499         {
500             i--;
501             bool isDigit = rCC.isDigit(rTxt, i);
502             if (bFoundEnd)
503                 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
504 
505             if (isDigit && !bFoundEnd)
506             {
507                 bFoundEnd = true;
508                 nNumEnd = i;
509             }
510         }
511 
512         if (bFoundEnd && isValidNumber) {
513             sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
514 
515             // Check if the characters after that number correspond to the ordinal suffix
516             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
517                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
518 
519             const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
520             for (OUString const & sSuffix : aSuffixes)
521             {
522                 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
523 
524                 if (sSuffix == sEnd)
525                 {
526                     // Check if the ordinal suffix has to be set as super script
527                     if (rCC.isLetter(sSuffix))
528                     {
529                         // Do the change
530                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
531                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
532                         rDoc.SetAttr(nNumEnd + 1, nEndPos,
533                             SID_ATTR_CHAR_ESCAPEMENT,
534                             aSvxEscapementItem);
535                         bChg = true;
536                     }
537                 }
538             }
539         }
540     }
541     return bChg;
542 }
543 
544 // Replace dashes
545 bool SvxAutoCorrect::FnChgToEnEmDash(
546                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
547                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
548                                 LanguageType eLang )
549 {
550     bool bRet = false;
551     CharClass& rCC = GetCharClass( eLang );
552     if (eLang == LANGUAGE_SYSTEM)
553         eLang = GetAppLang().getLanguageType();
554     bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
555 
556     // replace " - " or " --" with "enDash"
557     if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
558     {
559         sal_Unicode cCh = rTxt[ nSttPos ];
560         if( '-' == cCh )
561         {
562             if( 1 < nEndPos - nSttPos &&
563                 ' ' == rTxt[ nSttPos-1 ] &&
564                 '-' == rTxt[ nSttPos+1 ])
565             {
566                 sal_Int32 n;
567                 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
568                             sImplSttSkipChars,(cCh = rTxt[ n ]));
569                         ++n )
570                     ;
571 
572                 // found: " --[<AnySttChars>][A-z0-9]
573                 if( rCC.isLetterNumeric( OUString(cCh) ) )
574                 {
575                     for( n = nSttPos-1; n && lcl_IsInAsciiArr(
576                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
577                         ;
578 
579                     // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
580                     if( rCC.isLetterNumeric( OUString(cCh) ))
581                     {
582                         rDoc.Delete( nSttPos, nSttPos + 2 );
583                         rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
584                         bRet = true;
585                     }
586                 }
587             }
588         }
589         else if( 3 < nSttPos &&
590                  ' ' == rTxt[ nSttPos-1 ] &&
591                  '-' == rTxt[ nSttPos-2 ])
592         {
593             sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
594             if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
595             {
596                 --nTmpPos;
597                 ++nLen;
598                 cCh = rTxt[ nTmpPos-1 ];
599             }
600             if( ' ' == cCh )
601             {
602                 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
603                             sImplSttSkipChars,(cCh = rTxt[ n ]));
604                         ++n )
605                     ;
606 
607                 // found: " - [<AnySttChars>][A-z0-9]
608                 if( rCC.isLetterNumeric( OUString(cCh) ) )
609                 {
610                     cCh = ' ';
611                     for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
612                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
613                             ;
614                     // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
615                     if( rCC.isLetterNumeric( OUString(cCh) ))
616                     {
617                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
618                         rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
619                         bRet = true;
620                     }
621                 }
622             }
623         }
624     }
625 
626     // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
627     // [0-9]--[0-9] double dash always replaced with "enDash"
628     // Finnish and Hungarian use enDash instead of emDash.
629     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
630     if( 4 <= nEndPos - nSttPos )
631     {
632         OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
633         sal_Int32 nFndPos = sTmp.indexOf("--");
634         if( nFndPos != -1 && nFndPos &&
635             nFndPos + 2 < sTmp.getLength() &&
636             ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
637               lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
638             ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
639             lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
640         {
641             nSttPos = nSttPos + nFndPos;
642             rDoc.Delete( nSttPos, nSttPos + 2 );
643             rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
644                 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
645             bRet = true;
646         }
647     }
648     return bRet;
649 }
650 
// Add non-breaking space before specific punctuation marks in French text
//
// Only acts when the language of the CharClass for eLang is "fr".
// rTxt[nEndPos] is the character being examined.
//  - For one of ":;?!%": unless the word looks like a URL/protocol, any
//    run of spaces / no-break spaces in front of it is deleted and (for the
//    characters in `chars`) a single no-break space is inserted.
//    io_bNbspRunNext is set to true in that case, and also when the previous
//    character is itself one of `chars`.
//  - For '/': a no-break space directly in front of a preceding ':' is
//    removed again, so that URLs are not disturbed.
// Returns true when rDoc was modified (or, in the first case, when the
// rule applied even if nothing had to be deleted or inserted).
bool SvxAutoCorrect::FnAddNonBrkSpace(
                                SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                sal_Int32 nEndPos,
                                LanguageType eLang, bool& io_bNbspRunNext )
{
    bool bRet = false;

    CharClass& rCC = GetCharClass( eLang );

    if ( rCC.getLanguageTag().getLanguage() == "fr" )
    {
        // For country "CA" only ':' gets a no-break space; the other
        // characters in allChars still have preceding spaces removed.
        bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
        OUString allChars = ":;?!%";
        OUString chars( allChars );
        if ( bFrCA )
            chars = ":";

        sal_Unicode cChar = rTxt[ nEndPos ];
        bool bHasSpace = chars.indexOf( cChar ) != -1;
        bool bIsSpecial = allChars.indexOf( cChar ) != -1;
        if ( bIsSpecial )
        {
            // Get the last word delimiter position
            sal_Int32 nSttWdPos = nEndPos;
            bool bWasWordDelim = false;
            while( nSttWdPos )
            {
                bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
                if (bWasWordDelim)
                    break;
            }

            //See if the text is the start of a protocol string, e.g. have text of
            //"http" see if it is the start of "http:" and if so leave it alone
            sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
            // NOTE(review): when a delimiter was found, nIndex is
            // nSttWdPos + 1, so this length reaches one character past
            // nEndPos - confirm that this is intended.
            sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
            if (nIndex + nProtocolLen <= rTxt.getLength())
            {
                if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
                    return false;
            }

            // Check the presence of "://" in the word
            sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
            if ( nStrPos == -1 && nEndPos > 0 )
            {
                // Check the previous char
                sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
                if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
                {
                    // Remove any previous normal space
                    sal_Int32 nPos = nEndPos - 1;
                    while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
                    {
                        if ( nPos == 0 ) break;
                        nPos--;
                        cPrevChar = rTxt[ nPos ];
                    }

                    // nPos now is the first position of the space run to
                    // delete (one past the last non-space character).
                    nPos++;
                    if ( nEndPos - nPos > 0 )
                        rDoc.Delete( nPos, nEndPos );

                    // Add the non-breaking space at the end pos
                    if ( bHasSpace )
                        rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
                    io_bNbspRunNext = true;
                    bRet = true;
                }
                else if ( chars.indexOf( cPrevChar ) != -1 )
                    io_bNbspRunNext = true;
            }
        }
        // NOTE(review): rTxt[nEndPos] was already read above, so the length
        // test here cannot protect that access - confirm what it guards.
        else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
        {
            // Remove the hardspace right before to avoid formatting URLs
            sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
            sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
            if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
            {
                rDoc.Delete( nEndPos - 2, nEndPos - 1 );
                bRet = true;
            }
        }
    }

    return bRet;
}
740 
741 // URL recognition
742 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
743                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
744                                     LanguageType eLang )
745 {
746     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
747                                                 GetCharClass( eLang ) ));
748     bool bRet = !sURL.isEmpty();
749     if( bRet )          // so, set attribute:
750         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
751     return bRet;
752 }
753 
754 // Automatic *bold*, /italic/, -strikeout- and _underline_
755 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
756                                         sal_Int32 nEndPos )
757 {
758     // Condition:
759     //  at the beginning:   _, *, / or ~ after Space with the following !Space
760     //  at the end:         _, *, / or ~ before Space (word delimiter?)
761 
762     sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
763     if( ++nEndPos != rTxt.getLength() &&
764         !IsWordDelim( rTxt[ nEndPos ] ) )
765         return false;
766 
767     --nEndPos;
768 
769     bool bAlphaNum = false;
770     sal_Int32 nPos = nEndPos;
771     sal_Int32  nFndPos = -1;
772     CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
773 
774     while( nPos )
775     {
776         switch( sal_Unicode c = rTxt[ --nPos ] )
777         {
778         case '_':
779         case '-':
780         case '/':
781         case '*':
782             if( c == cInsChar )
783             {
784                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
785                     IsWordDelim( rTxt[ nPos-1 ])) &&
786                     !IsWordDelim( rTxt[ nPos+1 ]))
787                         nFndPos = nPos;
788                 else
789                     // Condition is not satisfied, so cancel
790                     nFndPos = -1;
791                 nPos = 0;
792             }
793             break;
794         default:
795             if( !bAlphaNum )
796                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
797         }
798     }
799 
800     if( -1 != nFndPos )
801     {
802         // first delete the Character at the end - this allows insertion
803         // of an empty hint in SetAttr which would be removed by Delete
804         // (fdo#62536, AUTOFMT in Writer)
805         rDoc.Delete( nEndPos, nEndPos + 1 );
806         rDoc.Delete( nFndPos, nFndPos + 1 );
807         // Span the Attribute over the area
808         // the end.
809         if( '*' == cInsChar )           // Bold
810         {
811             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
812             rDoc.SetAttr( nFndPos, nEndPos - 1,
813                           SID_ATTR_CHAR_WEIGHT,
814                           aSvxWeightItem);
815         }
816         else if( '/' == cInsChar )           // Italic
817         {
818             SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
819             rDoc.SetAttr( nFndPos, nEndPos - 1,
820                           SID_ATTR_CHAR_POSTURE,
821                           aSvxPostureItem);
822         }
823         else if( '-' == cInsChar )           // Strikeout
824         {
825             SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
826             rDoc.SetAttr( nFndPos, nEndPos - 1,
827                           SID_ATTR_CHAR_STRIKEOUT,
828                           aSvxCrossedOutItem);
829         }
830         else                            // Underline
831         {
832             SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
833             rDoc.SetAttr( nFndPos, nEndPos - 1,
834                           SID_ATTR_CHAR_UNDERLINE,
835                           aSvxUnderlineItem);
836         }
837     }
838 
839     return -1 != nFndPos;
840 }
841 
842 // Capitalize first letter of every sentence
843 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
844                                     const OUString& rTxt, bool bNormalPos,
845                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
846                                     LanguageType eLang )
847 {
848 
849     if( rTxt.isEmpty() || nEndPos <= nSttPos )
850         return;
851 
852     CharClass& rCC = GetCharClass( eLang );
853     OUString aText( rTxt );
854     const sal_Unicode *pStart = aText.getStr(),
855                       *pStr = pStart + nEndPos,
856                       *pWordStt = nullptr,
857                       *pDelim = nullptr;
858 
859     bool bAtStart = false;
860     do {
861         --pStr;
862         if (rCC.isLetter(aText, pStr - pStart))
863         {
864             if( !pWordStt )
865                 pDelim = pStr+1;
866             pWordStt = pStr;
867         }
868         else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
869         {
870             if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
871                 pWordStt - 1 == pStr &&
872                 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
873                 (pStart + 1) <= pStr &&
874                 rCC.isLetter(aText, pStr-1 - pStart))
875                 pWordStt = --pStr;
876             else
877                 break;
878         }
879         bAtStart = (pStart == pStr);
880     } while( !bAtStart );
881 
882     if (!pWordStt)
883         return;    // no character to be replaced
884 
885 
886     if (rCC.isDigit(aText, pStr - pStart))
887         return; // already ok
888 
889     if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
890         return; // already ok
891 
892     //See if the text is the start of a protocol string, e.g. have text of
893     //"http" see if it is the start of "http:" and if so leave it alone
894     sal_Int32 nIndex = pWordStt - pStart;
895     sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
896     if (nIndex + nProtocolLen <= rTxt.getLength())
897     {
898         if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
899             return; // already ok
900     }
901 
902     if (0x1 == *pWordStt || 0x2 == *pWordStt)
903         return; // already ok
904 
905     // Only capitalize, if string before specified characters is long enough
906     if( *pDelim && 2 >= pDelim - pWordStt &&
907         lcl_IsInAsciiArr( ".-)>", *pDelim ) )
908         return;
909 
910     // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
911     if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
912         return;
913 
914     if( !bAtStart ) // Still no beginning of a paragraph?
915     {
916         if (NonFieldWordDelim(*pStr))
917         {
918             for (;;)
919             {
920                 bAtStart = (pStart == pStr--);
921                 if (bAtStart || !NonFieldWordDelim(*pStr))
922                     break;
923             }
924         }
925         // Asian full stop, full width full stop, full width exclamation mark
926         // and full width question marks are treated as word delimiters
927         else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
928                   0xFF1F != *pStr )
929             return; // no valid separator -> no replacement
930     }
931 
932     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
933     if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
934         return;
935 
936     if( bAtStart )  // at the beginning of a paragraph?
937     {
938         // Check out the previous paragraph, if it exists.
939         // If so, then check to paragraph separator at the end.
940         OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
941         if (!pPrevPara)
942         {
943             // valid separator -> replace
944             OUString sChar( *pWordStt );
945             sChar = rCC.titlecase(sChar); //see fdo#56740
946             if (sChar != OUStringChar(*pWordStt))
947                rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
948             return;
949         }
950 
951         aText = *pPrevPara;
952         bAtStart = false;
953         pStart = aText.getStr();
954         pStr = pStart + aText.getLength();
955 
956         do {            // overwrite all blanks
957             --pStr;
958             if (!NonFieldWordDelim(*pStr))
959                 break;
960             bAtStart = (pStart == pStr);
961         } while( !bAtStart );
962 
963         if( bAtStart )
964             return;  // no valid separator -> no replacement
965     }
966 
967     // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
968     // all three can happen, but not more than once!
969     const sal_Unicode* pExceptStt = nullptr;
970     bool bContinue = true;
971     Flags nFlag = Flags::NONE;
972     do
973     {
974         switch (*pStr)
975         {
976             // Western and Asian full stop
977             case '.':
978             case 0x3002:
979             case 0xFF0E:
980             {
981                 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
982                 {
983                     //e.g. text "f.o.o. word": Now currently considering
984                     //capitalizing word but second last character of
985                     //previous word is a .  So probably last word is an
986                     //anagram that ends in . and not truly the end of a
987                     //previous sentence, so don't autocapitalize this word
988                     return;
989                 }
990                 if (nFlag & Flags::FullStop)
991                     return; // no valid separator -> no replacement
992                 nFlag |= Flags::FullStop;
993                 pExceptStt = pStr;
994             }
995             break;
996             case '!':
997             case 0xFF01:
998             {
999                 if (nFlag & Flags::ExclamationMark)
1000                     return; // no valid separator -> no replacement
1001                 nFlag |= Flags::ExclamationMark;
1002             }
1003             break;
1004             case '?':
1005             case 0xFF1F:
1006             {
1007                 if (nFlag & Flags::QuestionMark)
1008                     return; // no valid separator -> no replacement
1009                 nFlag |= Flags::QuestionMark;
1010             }
1011             break;
1012             default:
1013                 if (nFlag == Flags::NONE)
1014                     return; // no valid separator -> no replacement
1015                 else
1016                     bContinue = false;
1017                 break;
1018         }
1019 
1020         if (bContinue && pStr-- == pStart)
1021         {
1022             return; // no valid separator -> no replacement
1023         }
1024     } while (bContinue);
1025     if (Flags::FullStop != nFlag)
1026         pExceptStt = nullptr;
1027 
1028     // Only capitalize, if string is long enough
1029     if( 2 > ( pStr - pStart ) )
1030         return;
1031 
1032     if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1033     {
1034         bool bValid = false, bAlphaFnd = false;
1035         const sal_Unicode* pTmpStr = pStr;
1036         while( !bValid )
1037         {
1038             if( rCC.isDigit( aText, pTmpStr - pStart ) )
1039             {
1040                 bValid = true;
1041                 pStr = pTmpStr - 1;
1042             }
1043             else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1044             {
1045                 if( bAlphaFnd )
1046                 {
1047                     bValid = true;
1048                     pStr = pTmpStr;
1049                 }
1050                 else
1051                     bAlphaFnd = true;
1052             }
1053             else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1054                 break;
1055 
1056             if( pTmpStr == pStart )
1057                 break;
1058 
1059             --pTmpStr;
1060         }
1061 
1062         if( !bValid )
1063             return;       // no valid separator -> no replacement
1064     }
1065 
1066     bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1067 
1068     // Search for the beginning of the word
1069     while (!NonFieldWordDelim(*pStr))
1070     {
1071         if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1072             bNumericOnly = false;
1073 
1074         if( pStart == pStr )
1075             break;
1076 
1077         --pStr;
1078     }
1079 
1080     if( bNumericOnly )      // consists of only numbers, then not
1081         return;
1082 
1083     if (NonFieldWordDelim(*pStr))
1084         ++pStr;
1085 
1086     OUString sWord;
1087 
1088     // check on the basis of the exception list
1089     if( pExceptStt )
1090     {
1091         sWord = OUString(pStr, pExceptStt - pStr + 1);
1092         if( FindInCplSttExceptList(eLang, sWord) )
1093             return;
1094 
1095         // Delete all non alphanumeric. Test the characters at the
1096         // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1097         OUString sTmp( sWord );
1098         while( !sTmp.isEmpty() &&
1099                 !rCC.isLetterNumeric( sTmp, 0 ) )
1100             sTmp = sTmp.copy(1);
1101 
1102         // Remove all non alphanumeric characters towards the end up until
1103         // the last one.
1104         sal_Int32 nLen = sTmp.getLength();
1105         while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1106             --nLen;
1107         if( nLen + 1 < sTmp.getLength() )
1108             sTmp = sTmp.copy( 0, nLen + 1 );
1109 
1110         if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1111             FindInCplSttExceptList(eLang, sTmp))
1112             return;
1113 
1114         if(FindInCplSttExceptList(eLang, sWord, true))
1115             return;
1116     }
1117 
1118     // Ok, then replace
1119     sal_Unicode cSave = *pWordStt;
1120     nSttPos = pWordStt - rTxt.getStr();
1121     OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1122     bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1123 
1124     // Perhaps someone wants to have the word
1125     if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1126         rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1127 }
1128 
1129 // Correct accidental use of cAPS LOCK key
1130 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1131                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1132                                         LanguageType eLang )
1133 {
1134     if (nEndPos - nSttPos < 2)
1135         // string must be at least 2-character long.
1136         return false;
1137 
1138     CharClass& rCC = GetCharClass( eLang );
1139 
1140     // Check the first 2 letters.
1141     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1142         return false;
1143 
1144     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1145         return false;
1146 
1147     OUStringBuffer aConverted;
1148     aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1149     aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1150 
1151     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1152     if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1153         return false;
1154 
1155     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1156     {
1157         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1158             // A lowercase letter disqualifies the whole text.
1159             return false;
1160 
1161         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1162             // Another uppercase letter.  Convert it.
1163             aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1164         else
1165             // This is not an alphabetic letter.  Leave it as-is.
1166             aConverted.append( rTxt[i] );
1167     }
1168 
1169     // Replace the word.
1170     rDoc.Delete(nSttPos, nEndPos);
1171     rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1172 
1173     return true;
1174 }
1175 
1176 
1177 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1178                                         LanguageType eLang ) const
1179 {
1180     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1181                                     ? GetStartDoubleQuote()
1182                                     : GetStartSingleQuote() )
1183                                    : ( '\"' == cInsChar
1184                                     ? GetEndDoubleQuote()
1185                                     : GetEndSingleQuote() );
1186     if( !cRet )
1187     {
1188         // then through the Language find the right character
1189         if( LANGUAGE_NONE == eLang )
1190             cRet = cInsChar;
1191         else
1192         {
1193             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1194             OUString sRet( bSttQuote
1195                             ? ( '\"' == cInsChar
1196                                 ? rLcl.getDoubleQuotationMarkStart()
1197                                 : rLcl.getQuotationMarkStart() )
1198                             : ( '\"' == cInsChar
1199                                 ? rLcl.getDoubleQuotationMarkEnd()
1200                                 : rLcl.getQuotationMarkEnd() ));
1201             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1202         }
1203     }
1204     return cRet;
1205 }
1206 
1207 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1208                                     sal_Unicode cInsChar, bool bSttQuote,
1209                                     bool bIns, LanguageType eLang, ACQuotes eType ) const
1210 {
1211     sal_Unicode cRet;
1212 
1213     if ( eType == ACQuotes::DoubleAngleQuote )
1214     {
1215         bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1216         // pressing " inside a quotation -> use second level angle quotes
1217         bool bLeftQuote = '\"' == cInsChar &&
1218                 // start position and Romanian OR
1219                 // not start position and Hungarian
1220                 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1221         cRet = ( '<' == cInsChar || bLeftQuote )
1222                 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1223                 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1224     }
1225     else if ( eType == ACQuotes::UseApostrophe )
1226         cRet = cApostrophe;
1227     else
1228         cRet = GetQuote( cInsChar, bSttQuote, eLang );
1229 
1230     OUString sChg( cInsChar );
1231     if( bIns )
1232         rDoc.Insert( nInsPos, sChg );
1233     else
1234         rDoc.Replace( nInsPos, sChg );
1235 
1236     sChg = OUString(cRet);
1237 
1238     if( eType == ACQuotes::NonBreakingSpace )
1239     {
1240         if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1241         {
1242             if( !bSttQuote )
1243                 ++nInsPos;
1244         }
1245     }
1246     else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1247     {
1248         rDoc.Delete( nInsPos-1, nInsPos);
1249         --nInsPos;
1250     }
1251 
1252     rDoc.Replace( nInsPos, sChg );
1253 
1254     // i' -> I' in English (last step for the Undo)
1255     if( eType == ACQuotes::CapitalizeIAm )
1256         rDoc.Replace( nInsPos-1, "I" );
1257 }
1258 
1259 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1260                                 sal_Unicode cInsChar, bool bSttQuote )
1261 {
1262     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1263     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1264 
1265     OUString sRet(cRet);
1266 
1267     if( '\"' == cInsChar )
1268     {
1269         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1270         {
1271             if( bSttQuote )
1272                 sRet += " ";
1273             else
1274                 sRet = " " + sRet;
1275         }
1276     }
1277     return sRet;
1278 }
1279 
// search preceding opening quote in the paragraph before the insert position
//
// Scans rTxt backwards, starting with the character at nPos - 1 and ending
// with the character at index 0.
//
//  rTxt            text to search
//  nPos            insert position; only characters in front of it are read
//  sPrecedingChar  character that is searched for
//  aStopChars      zero-terminated list of characters that end the search
//
// Returns true if sPrecedingChar is met before any stop character. Returns
// false if a stop character comes first, if the start of the text is reached,
// or if nPos leaves no character to inspect (nPos <= 0) or lies behind the
// end of rTxt.
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars )
{
    // Without this check the first "--nPos" would index in front of the text
    // for nPos == 0, or behind its end for a too large nPos.
    if ( nPos <= 0 || static_cast<std::size_t>(nPos) > rTxt.size() )
        return false;

    while ( nPos > 0 )
    {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;
    }

    return false;
}
1299 
1300 // WARNING: rText may become invalid, see comment below
1301 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1302                                     sal_Int32 nInsPos, sal_Unicode cChar,
1303                                     bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1304 {
1305     bool bIsNextRun = io_bNbspRunNext;
1306     io_bNbspRunNext = false;  // if it was set, then it has to be turned off
1307 
1308     do{                                 // only for middle check loop !!
1309         if( cChar )
1310         {
1311             // Prevent double space
1312             if( nInsPos && ' ' == cChar &&
1313                 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1314                 ' ' == rTxt[ nInsPos - 1 ])
1315             {
1316                 break;
1317             }
1318 
1319             bool bSingle = '\'' == cChar;
1320             bool bIsReplaceQuote =
1321                         (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1322                         (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1323             if( bIsReplaceQuote )
1324             {
1325                 bool bSttQuote = !nInsPos;
1326                 ACQuotes eType = ACQuotes::NONE;
1327                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1328                 if (!bSttQuote)
1329                 {
1330                     sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1331                     bSttQuote = NonFieldWordDelim(cPrev) ||
1332                         lcl_IsInAsciiArr( "([{", cPrev ) ||
1333                         ( cEmDash == cPrev ) ||
1334                         ( cEnDash == cPrev );
1335                     // tdf#38394 use opening quotation mark << in French l'<<word>>
1336                     if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1337                         primary(eLang) == primary(LANGUAGE_FRENCH) &&
1338                         ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1339                                // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1340                                OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1341                           ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1342                                // abbreviated form of que
1343                                ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1344                                ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1345                     {
1346                         bSttQuote = true;
1347                     }
1348                     // tdf#108423 for capitalization of English i'm
1349                     else if ( bSingle && ( cPrev == 'i' ) &&
1350                         primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1351                         ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1352                     {
1353                         eType = ACQuotes::CapitalizeIAm;
1354                     }
1355                     // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1356                     else if ( !bSingle && nInsPos &&
1357                         ( ( eLang == LANGUAGE_HUNGARIAN &&
1358                             lcl_HasPrecedingChar( rTxt, nInsPos,
1359                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1360                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) ||
1361                           ( eLang.anyOf(
1362                                 LANGUAGE_ROMANIAN,
1363                                 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1364                             lcl_HasPrecedingChar( rTxt, nInsPos,
1365                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1366                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) )
1367                     {
1368                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1369                         // only if the opening double quotation mark is the default one
1370                         if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1371                             eType = ACQuotes::DoubleAngleQuote;
1372                     }
1373                     else if ( bSingle && nInsPos && !bSttQuote &&
1374                         // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1375                         // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1376                         // tdf#123786 the same for Russian and Ukrainian
1377                         ( ( eLang.anyOf (
1378                                  LANGUAGE_CZECH,
1379                                  LANGUAGE_GERMAN,
1380                                  LANGUAGE_GERMAN_SWISS,
1381                                  LANGUAGE_GERMAN_AUSTRIAN,
1382                                  LANGUAGE_GERMAN_LUXEMBOURG,
1383                                  LANGUAGE_GERMAN_LIECHTENSTEIN,
1384                                  LANGUAGE_ICELANDIC,
1385                                  LANGUAGE_SLOVAK,
1386                                  LANGUAGE_SLOVENIAN ) &&
1387                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0],  aStopSingleQuoteEnd + 1 ) ) ||
1388                           ( eLang.anyOf (
1389                                  LANGUAGE_RUSSIAN,
1390                                  LANGUAGE_UKRAINIAN ) &&
1391                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0],  aStopSingleQuoteEndRuUa + 1 ) ) ) )
1392                     {
1393                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1394                         CharClass& rCC = GetCharClass( eLang );
1395                         if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) ||
1396                              rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) &&
1397                              // use apostrophe only after letters, not after digits or punctuation
1398                              rCC.isLetter(rTxt, nInsPos-1) )
1399                         {
1400                             eType = ACQuotes::UseApostrophe;
1401                         }
1402                     }
1403                 }
1404 
1405                 if ( eType == ACQuotes::NONE && !bSingle &&
1406                     ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1407                     eType = ACQuotes::NonBreakingSpace;
1408 
1409                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1410                 break;
1411             }
1412             // tdf#133524 change "<<" and ">>" to double angle quotation marks
1413             else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1414                 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1415                 ('<' == cChar || '>' == cChar) &&
1416                 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1417             {
1418                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1419                 if ( eLang.anyOf(
1420                         LANGUAGE_CATALAN,              // primary level
1421                         LANGUAGE_CATALAN_VALENCIAN,    // primary level
1422                         LANGUAGE_FINNISH,              // alternative primary level
1423                         LANGUAGE_FRENCH_SWISS,         // second level
1424                         LANGUAGE_GALICIAN,             // primary level
1425                         LANGUAGE_HUNGARIAN,            // second level
1426                         LANGUAGE_POLISH,               // second level
1427                         LANGUAGE_PORTUGUESE,           // primary level
1428                         LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1429                         LANGUAGE_ROMANIAN,             // second level
1430                         LANGUAGE_ROMANIAN_MOLDOVA,     // second level
1431                         LANGUAGE_SWEDISH,              // alternative primary level
1432                         LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
1433                         LANGUAGE_UKRAINIAN,            // primary level
1434                         LANGUAGE_USER_ARAGONESE,       // primary level
1435                         LANGUAGE_USER_ASTURIAN ) ||    // primary level
1436                     primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
1437                     primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
1438                 {
1439                     InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1440                     break;
1441                 }
1442             }
1443 
1444             if( bInsert )
1445                 rDoc.Insert( nInsPos, OUString(cChar) );
1446             else
1447                 rDoc.Replace( nInsPos, OUString(cChar) );
1448 
1449             // Hardspaces autocorrection
1450             if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1451             {
1452                 if ( NeedsHardspaceAutocorr( cChar ) &&
1453                     FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
1454                 {
1455                     ;
1456                 }
1457                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1458                 {
1459                     // Remove the NBSP if it wasn't an autocorrection
1460                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1461                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1462                     {
1463                         // Look for the last HARD_SPACE
1464                         sal_Int32 nPos = nInsPos - 1;
1465                         bool bContinue = true;
1466                         while ( bContinue )
1467                         {
1468                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1469                             if ( cTmpChar == cNonBreakingSpace )
1470                             {
1471                                 rDoc.Delete( nPos, nPos + 1 );
1472                                 bContinue = false;
1473                             }
1474                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1475                                 bContinue = false;
1476                             nPos--;
1477                         }
1478                     }
1479                 }
1480             }
1481         }
1482 
1483         if( !nInsPos )
1484             break;
1485 
1486         sal_Int32 nPos = nInsPos - 1;
1487 
1488         if( IsWordDelim( rTxt[ nPos ]))
1489             break;
1490 
1491         // Set bold or underline automatically?
1492         if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1493         {
1494             if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1495             {
1496                 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1497             }
1498             break;
1499         }
1500 
1501         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1502             ;
1503 
1504         // Found a Paragraph-start or a Blank, search for the word shortcut in
1505         // auto.
1506         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1507         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1508             --nCapLttrPos;          // begin of paragraph and no blank
1509 
1510         const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1511         CharClass& rCC = GetCharClass( eLang );
1512 
1513         // no symbol characters
1514         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1515             break;
1516 
1517         if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1518             // tdf#134940 fix regression of arrow "-->" resulted by premature
1519             // replacement of "--" since '>' was added to IsAutoCorrectChar()
1520             '>' != cChar )
1521         {
1522             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1523             // and becomes INVALID if ChgAutoCorrWord returns true!
1524             // => use aPara/pPara to create a valid copy of the string!
1525             OUString aPara;
1526             OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1527 
1528             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1529                                                     *this, pPara );
1530             if( !bChgWord )
1531             {
1532                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1533                 while( nCapLttrPos1 < nInsPos &&
1534                         lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1535                         )
1536                         ++nCapLttrPos1;
1537                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1538                         lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1539                         )
1540                         --nInsPos1;
1541 
1542                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1543                     nCapLttrPos1 < nInsPos1 &&
1544                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1545                 {
1546                     bChgWord = true;
1547                     nCapLttrPos = nCapLttrPos1;
1548                 }
1549             }
1550 
1551             if( bChgWord )
1552             {
1553                 if( !aPara.isEmpty() )
1554                 {
1555                     sal_Int32 nEnd = nCapLttrPos;
1556                     while( nEnd < aPara.getLength() &&
1557                             !IsWordDelim( aPara[ nEnd ]))
1558                         ++nEnd;
1559 
1560                     // Capital letter at beginning of paragraph?
1561                     if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1562                     {
1563                         FnCapitalStartSentence( rDoc, aPara, false,
1564                                                 nCapLttrPos, nEnd, eLang );
1565                     }
1566 
1567                     if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1568                     {
1569                         FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1570                     }
1571                 }
1572                 break;
1573             }
1574         }
1575 
1576         if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1577         {
1578             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1579             // and becomes INVALID if TransliterateRTLWord returns true!
1580             if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1581                 break;
1582         }
1583 
1584         if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1585                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1586                 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1587                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1588             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1589                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1590                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1591             ;
1592         else
1593         {
1594             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1595             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1596 
1597             if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1598                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1599             {
1600                 // Correct accidental use of cAPS LOCK key (do this only when
1601                 // the caps or shift lock key is pressed). Turn off the caps
1602                 // lock afterwards.
1603                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1604             }
1605 
1606             // Capital letter at beginning of paragraph ?
1607             if( !bUnsupported &&
1608                 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1609             {
1610                 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1611             }
1612 
1613             // Two capital letters at beginning of word ??
1614             if( !bUnsupported &&
1615                 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1616             {
1617                 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1618             }
1619 
1620             if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1621             {
1622                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1623             }
1624         }
1625 
1626     } while( false );
1627 }
1628 
1629 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1630                                                         LanguageType eLang )
1631 {
1632     LanguageTag aLanguageTag( eLang);
1633     if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1634         (void)CreateLanguageFile(aLanguageTag);
1635     return *(m_aLangTable.find(aLanguageTag)->second);
1636 }
1637 
1638 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1639 {
1640     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1641     if (iter != m_aLangTable.end() && iter->second)
1642         iter->second->SaveCplSttExceptList();
1643     else
1644     {
1645         SAL_WARN("editeng", "Save an empty list? ");
1646     }
1647 }
1648 
1649 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1650 {
1651     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1652     if (iter != m_aLangTable.end() && iter->second)
1653         iter->second->SaveWrdSttExceptList();
1654     else
1655     {
1656         SAL_WARN("editeng", "Save an empty list? ");
1657     }
1658 }
1659 
1660 // Adds a single word. The list will immediately be written to the file!
1661 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1662                                         LanguageType eLang )
1663 {
1664     SvxAutoCorrectLanguageLists* pLists = nullptr;
1665     // either the right language is present or it will be this in the general list
1666     auto iter = m_aLangTable.find(LanguageTag(eLang));
1667     if (iter != m_aLangTable.end())
1668         pLists = iter->second.get();
1669     else
1670     {
1671         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1672         iter = m_aLangTable.find(aLangTagUndetermined);
1673         if (iter != m_aLangTable.end())
1674             pLists = iter->second.get();
1675         else if(CreateLanguageFile(aLangTagUndetermined))
1676             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1677     }
1678     OSL_ENSURE(pLists, "No auto correction data");
1679     return pLists && pLists->AddToCplSttExceptList(rNew);
1680 }
1681 
1682 // Adds a single word. The list will immediately be written to the file!
1683 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1684                                          LanguageType eLang )
1685 {
1686     SvxAutoCorrectLanguageLists* pLists = nullptr;
1687     //either the right language is present or it is set in the general list
1688     auto iter = m_aLangTable.find(LanguageTag(eLang));
1689     if (iter != m_aLangTable.end())
1690         pLists = iter->second.get();
1691     else
1692     {
1693         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1694         iter = m_aLangTable.find(aLangTagUndetermined);
1695         if (iter != m_aLangTable.end())
1696             pLists = iter->second.get();
1697         else if(CreateLanguageFile(aLangTagUndetermined))
1698             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1699     }
1700     OSL_ENSURE(pLists, "No auto correction file!");
1701     return pLists && pLists->AddToWrdSttExceptList(rNew);
1702 }
1703 
1704 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1705                                              sal_Int32 nPos)
1706 {
1707     OUString sRet;
1708     if( !nPos )
1709         return sRet;
1710 
1711     sal_Int32 nEnd = nPos;
1712 
1713     // it must be followed by a blank or tab!
1714     if( ( nPos < rTxt.getLength() &&
1715         !IsWordDelim( rTxt[ nPos ])) ||
1716         IsWordDelim( rTxt[ --nPos ]))
1717         return sRet;
1718 
1719     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1720         ;
1721 
1722     // Found a Paragraph-start or a Blank, search for the word shortcut in
1723     // auto.
1724     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1725     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1726         --nCapLttrPos;          // Beginning of paragraph and no Blank!
1727 
1728     while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1729         if( ++nCapLttrPos >= nEnd )
1730             return sRet;
1731 
1732     if( 3 > nEnd - nCapLttrPos )
1733         return sRet;
1734 
1735     const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1736 
1737     CharClass& rCC = GetCharClass(eLang);
1738 
1739     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1740         return sRet;
1741 
1742     sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1743     return sRet;
1744 }
1745 
1746 // static
1747 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt,
1748                                                           const sal_Int32 nPos)
1749 {
1750     constexpr sal_Int32 nMinLen = 3;
1751     constexpr sal_Int32 nMaxLen = 9;
1752     std::vector<OUString> aRes;
1753     if (nPos >= nMinLen)
1754     {
1755         sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1756         // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1757         if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1758         {
1759             while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1760                 ++nBegin;
1761         }
1762         if (nBegin + nMinLen <= nPos)
1763         {
1764             OUString sRes = rTxt.copy(nBegin, nPos - nBegin);
1765             aRes.push_back(sRes);
1766             bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1767             for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1768             {
1769                 bool bAdd = bLastStartedWithDelim;
1770                 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1771                 bAdd = bAdd || bLastStartedWithDelim;
1772                 if (bAdd)
1773                     aRes.push_back(sRes.copy(i));
1774             }
1775         }
1776     }
1777     return aRes;
1778 }
1779 
1780 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1781 {
1782     OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1783 
1784     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1785     OUString sShareDirFile( sUserDirFile );
1786 
1787     SvxAutoCorrectLanguageLists* pLists = nullptr;
1788 
1789     tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1790 
1791     auto nFndPos = aLastFileTable.find(rLanguageTag);
1792     if(nFndPos != aLastFileTable.end() &&
1793        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1794        nAktTime - nLastCheckTime < nMinTime)
1795     {
1796         // no need to test the file, because the last check is not older then
1797         // 2 minutes.
1798         if( bNewFile )
1799         {
1800             sShareDirFile = sUserDirFile;
1801             pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1802             LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1803             m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1804             aLastFileTable.erase(nFndPos);
1805         }
1806     }
1807     else if(
1808              ( FStatHelper::IsDocument( sUserDirFile ) ||
1809                FStatHelper::IsDocument( sShareDirFile =
1810                    GetAutoCorrFileName( rLanguageTag ) ) ||
1811                FStatHelper::IsDocument( sShareDirFile =
1812                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1813              ) ||
1814         ( sShareDirFile = sUserDirFile, bNewFile )
1815           )
1816     {
1817         pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1818         LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1819         m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1820         if (nFndPos != aLastFileTable.end())
1821             aLastFileTable.erase(nFndPos);
1822     }
1823     else if( !bNewFile )
1824     {
1825         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1826     }
1827     return pLists != nullptr;
1828 }
1829 
1830 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1831                                 LanguageType eLang )
1832 {
1833     LanguageTag aLanguageTag( eLang);
1834     auto const iter = m_aLangTable.find(aLanguageTag);
1835     if (iter != m_aLangTable.end())
1836         return iter->second->PutText(rShort, rLong);
1837     if(CreateLanguageFile(aLanguageTag))
1838         return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong);
1839     return false;
1840 }
1841 
1842 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1843                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1844                                               LanguageType eLang )
1845 {
1846     LanguageTag aLanguageTag( eLang);
1847     auto const iter = m_aLangTable.find(aLanguageTag);
1848     if (iter != m_aLangTable.end())
1849     {
1850         iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1851     }
1852     else if(CreateLanguageFile( aLanguageTag ))
1853     {
1854         m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1855     }
1856 }
1857 
1858 //  - return the replacement text (only for SWG-Format, all other
1859 //    can be taken from the word list!)
1860 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1861 {
1862     return false;
1863 }
1864 
1865 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1866 {
1867 }
1868 
1869 // Text with attribution (only the SWG - SWG format!)
1870 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1871                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1872 {
1873     return false;
1874 }
1875 
1876 OUString EncryptBlockName_Imp(const OUString& rName)
1877 {
1878     OUStringBuffer aName;
1879     aName.append('#').append(rName);
1880     for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1881     {
1882         if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1883             aName[nPos] &= 0x0f;
1884     }
1885     return aName.makeStringAndClear();
1886 }
1887 
1888 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1889 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
1890 {
1891     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1892     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1893 
1894     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1895     {
1896         switch (aBuf[nPos])
1897         {
1898             case '!':
1899             case '/':
1900             case ':':
1901             case '.':
1902             case '\\':
1903                 aBuf[nPos] = '_';
1904                 break;
1905             default:
1906                 break;
1907         }
1908     }
1909 
1910     rPackageName = aBuf.makeStringAndClear();
1911 }
1912 
1913 static const SvxAutocorrWord* lcl_SearchWordsInList(
1914                 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt,
1915                 sal_Int32& rStt, sal_Int32 nEndPos)
1916 {
1917     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1918     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1919 }
1920 
1921 // the search for the words in the substitution table
1922 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1923                 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1924                 SvxAutoCorrDoc&, LanguageTag& rLang )
1925 {
1926     const SvxAutocorrWord* pRet = nullptr;
1927     LanguageTag aLanguageTag( rLang);
1928     if( aLanguageTag.isSystemLocale() )
1929         aLanguageTag.reset( MsLangId::getSystemLanguage());
1930 
1931     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1932      * list instead? */
1933 
1934     // First search for eLang, then US-English -> English
1935     // and last in LANGUAGE_UNDETERMINED
1936     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1937     {
1938         //the language is available - so bring it on
1939         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1940         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1941         if( pRet )
1942         {
1943             rLang = aLanguageTag;
1944             return pRet;
1945         }
1946         else
1947             return nullptr;
1948     }
1949 
1950     // If it still could not be found here, then keep on searching
1951     LanguageType eLang = aLanguageTag.getLanguageType();
1952     // the primary language for example EN
1953     aLanguageTag.reset(aLanguageTag.getLanguage());
1954     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1955     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1956                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1957                  CreateLanguageFile(aLanguageTag, false)))
1958     {
1959         //the language is available - so bring it on
1960         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1961         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1962         if( pRet )
1963         {
1964             rLang = aLanguageTag;
1965             return pRet;
1966         }
1967     }
1968 
1969     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1970             CreateLanguageFile(aLanguageTag, false))
1971     {
1972         //the language is available - so bring it on
1973         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1974         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1975         if( pRet )
1976         {
1977             rLang = aLanguageTag;
1978             return pRet;
1979         }
1980     }
1981     return nullptr;
1982 }
1983 
1984 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1985                                              const OUString& sWord )
1986 {
1987     LanguageTag aLanguageTag( eLang);
1988 
1989     /* TODO-BCP47: again horrible ugliness */
1990 
1991     // First search for eLang, then primary language of eLang
1992     // and last in LANGUAGE_UNDETERMINED
1993 
1994     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1995     {
1996         //the language is available - so bring it on
1997         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1998         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
1999             return true;
2000     }
2001 
2002     // If it still could not be found here, then keep on searching
2003     // the primary language for example EN
2004     aLanguageTag.reset(aLanguageTag.getLanguage());
2005     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2006     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2007                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2008                  CreateLanguageFile(aLanguageTag, false)))
2009     {
2010         //the language is available - so bring it on
2011         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2012         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2013             return true;
2014     }
2015 
2016     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2017             CreateLanguageFile(aLanguageTag, false))
2018     {
2019         //the language is available - so bring it on
2020         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2021         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2022             return true;
2023     }
2024     return false;
2025 }
2026 
2027 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2028 {
2029     SvStringsISortDtor::const_iterator it = pList->find( "~" );
2030     SvStringsISortDtor::size_type nPos = it - pList->begin();
2031     if( nPos < pList->size() )
2032     {
2033         OUString sLowerWord(sWord.toAsciiLowerCase());
2034         OUString sAbr;
2035         for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2036         {
2037             sAbr = (*pList)[ n ];
2038             if (sAbr[0] != '~')
2039                 break;
2040             // ~ and ~. are not allowed!
2041             if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2042             {
2043                 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2044                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2045                 {
2046                     if( !--i )      // agrees
2047                         return true;
2048 
2049                     if( sLowerAbk[i] != sLowerWord[--ii])
2050                         break;
2051                 }
2052             }
2053         }
2054     }
2055     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2056             "Wrongly sorted exception list?" );
2057     return false;
2058 }
2059 
2060 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2061                                 const OUString& sWord, bool bAbbreviation)
2062 {
2063     LanguageTag aLanguageTag( eLang);
2064 
2065     /* TODO-BCP47: did I mention terrible horrible ugliness? */
2066 
2067     // First search for eLang, then primary language of eLang
2068     // and last in LANGUAGE_UNDETERMINED
2069 
2070     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2071     {
2072         //the language is available - so bring it on
2073         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2074         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2075             return true;
2076     }
2077 
2078     // If it still could not be found here, then keep on searching
2079     // the primary language for example EN
2080     aLanguageTag.reset(aLanguageTag.getLanguage());
2081     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2082     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2083                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2084                  CreateLanguageFile(aLanguageTag, false)))
2085     {
2086         //the language is available - so bring it on
2087         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2088         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2089             return true;
2090     }
2091 
2092     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2093             CreateLanguageFile(aLanguageTag, false))
2094     {
2095         //the language is available - so bring it on
2096         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2097         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2098             return true;
2099     }
2100     return false;
2101 }
2102 
2103 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2104                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
2105 {
2106     OUString sRet, sExt( rLanguageTag.getBcp47() );
2107     if (bUnlocalized)
2108     {
2109         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2110         std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2111         if (!vecFallBackStrings.empty())
2112            sExt = vecFallBackStrings[0];
2113     }
2114 
2115     sExt = "_" + sExt + ".dat";
2116     if( bNewFile )
2117         sRet = sUserAutoCorrFile + sExt;
2118     else if( !bTst )
2119         sRet = sShareAutoCorrFile + sExt;
2120     else
2121     {
2122         // test first in the user directory - if not exist, then
2123         sRet = sUserAutoCorrFile + sExt;
2124         if( !FStatHelper::IsDocument( sRet ))
2125             sRet = sShareAutoCorrFile + sExt;
2126     }
2127     return sRet;
2128 }
2129 
2130 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2131                 SvxAutoCorrect& rParent,
2132                 const OUString& rShareAutoCorrectFile,
2133                 const OUString& rUserAutoCorrectFile)
2134 :   sShareAutoCorrFile( rShareAutoCorrectFile ),
2135     sUserAutoCorrFile( rUserAutoCorrectFile ),
2136     aModifiedDate( Date::EMPTY ),
2137     aModifiedTime( tools::Time::EMPTY ),
2138     aLastCheckTime( tools::Time::EMPTY ),
2139     rAutoCorrect(rParent),
2140     nFlags(ACFlags::NONE)
2141 {
2142 }
2143 
2144 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2145 {
2146 }
2147 
2148 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2149 {
2150     // Access the file system only every 2 minutes to check the date stamp
2151     bool bRet = false;
2152 
2153     tools::Time nMinTime( 0, 2 );
2154     tools::Time nAktTime( tools::Time::SYSTEM );
2155     if( aLastCheckTime <= nAktTime) // overflow?
2156         return false;
2157     nAktTime -= aLastCheckTime;
2158     if( nAktTime > nMinTime )     // min time past
2159     {
2160         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2161         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2162                                             &aTstDate, &aTstTime ) &&
2163             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2164         {
2165             bRet = true;
2166             // then remove all the lists fast!
2167             if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2168             {
2169                 pCplStt_ExcptLst.reset();
2170             }
2171             if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst )
2172             {
2173                 pWrdStt_ExcptLst.reset();
2174             }
2175             if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2176             {
2177                 pAutocorr_List.reset();
2178             }
2179             nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad );
2180         }
2181         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2182     }
2183     return bRet;
2184 }
2185 
2186 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2187                                         std::unique_ptr<SvStringsISortDtor>& rpLst,
2188                                         const OUString& sStrmName,
2189                                         tools::SvRef<SotStorage>& rStg)
2190 {
2191     if( rpLst )
2192         rpLst->clear();
2193     else
2194         rpLst.reset( new SvStringsISortDtor );
2195 
2196     {
2197         if( rStg.is() && rStg->IsStream( sStrmName ) )
2198         {
2199             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2200                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2201             if( ERRCODE_NONE != xStrm->GetError())
2202             {
2203                 xStrm.clear();
2204                 rStg.clear();
2205                 RemoveStream_Imp( sStrmName );
2206             }
2207             else
2208             {
2209                 uno::Reference< uno::XComponentContext > xContext =
2210                     comphelper::getProcessComponentContext();
2211 
2212                 xml::sax::InputSource aParserInput;
2213                 aParserInput.sSystemId = sStrmName;
2214 
2215                 xStrm->Seek( 0 );
2216                 xStrm->SetBufferSize( 8 * 1024 );
2217                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2218 
2219                 // get filter
2220                 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2221 
2222                 // connect parser and filter
2223                 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2224                 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2225                 xParser->setFastDocumentHandler( xFilter );
2226                 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2227                 xParser->setTokenHandler( xTokenHandler );
2228 
2229                 // parse
2230                 try
2231                 {
2232                     xParser->parseStream( aParserInput );
2233                 }
2234                 catch( const xml::sax::SAXParseException& )
2235                 {
2236                     // re throw ?
2237                 }
2238                 catch( const xml::sax::SAXException& )
2239                 {
2240                     // re throw ?
2241                 }
2242                 catch( const io::IOException& )
2243                 {
2244                     // re throw ?
2245                 }
2246             }
2247         }
2248 
2249         // Set time stamp
2250         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2251                                         &aModifiedDate, &aModifiedTime );
2252         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2253     }
2254 
2255 }
2256 
2257 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2258                             const SvStringsISortDtor& rLst,
2259                             const OUString& sStrmName,
2260                             tools::SvRef<SotStorage> const &rStg,
2261                             bool bConvert )
2262 {
2263     if( !rStg.is() )
2264         return;
2265 
2266     if( rLst.empty() )
2267     {
2268         rStg->Remove( sStrmName );
2269         rStg->Commit();
2270     }
2271     else
2272     {
2273         tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2274                 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2275         if( xStrm.is() )
2276         {
2277             xStrm->SetSize( 0 );
2278             xStrm->SetBufferSize( 8192 );
2279             xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2280 
2281 
2282             uno::Reference< uno::XComponentContext > xContext =
2283                 comphelper::getProcessComponentContext();
2284 
2285             uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2286             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2287             xWriter->setOutputStream(xOut);
2288 
2289             uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2290             rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2291 
2292             xExp->exportDoc( XML_BLOCK_LIST );
2293 
2294             xStrm->Commit();
2295             if( xStrm->GetError() == ERRCODE_NONE )
2296             {
2297                 xStrm.clear();
2298                 if (!bConvert)
2299                 {
2300                     rStg->Commit();
2301                     if( ERRCODE_NONE != rStg->GetError() )
2302                     {
2303                         rStg->Remove( sStrmName );
2304                         rStg->Commit();
2305                     }
2306                 }
2307             }
2308         }
2309     }
2310 }
2311 
2312 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2313 {
2314     if( pAutocorr_List )
2315         pAutocorr_List->DeleteAndDestroyAll();
2316     else
2317         pAutocorr_List.reset( new SvxAutocorrWordList() );
2318 
2319     try
2320     {
2321         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2322         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2323         uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2324 
2325         xml::sax::InputSource aParserInput;
2326         aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2327         aParserInput.aInputStream = xStrm->getInputStream();
2328 
2329         // get parser
2330         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2331         SAL_INFO("editeng", "AutoCorrect Import" );
2332         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2333         uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2334 
2335         // connect parser and filter
2336         xParser->setFastDocumentHandler( xFilter );
2337         xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2338         xParser->setTokenHandler(xTokenHandler);
2339 
2340         // parse
2341         xParser->parseStream( aParserInput );
2342     }
2343     catch ( const uno::Exception& )
2344     {
2345         TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2346     }
2347 
2348     // Set time stamp
2349     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2350                                     &aModifiedDate, &aModifiedTime );
2351     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2352 
2353     return pAutocorr_List.get();
2354 }
2355 
2356 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2357 {
2358     if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2359     {
2360         LoadAutocorrWordList();
2361         if( !pAutocorr_List )
2362         {
2363             OSL_ENSURE( false, "No valid list" );
2364             pAutocorr_List.reset( new SvxAutocorrWordList() );
2365         }
2366         nFlags |= ACFlags::ChgWordLstLoad;
2367     }
2368     return pAutocorr_List.get();
2369 }
2370 
2371 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2372 {
2373     if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2374     {
2375         LoadCplSttExceptList();
2376         if( !pCplStt_ExcptLst )
2377         {
2378             OSL_ENSURE( false, "No valid list" );
2379             pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2380         }
2381         nFlags |= ACFlags::CplSttLstLoad;
2382     }
2383     return pCplStt_ExcptLst.get();
2384 }
2385 
2386 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2387 {
2388     bool bRet = false;
2389     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2390     {
2391         MakeUserStorage_Impl();
2392         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2393 
2394         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2395 
2396         xStg = nullptr;
2397         // Set time stamp
2398         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2399                                             &aModifiedDate, &aModifiedTime );
2400         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2401         bRet = true;
2402     }
2403     return bRet;
2404 }
2405 
2406 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2407 {
2408     bool bRet = false;
2409     SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2410     if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2411     {
2412         MakeUserStorage_Impl();
2413         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2414 
2415         SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2416 
2417         xStg = nullptr;
2418         // Set time stamp
2419         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2420                                             &aModifiedDate, &aModifiedTime );
2421         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2422         bRet = true;
2423     }
2424     return bRet;
2425 }
2426 
2427 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2428 {
2429     try
2430     {
2431         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2432         if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2433             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2434     }
2435     catch (const css::ucb::ContentCreationException&)
2436     {
2437     }
2438     return pCplStt_ExcptLst.get();
2439 }
2440 
2441 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2442 {
2443     MakeUserStorage_Impl();
2444     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2445 
2446     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2447 
2448     xStg = nullptr;
2449 
2450     // Set time stamp
2451     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2452                                             &aModifiedDate, &aModifiedTime );
2453     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2454 }
2455 
2456 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2457 {
2458     try
2459     {
2460         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2461         if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2462             LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2463     }
2464     catch (const css::ucb::ContentCreationException &)
2465     {
2466         TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList");
2467     }
2468     return pWrdStt_ExcptLst.get();
2469 }
2470 
2471 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2472 {
2473     MakeUserStorage_Impl();
2474     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2475 
2476     SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2477 
2478     xStg = nullptr;
2479     // Set time stamp
2480     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2481                                             &aModifiedDate, &aModifiedTime );
2482     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2483 }
2484 
2485 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2486 {
2487     if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2488     {
2489         LoadWrdSttExceptList();
2490         if( !pWrdStt_ExcptLst )
2491         {
2492             OSL_ENSURE( false, "No valid list" );
2493             pWrdStt_ExcptLst.reset( new SvStringsISortDtor );
2494         }
2495         nFlags |= ACFlags::WrdSttLstLoad;
2496     }
2497     return pWrdStt_ExcptLst.get();
2498 }
2499 
2500 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2501 {
2502     if( sShareAutoCorrFile != sUserAutoCorrFile )
2503     {
2504         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2505         if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2506             xStg->IsStream( rName ) )
2507         {
2508             xStg->Remove( rName );
2509             xStg->Commit();
2510 
2511             xStg = nullptr;
2512         }
2513     }
2514 }
2515 
2516 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2517 {
2518     // The conversion needs to happen if the file is already in the user
2519     // directory and is in the old format. Additionally it needs to
2520     // happen when the file is being copied from share to user.
2521 
2522     bool bError = false, bConvert = false, bCopy = false;
2523     INetURLObject aDest;
2524     INetURLObject aSource;
2525 
2526     if (sUserAutoCorrFile != sShareAutoCorrFile )
2527     {
2528         aSource = INetURLObject ( sShareAutoCorrFile );
2529         aDest = INetURLObject ( sUserAutoCorrFile );
2530         if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2531         {
2532             aDest.SetExtension ( u"bak" );
2533             bConvert = true;
2534         }
2535         bCopy = true;
2536     }
2537     else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2538     {
2539         aSource = INetURLObject ( sUserAutoCorrFile );
2540         aDest = INetURLObject ( sUserAutoCorrFile );
2541         aDest.SetExtension ( u"bak" );
2542         bCopy = bConvert = true;
2543     }
2544     if (bCopy)
2545     {
2546         try
2547         {
2548             OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2549             sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2550             sMain = sMain.copy(0, nSlashPos);
2551             ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2552             TransferInfo aInfo;
2553             aInfo.NameClash = NameClash::OVERWRITE;
2554             aInfo.NewTitle = aDest.GetLastName();
2555             aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2556             aInfo.MoveData  = false;
2557             aNewContent.executeCommand( "transfer", Any(aInfo));
2558         }
2559         catch (...)
2560         {
2561             bError = true;
2562         }
2563     }
2564     if (bConvert && !bError)
2565     {
2566         tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2567         tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2568 
2569         if( xSrcStg.is() && xDstStg.is() )
2570         {
2571             std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2572 
2573             if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2574                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );
2575 
2576             if (pTmpWordList)
2577             {
2578                 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
2579                 pTmpWordList.reset();
2580             }
2581 
2582 
2583             if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2584                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2585 
2586             if (pTmpWordList)
2587             {
2588                 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2589                 pTmpWordList->clear();
2590             }
2591 
2592             GetAutocorrWordList();
2593             MakeBlocklist_Imp( *xDstStg );
2594             sShareAutoCorrFile = sUserAutoCorrFile;
2595             xDstStg = nullptr;
2596             try
2597             {
2598                 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2599                 aContent.executeCommand ( "delete", makeAny ( true ) );
2600             }
2601             catch (...)
2602             {
2603             }
2604         }
2605     }
2606     else if( bCopy && !bError )
2607         sShareAutoCorrFile = sUserAutoCorrFile;
2608 }
2609 
2610 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2611 {
2612     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2613     if( !bRemove )
2614     {
2615         tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2616                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2617         if( refList.is() )
2618         {
2619             refList->SetSize( 0 );
2620             refList->SetBufferSize( 8192 );
2621             refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2622 
2623             uno::Reference< uno::XComponentContext > xContext =
2624                 comphelper::getProcessComponentContext();
2625 
2626             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2627             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2628             xWriter->setOutputStream(xOut);
2629 
2630             rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2631 
2632             xExp->exportDoc( XML_BLOCK_LIST );
2633 
2634             refList->Commit();
2635             bRet = ERRCODE_NONE == refList->GetError();
2636             if( bRet )
2637             {
2638                 refList.clear();
2639                 rStg.Commit();
2640                 if( ERRCODE_NONE != rStg.GetError() )
2641                 {
2642                     bRemove = true;
2643                     bRet = false;
2644                 }
2645             }
2646         }
2647         else
2648             bRet = false;
2649     }
2650 
2651     if( bRemove )
2652     {
2653         rStg.Remove( pXMLImplAutocorr_ListStr );
2654         rStg.Commit();
2655     }
2656 
2657     return bRet;
2658 }
2659 
2660 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2661 {
2662     // First get the current list!
2663     GetAutocorrWordList();
2664 
2665     MakeUserStorage_Impl();
2666     tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2667 
2668     bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2669 
2670     if( bRet )
2671     {
2672         for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2673         {
2674             std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2675             if( xFoundEntry )
2676             {
2677                 if( !xFoundEntry->IsTextOnly() )
2678                 {
2679                     OUString aName( aWordToDelete.GetShort() );
2680                     if (xStorage->IsOLEStorage())
2681                         aName = EncryptBlockName_Imp(aName);
2682                     else
2683                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2684 
2685                     if( xStorage->IsContained( aName ) )
2686                     {
2687                         xStorage->Remove( aName );
2688                         bRet = xStorage->Commit();
2689                     }
2690                 }
2691             }
2692         }
2693 
2694         for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2695         {
2696             SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2697             std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2698             if( xRemoved )
2699             {
2700                 if( !xRemoved->IsTextOnly() )
2701                 {
2702                     // Still have to remove the Storage
2703                     OUString sStorageName( aWordToAdd.GetShort() );
2704                     if (xStorage->IsOLEStorage())
2705                         sStorageName = EncryptBlockName_Imp(sStorageName);
2706                     else
2707                         GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2708 
2709                     if( xStorage->IsContained( sStorageName ) )
2710                         xStorage->Remove( sStorageName );
2711                 }
2712             }
2713             bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2714 
2715             if ( !bRet )
2716             {
2717                 break;
2718             }
2719         }
2720 
2721         if ( bRet )
2722         {
2723             bRet = MakeBlocklist_Imp( *xStorage );
2724         }
2725     }
2726     return bRet;
2727 }
2728 
2729 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2730 {
2731     // First get the current list!
2732     GetAutocorrWordList();
2733 
2734     MakeUserStorage_Impl();
2735     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2736 
2737     bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2738 
2739     // Update the word list
2740     if( bRet )
2741     {
2742         SvxAutocorrWord aNew(rShort, rLong, true );
2743         std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2744         if( xRemove )
2745         {
2746             if( !xRemove->IsTextOnly() )
2747             {
2748                 // Still have to remove the Storage
2749                 OUString sStgNm( rShort );
2750                 if (xStg->IsOLEStorage())
2751                     sStgNm = EncryptBlockName_Imp(sStgNm);
2752                 else
2753                     GeneratePackageName ( rShort, sStgNm);
2754 
2755                 if( xStg->IsContained( sStgNm ) )
2756                     xStg->Remove( sStgNm );
2757             }
2758         }
2759 
2760         if( pAutocorr_List->Insert( std::move(aNew) ) )
2761         {
2762             bRet = MakeBlocklist_Imp( *xStg );
2763             xStg = nullptr;
2764         }
2765         else
2766         {
2767             bRet = false;
2768         }
2769     }
2770     return bRet;
2771 }
2772 
2773 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2774                                                SfxObjectShell& rShell )
2775 {
2776     // First get the current list!
2777     GetAutocorrWordList();
2778 
2779     MakeUserStorage_Impl();
2780 
2781     try
2782     {
2783         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2784         OUString sLong;
2785         bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2786         xStg = nullptr;
2787 
2788         // Update the word list
2789         if( bRet )
2790         {
2791             if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2792             {
2793                 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2794                 MakeBlocklist_Imp( *xStor );
2795             }
2796         }
2797     }
2798     catch ( const uno::Exception& )
2799     {
2800     }
2801 }
2802 
2803 // Keep the list sorted ...
2804 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2805 {
2806     bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2807     {
2808         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2809         return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2810     }
2811 };
2812 
2813 namespace {
2814 
2815 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2816 
2817 }
2818 
2819 struct SvxAutocorrWordList::Impl
2820 {
2821 
2822     // only one of these contains the data
2823     // maSortedVector is manually sorted so we can optimise data movement
2824     mutable AutocorrWordSetType maSortedVector;
2825     mutable AutocorrWordHashType maHash; // key is 'Short'
2826 
2827     void DeleteAndDestroyAll()
2828     {
2829         maHash.clear();
2830         maSortedVector.clear();
2831     }
2832 };
2833 
2834 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2835 
2836 SvxAutocorrWordList::~SvxAutocorrWordList()
2837 {
2838 }
2839 
2840 void SvxAutocorrWordList::DeleteAndDestroyAll()
2841 {
2842     mpImpl->DeleteAndDestroyAll();
2843 }
2844 
2845 // returns true if inserted
2846 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2847 {
2848     if ( mpImpl->maSortedVector.empty() ) // use the hash
2849     {
2850         OUString aShort = aWord.GetShort();
2851         auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2852         if (inserted)
2853             return &(it->second);
2854         return nullptr;
2855     }
2856     else
2857     {
2858         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2859         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2860         if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2861         {
2862             it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2863             return &*it;
2864         }
2865         return nullptr;
2866     }
2867 }
2868 
2869 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2870 {
2871     (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2872 }
2873 
2874 bool SvxAutocorrWordList::empty() const
2875 {
2876     return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2877 }
2878 
2879 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2880 {
2881 
2882     if ( mpImpl->maSortedVector.empty() ) // use the hash
2883     {
2884         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2885         if( it != mpImpl->maHash.end() )
2886         {
2887             SvxAutocorrWord pMatch = std::move(it->second);
2888             mpImpl->maHash.erase (it);
2889             return pMatch;
2890         }
2891     }
2892     else
2893     {
2894         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2895         if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2896         {
2897             SvxAutocorrWord pMatch = std::move(*it);
2898             mpImpl->maSortedVector.erase (it);
2899             return pMatch;
2900         }
2901     }
2902     return std::optional<SvxAutocorrWord>();
2903 }
2904 
2905 // return the sorted contents - defer sorting until we have to.
2906 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
2907 {
2908     // convert from hash to set permanently
2909     if ( mpImpl->maSortedVector.empty() )
2910     {
2911         std::vector<SvxAutocorrWord> tmp;
2912         tmp.reserve(mpImpl->maHash.size());
2913         for (auto & rPair : mpImpl->maHash)
2914             tmp.emplace_back(std::move(rPair.second));
2915         mpImpl->maHash.clear();
2916         // sort twice - this gets the list into mostly-sorted order, which
2917         // reduces the number of times we need to invoke the expensive ICU collate fn.
2918         std::sort(tmp.begin(), tmp.end(),
2919             [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
2920             {
2921                 return lhs.GetShort() < rhs.GetShort();
2922             });
2923         // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
2924         // stable_sort is twice as fast as sort in this situation because it does
2925         // fewer comparison operations.
2926         std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
2927         mpImpl->maSortedVector = std::move(tmp);
2928     }
2929     return mpImpl->maSortedVector;
2930 }
2931 
// Checks whether the list entry pFnd applies to the text of rTxt that ends
// at nEndPos.
//
// The short text of the entry may be a plain word or a pattern with a
// leading and/or trailing ".*" wildcard (".*word", "word.*", ".*word.*").
//
// pFnd     entry to test
// rTxt     text to search in
// rStt     in: start position of the current word; out: start of the text
//          to be replaced. NOTE: it may also have been changed when nullptr
//          is returned (it is written before the Insert() calls below).
// nEndPos  position just behind the text to be matched
//
// Returns pFnd for a plain match, a pointer to a newly inserted list entry
// (concrete matched text -> replacement) for a wildcard match, and nullptr
// if there is no match or the new entry could not be inserted.
const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
                                      const OUString &rTxt,
                                      sal_Int32 &rStt,
                                      sal_Int32 nEndPos) const
{
    const OUString& rChk = pFnd->GetShort();

    // length of the wildcard at either end of the pattern (2 or 0)
    sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
    sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
    sal_Int32 nSttWdPos = nEndPos;

    // direct replacement of keywords surrounded by colons (for example, ":name:")
    // (the closing colon is the character at nEndPos itself)
    bool bColonNameColon = rTxt.getLength() > nEndPos &&
        rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
    // only continue if the text up to nEndPos is long enough to hold the
    // pattern without its wildcards
    if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
    {

        bool bWasWordDelim = false;
        // position where the (wildcard-less) pattern would have to start
        sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
        if (bColonNameColon)
            nCalcStt++;
        // plain "word" and ".*word": the candidate must start at the text
        // start, at rStt, or (if before rStt) right after a word delimiter;
        // left wildcards and colon keywords are exempt from that check
        if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
              ( nCalcStt < rStt &&
                IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
        {
            TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
            OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
            if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
            {
                rStt = nCalcStt;
                if (!left_wildcard)
                {
                    // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
                    if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
                        return nullptr;
                    return pFnd;
                }
                // get the first word delimiter position before the matching ".*word" pattern
                while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
                    ;
                if (bWasWordDelim) rStt++;
                // text in front of the matched word that stays in place
                OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
                // avoid double spaces before simple "word" replacement
                left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
                // remember the concrete text and its replacement as a new
                // list item; nullptr from Insert() means it already exists
                if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
                    return pNew;
            }
        } else
        // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
        if ( right_wildcard )
        {

            // the pattern without its wildcards
            OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
            // Get the last word delimiter position
            bool not_suffix;

            while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
                ;
            // search the first occurrence (with a left word delimitation, if needed)
            sal_Int32 nFndPos = -1;
            do {
                nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
                if (nFndPos == -1)
                    break;
                // occurrence ends at or before the last word delimiter,
                // i.e. it does not belong to the word ending at nEndPos
                not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
            } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );

            if ( nFndPos != -1 )
            {
                sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"

                if ( left_wildcard )
                {
                    // get the first word delimiter position before the matching ".*word.*" pattern
                    while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
                        ;
                    if (bWasWordDelim) nFndPos++;
                }
                // the match would start at or behind the end of the text to replace
                if (nEndPos + extra_repl <= nFndPos)
                {
                    return nullptr;
                }
                // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
                OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);

                OUString aLong;
                rStt = nFndPos;
                if ( !left_wildcard )
                {
                    // "word.*": replacement followed by the text matched by the wildcard
                    sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
                    aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
                } else {
                    // ".*word.*": replace every occurrence of the pattern
                    // until a word delimiter is found in between
                    OUStringBuffer buf;
                    do {
                        nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
                        if (nSttWdPos != -1)
                        {
                            sal_Int32 nTmp(nFndPos);
                            while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp]))
                                nTmp++;
                            if (nTmp < nSttWdPos)
                                break; // word delimiter found
                            buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
                            nFndPos = nSttWdPos + sTmp.getLength();
                        }
                    } while (nSttWdPos != -1);
                    // append the remaining text behind the last occurrence
                    if (nEndPos - nFndPos > extra_repl)
                        buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos));
                    aLong = buf.makeStringAndClear();
                }
                if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
                {
                    // the new entry stays in the list either way, but it is
                    // only reported if the text ends at nEndPos or a word
                    // delimiter follows there
                    if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos )
                        return pNew;
                }
            }
        }
    }
    return nullptr;
}
3052 
3053 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
3054                                                               sal_Int32 nEndPos) const
3055 {
3056     for (auto const& elem : mpImpl->maHash)
3057     {
3058         if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
3059             return pTmp;
3060     }
3061 
3062     for (auto const& elem : mpImpl->maSortedVector)
3063     {
3064         if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
3065             return pTmp;
3066     }
3067     return nullptr;
3068 }
3069 
3070 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
3071