xref: /core/editeng/source/misc/svxacorr.cxx (revision d1be3d80)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <string_view>
22 #include <sal/config.h>
23 
24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
25 #include <com/sun/star/embed/XStorage.hpp>
26 #include <com/sun/star/io/IOException.hpp>
27 #include <com/sun/star/io/XStream.hpp>
28 #include <tools/urlobj.hxx>
29 #include <i18nlangtag/mslangid.hxx>
30 #include <i18nutil/transliteration.hxx>
31 #include <sal/log.hxx>
32 #include <osl/diagnose.h>
33 #include <vcl/svapp.hxx>
34 #include <vcl/settings.hxx>
35 #include <svl/fstathelper.hxx>
36 #include <svl/urihelper.hxx>
37 #include <unotools/charclass.hxx>
38 #include <com/sun/star/i18n/UnicodeType.hpp>
39 #include <unotools/collatorwrapper.hxx>
40 #include <com/sun/star/i18n/UnicodeScript.hpp>
41 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
42 #include <unotools/localedatawrapper.hxx>
43 #include <unotools/transliterationwrapper.hxx>
44 #include <comphelper/processfactory.hxx>
45 #include <comphelper/storagehelper.hxx>
46 #include <comphelper/string.hxx>
47 #include <editeng/editids.hrc>
48 #include <sot/storage.hxx>
49 #include <editeng/udlnitem.hxx>
50 #include <editeng/wghtitem.hxx>
51 #include <editeng/postitem.hxx>
52 #include <editeng/crossedoutitem.hxx>
53 #include <editeng/escapementitem.hxx>
54 #include <editeng/svxacorr.hxx>
55 #include <editeng/unolingu.hxx>
56 #include <vcl/window.hxx>
57 #include <com/sun/star/xml/sax/InputSource.hpp>
58 #include <com/sun/star/xml/sax/FastParser.hpp>
59 #include <com/sun/star/xml/sax/Writer.hpp>
60 #include <com/sun/star/xml/sax/SAXParseException.hpp>
61 #include <unotools/streamwrap.hxx>
62 #include "SvXMLAutoCorrectImport.hxx"
63 #include "SvXMLAutoCorrectExport.hxx"
64 #include "SvXMLAutoCorrectTokenHandler.hxx"
65 #include <ucbhelper/content.hxx>
66 #include <com/sun/star/ucb/ContentCreationException.hpp>
67 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
68 #include <com/sun/star/ucb/TransferInfo.hpp>
69 #include <com/sun/star/ucb/NameClash.hpp>
70 #include <tools/diagnose_ex.h>
71 #include <xmloff/xmltoken.hxx>
72 #include <unordered_map>
73 #include <rtl/character.hxx>
74 
75 using namespace ::com::sun::star::ucb;
76 using namespace ::com::sun::star::uno;
77 using namespace ::com::sun::star::xml::sax;
78 using namespace ::com::sun::star;
79 using namespace ::xmloff::token;
80 using namespace ::utl;
81 
namespace {

// Combinable bit flags, one bit per punctuation mark named by the enumerator.
// NOTE(review): the names suggest sentence-ending punctuation; the users of
// this enum are outside this section - confirm there.
enum class Flags {
    NONE            = 0,
    FullStop        = 1 << 0,
    ExclamationMark = 1 << 1,
    QuestionMark    = 1 << 2,
};

}
92 
93 namespace o3tl {
94     template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
95 }
96 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space
97 
98 constexpr OUStringLiteral pXMLImplWrdStt_ExcptLstStr = u"WordExceptList.xml";
99 constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml";
100 constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml";
101 
102 const char
103     /* also at these beginnings - Brackets and all kinds of begin characters */
104     sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
105     /* also at these ends - Brackets and all kinds of begin characters */
106     sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
107 
108 static OUString EncryptBlockName_Imp(const OUString& rName);
109 
110 static bool NonFieldWordDelim( const sal_Unicode c )
111 {
112     return ' ' == c || '\t' == c || 0x0a == c ||
113             cNonBreakingSpace == c || 0x2011 == c;
114 }
115 
116 static bool IsWordDelim( const sal_Unicode c )
117 {
118     return c == 0x1 || NonFieldWordDelim(c);
119 }
120 
121 
122 static bool IsLowerLetter( sal_Int32 nCharType )
123 {
124     return CharClass::isLetterType( nCharType ) &&
125            ( css::i18n::KCharacterType::LOWER & nCharType);
126 }
127 
128 static bool IsUpperLetter( sal_Int32 nCharType )
129 {
130     return CharClass::isLetterType( nCharType ) &&
131             ( css::i18n::KCharacterType::UPPER & nCharType);
132 }
133 
134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
135                                    sal_Int32 nStt, sal_Int32 nEnd )
136 {
137     for( ; nStt < nEnd; ++nStt )
138     {
139         css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
140         switch( nScript )
141         {
142             case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
143             case css::i18n::UnicodeScript_kHangulJamo:
144             case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
145             case css::i18n::UnicodeScript_kHiragana:
146             case css::i18n::UnicodeScript_kKatakana:
147             case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
148             case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
149             case css::i18n::UnicodeScript_kCJKCompatibility:
150             case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
151             case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
152             case css::i18n::UnicodeScript_kHangulSyllable:
153             case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
154             case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
155                 return true;
156             default: ; //do nothing
157         }
158     }
159     return false;
160 }
161 
162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
163                                   sal_Int32 nStt, sal_Int32 nEnd )
164 {
165     for( ; nStt < nEnd; ++nStt )
166     {
167         if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
168             return true;
169     }
170     return false;
171 }
172 
173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
174 {
175     // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
176     if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) )
177         return true;
178 
179     bool bRet = false;
180     for( ; *pArr; ++pArr )
181         if( *pArr == c )
182         {
183             bRet = true;
184             break;
185         }
186     return bRet;
187 }
188 
189 SvxAutoCorrDoc::~SvxAutoCorrDoc()
190 {
191 }
192 
193 // Called by the functions:
194 //  - FnCapitalStartWord
195 //  - FnCapitalStartSentence
196 // after the exchange of characters. Then the words, if necessary, can be inserted
197 // into the exception list.
198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
199                                         sal_Unicode )
200 {
201 }
202 
203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
204 {
205     return LANGUAGE_SYSTEM;
206 }
207 
208 static const LanguageTag& GetAppLang()
209 {
210     return Application::GetSettings().GetLanguageTag();
211 }
212 
213 /// Never use an unresolved LANGUAGE_SYSTEM.
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
215 {
216     LanguageType eLang = rDoc.GetLanguage( nPos );
217     if (eLang == LANGUAGE_SYSTEM)
218         eLang = GetAppLang().getLanguageType();     // the current work locale
219     return eLang;
220 }
221 
222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
223 {
224     static LocaleDataWrapper aLclDtWrp( GetAppLang() );
225     LanguageTag aLcl( nLang );
226     const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag();
227     if( aLcl != rLcl )
228         aLclDtWrp.setLanguageTag( aLcl );
229     return aLclDtWrp;
230 }
231 static TransliterationWrapper& GetIgnoreTranslWrapper()
232 {
233     static int bIsInit = 0;
234     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
235                 TransliterationFlags::IGNORE_KANA |
236                 TransliterationFlags::IGNORE_WIDTH );
237     if( !bIsInit )
238     {
239         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
240         bIsInit = 1;
241     }
242     return aWrp;
243 }
244 static CollatorWrapper& GetCollatorWrapper()
245 {
246     static CollatorWrapper aCollWrp = [&]()
247     {
248         CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
249         tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
250         return tmp;
251     }();
252     return aCollWrp;
253 }
254 
255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
256 {
257     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
258             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
259             cChar == '*'  || cChar == '_'  || cChar == '%' ||
260             cChar == '.'  || cChar == ','  || cChar == ';' ||
261             cChar == ':'  || cChar == '?' || cChar == '!' ||
262             cChar == '<'  || cChar == '>' ||
263             cChar == '/'  || cChar == '-';
264 }
265 
266 namespace
267 {
268     bool IsCompoundWordDelimChar(sal_Unicode cChar)
269     {
270         return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
271     }
272 }
273 
274 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
275 {
276     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
277         cChar == '/' /*case for the urls exception*/;
278 }
279 
280 ACFlags SvxAutoCorrect::GetDefaultFlags()
281 {
282     ACFlags nRet = ACFlags::Autocorrect
283                     | ACFlags::CapitalStartSentence
284                     | ACFlags::CapitalStartWord
285                     | ACFlags::ChgOrdinalNumber
286                     | ACFlags::ChgToEnEmDash
287                     | ACFlags::AddNonBrkSpace
288                     | ACFlags::TransliterateRTL
289                     | ACFlags::ChgAngleQuotes
290                     | ACFlags::ChgWeightUnderl
291                     | ACFlags::SetINetAttr
292                     | ACFlags::ChgQuotes
293                     | ACFlags::SaveWordCplSttLst
294                     | ACFlags::SaveWordWrdSttLst
295                     | ACFlags::CorrectCapsLock;
296     LanguageType eLang = GetAppLang().getLanguageType();
297     if( eLang.anyOf(
298         LANGUAGE_ENGLISH,
299         LANGUAGE_ENGLISH_US,
300         LANGUAGE_ENGLISH_UK,
301         LANGUAGE_ENGLISH_AUS,
302         LANGUAGE_ENGLISH_CAN,
303         LANGUAGE_ENGLISH_NZ,
304         LANGUAGE_ENGLISH_EIRE,
305         LANGUAGE_ENGLISH_SAFRICA,
306         LANGUAGE_ENGLISH_JAMAICA,
307         LANGUAGE_ENGLISH_CARIBBEAN))
308         nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
309     return nRet;
310 }
311 
312 constexpr sal_Unicode cEmDash = 0x2014;
313 constexpr sal_Unicode cEnDash = 0x2013;
314 constexpr sal_Unicode cApostrophe = 0x2019;
315 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
316 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
317 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
318 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
319 // stop characters for searching preceding quotes
320 // (the first character is also the opening quote we are looking for)
321 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
322 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
323 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
324 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
325 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
326 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
327 
328 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
329                                 const OUString& rUserAutocorrFile )
330     : sShareAutoCorrFile( rShareAutocorrFile )
331     , sUserAutoCorrFile( rUserAutocorrFile )
332     , eCharClassLang( LANGUAGE_DONTKNOW )
333     , nFlags(SvxAutoCorrect::GetDefaultFlags())
334     , cStartDQuote( 0 )
335     , cEndDQuote( 0 )
336     , cStartSQuote( 0 )
337     , cEndSQuote( 0 )
338 {
339 }
340 
341 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
342     : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
343     , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
344     , aSwFlags( rCpy.aSwFlags )
345     , eCharClassLang(rCpy.eCharClassLang)
346     , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad))
347     , cStartDQuote( rCpy.cStartDQuote )
348     , cEndDQuote( rCpy.cEndDQuote )
349     , cStartSQuote( rCpy.cStartSQuote )
350     , cEndSQuote( rCpy.cEndSQuote )
351 {
352 }
353 
354 
355 SvxAutoCorrect::~SvxAutoCorrect()
356 {
357 }
358 
359 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
360 {
361     pCharClass.reset( new CharClass( LanguageTag( eLang)) );
362     eCharClassLang = eLang;
363 }
364 
365 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
366 {
367     ACFlags nOld = nFlags;
368     nFlags = bOn ? nFlags | nFlag
369                  : nFlags & ~nFlag;
370 
371     if( !bOn )
372     {
373         if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
374             nFlags &= ~ACFlags::CplSttLstLoad;
375         if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
376             nFlags &= ~ACFlags::WrdSttLstLoad;
377         if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
378             nFlags &= ~ACFlags::ChgWordLstLoad;
379     }
380 }
381 
382 
383 // Correct TWo INitial CApitals
384 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
385                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
386                                     LanguageType eLang )
387 {
388     CharClass& rCC = GetCharClass( eLang );
389 
390     // Delete all non alphanumeric. Test the characters at the beginning/end of
391     // the word ( recognizes: "(min.", "/min.", and so on.)
392     for( ; nSttPos < nEndPos; ++nSttPos )
393         if( rCC.isLetterNumeric( rTxt, nSttPos ))
394             break;
395     for( ; nSttPos < nEndPos; --nEndPos )
396         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
397             break;
398 
399     // Is the word a compounded word separated by delimiters?
400     // If so, keep track of all delimiters so each constituent
401     // word can be checked for two initial capital letters.
402     std::deque<sal_Int32> aDelimiters;
403 
404     // Always check for two capitals at the beginning
405     // of the entire word, so start at nSttPos.
406     aDelimiters.push_back(nSttPos);
407 
408     // Find all compound word delimiters
409     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
410     {
411         if (IsCompoundWordDelimChar(rTxt[ n ]))
412         {
413             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
414         }
415     }
416 
417     // Decide where to put the terminating delimiter.
418     // If the last AutoCorrect char was a newline, then the AutoCorrect
419     // char will not be included in rTxt.
420     // If the last AutoCorrect char was not a newline, then the AutoCorrect
421     // character will be the last character in rTxt.
422     if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
423         aDelimiters.push_back(nEndPos);
424 
425     // Iterate through the word and all words that compose it.
426     // Two capital letters at the beginning of word?
427     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
428     {
429         nSttPos = aDelimiters[nI];
430         nEndPos = aDelimiters[nI + 1];
431 
432         if( nSttPos+2 < nEndPos &&
433             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
434             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
435             // Is the third character a lower case
436             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
437             // Do not replace special attributes
438             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
439         {
440             // test if the word is in an exception list
441             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
442             if( !FindInWrdSttExceptList(eLang, sWord) )
443             {
444                 // Check that word isn't correctly spelt before correcting:
445                 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
446                     LinguMgr::GetSpellChecker();
447                 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
448                 {
449                     Sequence< css::beans::PropertyValue > aEmptySeq;
450                     if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
451                     {
452                         return;
453                     }
454                 }
455                 sal_Unicode cSave = rTxt[ nSttPos ];
456                 OUString sChar = rCC.lowercase( OUString(cSave) );
457                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
458                 {
459                     if( ACFlags::SaveWordWrdSttLst & nFlags )
460                         rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
461                 }
462             }
463         }
464     }
465 }
466 
467 // Format ordinal numbers suffixes (1st -> 1^st)
468 bool SvxAutoCorrect::FnChgOrdinalNumber(
469     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
470     sal_Int32 nSttPos, sal_Int32 nEndPos,
471     LanguageType eLang)
472 {
473     // 1st, 2nd, 3rd, 4 - 0th
474     // 201th or 201st
475     // 12th or 12nd
476     bool bChg = false;
477 
478     // In some languages ordinal suffixes should never be
479     // changed to superscript. Let's break for those languages.
480     if (!eLang.anyOf(
481          LANGUAGE_SWEDISH,
482          LANGUAGE_SWEDISH_FINLAND))
483     {
484         CharClass& rCC = GetCharClass(eLang);
485 
486         for (; nSttPos < nEndPos; ++nSttPos)
487             if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
488                 break;
489         for (; nSttPos < nEndPos; --nEndPos)
490             if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
491                 break;
492 
493 
494         // Get the last number in the string to check
495         sal_Int32 nNumEnd = nEndPos;
496         bool bFoundEnd = false;
497         bool isValidNumber = true;
498         sal_Int32 i = nEndPos;
499         while (i > nSttPos)
500         {
501             i--;
502             bool isDigit = rCC.isDigit(rTxt, i);
503             if (bFoundEnd)
504                 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
505 
506             if (isDigit && !bFoundEnd)
507             {
508                 bFoundEnd = true;
509                 nNumEnd = i;
510             }
511         }
512 
513         if (bFoundEnd && isValidNumber) {
514             sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
515 
516             // Check if the characters after that number correspond to the ordinal suffix
517             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
518                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
519 
520             const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
521             for (OUString const & sSuffix : aSuffixes)
522             {
523                 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
524 
525                 if (sSuffix == sEnd)
526                 {
527                     // Check if the ordinal suffix has to be set as super script
528                     if (rCC.isLetter(sSuffix))
529                     {
530                         // Do the change
531                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
532                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
533                         rDoc.SetAttr(nNumEnd + 1, nEndPos,
534                             SID_ATTR_CHAR_ESCAPEMENT,
535                             aSvxEscapementItem);
536                         bChg = true;
537                     }
538                 }
539             }
540         }
541     }
542     return bChg;
543 }
544 
545 // Replace dashes
546 bool SvxAutoCorrect::FnChgToEnEmDash(
547                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
548                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
549                                 LanguageType eLang )
550 {
551     bool bRet = false;
552     CharClass& rCC = GetCharClass( eLang );
553     if (eLang == LANGUAGE_SYSTEM)
554         eLang = GetAppLang().getLanguageType();
555     bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
556 
557     // replace " - " or " --" with "enDash"
558     if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
559     {
560         sal_Unicode cCh = rTxt[ nSttPos ];
561         if( '-' == cCh )
562         {
563             if( 1 < nEndPos - nSttPos &&
564                 ' ' == rTxt[ nSttPos-1 ] &&
565                 '-' == rTxt[ nSttPos+1 ])
566             {
567                 sal_Int32 n;
568                 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
569                             sImplSttSkipChars,(cCh = rTxt[ n ]));
570                         ++n )
571                     ;
572 
573                 // found: " --[<AnySttChars>][A-z0-9]
574                 if( rCC.isLetterNumeric( OUString(cCh) ) )
575                 {
576                     for( n = nSttPos-1; n && lcl_IsInAsciiArr(
577                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
578                         ;
579 
580                     // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
581                     if( rCC.isLetterNumeric( OUString(cCh) ))
582                     {
583                         rDoc.Delete( nSttPos, nSttPos + 2 );
584                         rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
585                         bRet = true;
586                     }
587                 }
588             }
589         }
590         else if( 3 < nSttPos &&
591                  ' ' == rTxt[ nSttPos-1 ] &&
592                  '-' == rTxt[ nSttPos-2 ])
593         {
594             sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
595             if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
596             {
597                 --nTmpPos;
598                 ++nLen;
599                 cCh = rTxt[ nTmpPos-1 ];
600             }
601             if( ' ' == cCh )
602             {
603                 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
604                             sImplSttSkipChars,(cCh = rTxt[ n ]));
605                         ++n )
606                     ;
607 
608                 // found: " - [<AnySttChars>][A-z0-9]
609                 if( rCC.isLetterNumeric( OUString(cCh) ) )
610                 {
611                     cCh = ' ';
612                     for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
613                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
614                             ;
615                     // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
616                     if( rCC.isLetterNumeric( OUString(cCh) ))
617                     {
618                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
619                         rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
620                         bRet = true;
621                     }
622                 }
623             }
624         }
625     }
626 
627     // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
628     // [0-9]--[0-9] double dash always replaced with "enDash"
629     // Finnish and Hungarian use enDash instead of emDash.
630     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
631     if( 4 <= nEndPos - nSttPos )
632     {
633         OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
634         sal_Int32 nFndPos = sTmp.indexOf("--");
635         if( nFndPos != -1 && nFndPos &&
636             nFndPos + 2 < sTmp.getLength() &&
637             ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
638               lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
639             ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
640             lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
641         {
642             nSttPos = nSttPos + nFndPos;
643             rDoc.Delete( nSttPos, nSttPos + 2 );
644             rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
645                 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
646             bRet = true;
647         }
648     }
649     return bRet;
650 }
651 
652 // Add non-breaking space before specific punctuation marks in French text
653 bool SvxAutoCorrect::FnAddNonBrkSpace(
654                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
655                                 sal_Int32 nEndPos,
656                                 LanguageType eLang, bool& io_bNbspRunNext )
657 {
658     bool bRet = false;
659 
660     CharClass& rCC = GetCharClass( eLang );
661 
662     if ( rCC.getLanguageTag().getLanguage() == "fr" )
663     {
664         bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
665         OUString allChars = ":;?!%";
666         OUString chars( allChars );
667         if ( bFrCA )
668             chars = ":";
669 
670         sal_Unicode cChar = rTxt[ nEndPos ];
671         bool bHasSpace = chars.indexOf( cChar ) != -1;
672         bool bIsSpecial = allChars.indexOf( cChar ) != -1;
673         if ( bIsSpecial )
674         {
675             // Get the last word delimiter position
676             sal_Int32 nSttWdPos = nEndPos;
677             bool bWasWordDelim = false;
678             while( nSttWdPos )
679             {
680                 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
681                 if (bWasWordDelim)
682                     break;
683             }
684 
685             //See if the text is the start of a protocol string, e.g. have text of
686             //"http" see if it is the start of "http:" and if so leave it alone
687             sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
688             sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
689             if (nIndex + nProtocolLen <= rTxt.getLength())
690             {
691                 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
692                     return false;
693             }
694 
695             // Check the presence of "://" in the word
696             sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
697             if ( nStrPos == -1 && nEndPos > 0 )
698             {
699                 // Check the previous char
700                 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
701                 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
702                 {
703                     // Remove any previous normal space
704                     sal_Int32 nPos = nEndPos - 1;
705                     while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
706                     {
707                         if ( nPos == 0 ) break;
708                         nPos--;
709                         cPrevChar = rTxt[ nPos ];
710                     }
711 
712                     nPos++;
713                     if ( nEndPos - nPos > 0 )
714                         rDoc.Delete( nPos, nEndPos );
715 
716                     // Add the non-breaking space at the end pos
717                     if ( bHasSpace )
718                         rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
719                     io_bNbspRunNext = true;
720                     bRet = true;
721                 }
722                 else if ( chars.indexOf( cPrevChar ) != -1 )
723                     io_bNbspRunNext = true;
724             }
725         }
726         else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
727         {
728             // Remove the hardspace right before to avoid formatting URLs
729             sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
730             sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
731             if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
732             {
733                 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
734                 bRet = true;
735             }
736         }
737     }
738 
739     return bRet;
740 }
741 
742 // URL recognition
743 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
744                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
745                                     LanguageType eLang )
746 {
747     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
748                                                 GetCharClass( eLang ) ));
749     bool bRet = !sURL.isEmpty();
750     if( bRet )          // so, set attribute:
751         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
752     return bRet;
753 }
754 
755 // Automatic *bold*, /italic/, -strikeout- and _underline_
756 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
757                                         sal_Int32 nEndPos )
758 {
759     // Condition:
760     //  at the beginning:   _, *, / or ~ after Space with the following !Space
761     //  at the end:         _, *, / or ~ before Space (word delimiter?)
762 
763     sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
764     if( ++nEndPos != rTxt.getLength() &&
765         !IsWordDelim( rTxt[ nEndPos ] ) )
766         return false;
767 
768     --nEndPos;
769 
770     bool bAlphaNum = false;
771     sal_Int32 nPos = nEndPos;
772     sal_Int32  nFndPos = -1;
773     CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
774 
775     while( nPos )
776     {
777         switch( sal_Unicode c = rTxt[ --nPos ] )
778         {
779         case '_':
780         case '-':
781         case '/':
782         case '*':
783             if( c == cInsChar )
784             {
785                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
786                     IsWordDelim( rTxt[ nPos-1 ])) &&
787                     !IsWordDelim( rTxt[ nPos+1 ]))
788                         nFndPos = nPos;
789                 else
790                     // Condition is not satisfied, so cancel
791                     nFndPos = -1;
792                 nPos = 0;
793             }
794             break;
795         default:
796             if( !bAlphaNum )
797                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
798         }
799     }
800 
801     if( -1 != nFndPos )
802     {
803         // first delete the Character at the end - this allows insertion
804         // of an empty hint in SetAttr which would be removed by Delete
805         // (fdo#62536, AUTOFMT in Writer)
806         rDoc.Delete( nEndPos, nEndPos + 1 );
807         rDoc.Delete( nFndPos, nFndPos + 1 );
808         // Span the Attribute over the area
809         // the end.
810         if( '*' == cInsChar )           // Bold
811         {
812             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
813             rDoc.SetAttr( nFndPos, nEndPos - 1,
814                           SID_ATTR_CHAR_WEIGHT,
815                           aSvxWeightItem);
816         }
817         else if( '/' == cInsChar )           // Italic
818         {
819             SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
820             rDoc.SetAttr( nFndPos, nEndPos - 1,
821                           SID_ATTR_CHAR_POSTURE,
822                           aSvxPostureItem);
823         }
824         else if( '-' == cInsChar )           // Strikeout
825         {
826             SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
827             rDoc.SetAttr( nFndPos, nEndPos - 1,
828                           SID_ATTR_CHAR_STRIKEOUT,
829                           aSvxCrossedOutItem);
830         }
831         else                            // Underline
832         {
833             SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
834             rDoc.SetAttr( nFndPos, nEndPos - 1,
835                           SID_ATTR_CHAR_UNDERLINE,
836                           aSvxUnderlineItem);
837         }
838       }
839 
840     return -1 != nFndPos;
841 }
842 
843 // Capitalize first letter of every sentence
844 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
845                                     const OUString& rTxt, bool bNormalPos,
846                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
847                                     LanguageType eLang )
848 {
849 
850     if( rTxt.isEmpty() || nEndPos <= nSttPos )
851         return;
852 
853     CharClass& rCC = GetCharClass( eLang );
854     OUString aText( rTxt );
855     const sal_Unicode *pStart = aText.getStr(),
856                       *pStr = pStart + nEndPos,
857                       *pWordStt = nullptr,
858                       *pDelim = nullptr;
859 
860     bool bAtStart = false;
861     do {
862         --pStr;
863         if (rCC.isLetter(aText, pStr - pStart))
864         {
865             if( !pWordStt )
866                 pDelim = pStr+1;
867             pWordStt = pStr;
868         }
869         else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
870         {
871             if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
872                 pWordStt - 1 == pStr &&
873                 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
874                 (pStart + 1) <= pStr &&
875                 rCC.isLetter(aText, pStr-1 - pStart))
876                 pWordStt = --pStr;
877             else
878                 break;
879         }
880         bAtStart = (pStart == pStr);
881     } while( !bAtStart );
882 
883     if (!pWordStt)
884         return;    // no character to be replaced
885 
886 
887     if (rCC.isDigit(aText, pStr - pStart))
888         return; // already ok
889 
890     if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
891         return; // already ok
892 
893     //See if the text is the start of a protocol string, e.g. have text of
894     //"http" see if it is the start of "http:" and if so leave it alone
895     sal_Int32 nIndex = pWordStt - pStart;
896     sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
897     if (nIndex + nProtocolLen <= rTxt.getLength())
898     {
899         if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
900             return; // already ok
901     }
902 
903     if (0x1 == *pWordStt || 0x2 == *pWordStt)
904         return; // already ok
905 
906     // Only capitalize, if string before specified characters is long enough
907     if( *pDelim && 2 >= pDelim - pWordStt &&
908         lcl_IsInAsciiArr( ".-)>", *pDelim ) )
909         return;
910 
911     // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
912     if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
913         return;
914 
915     if( !bAtStart ) // Still no beginning of a paragraph?
916     {
917         if (NonFieldWordDelim(*pStr))
918         {
919             for (;;)
920             {
921                 bAtStart = (pStart == pStr--);
922                 if (bAtStart || !NonFieldWordDelim(*pStr))
923                     break;
924             }
925         }
926         // Asian full stop, full width full stop, full width exclamation mark
927         // and full width question marks are treated as word delimiters
928         else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
929                   0xFF1F != *pStr )
930             return; // no valid separator -> no replacement
931     }
932 
933     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
934     if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
935         return;
936 
937     if( bAtStart )  // at the beginning of a paragraph?
938     {
939         // Check out the previous paragraph, if it exists.
940         // If so, then check to paragraph separator at the end.
941         OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
942         if (!pPrevPara)
943         {
944             // valid separator -> replace
945             OUString sChar( *pWordStt );
946             sChar = rCC.titlecase(sChar); //see fdo#56740
947             if (sChar != OUStringChar(*pWordStt))
948                rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
949             return;
950         }
951 
952         aText = *pPrevPara;
953         bAtStart = false;
954         pStart = aText.getStr();
955         pStr = pStart + aText.getLength();
956 
957         do {            // overwrite all blanks
958             --pStr;
959             if (!NonFieldWordDelim(*pStr))
960                 break;
961             bAtStart = (pStart == pStr);
962         } while( !bAtStart );
963 
964         if( bAtStart )
965             return;  // no valid separator -> no replacement
966     }
967 
968     // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
969     // all three can happen, but not more than once!
970     const sal_Unicode* pExceptStt = nullptr;
971     bool bContinue = true;
972     Flags nFlag = Flags::NONE;
973     do
974     {
975         switch (*pStr)
976         {
977             // Western and Asian full stop
978             case '.':
979             case 0x3002:
980             case 0xFF0E:
981             {
982                 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
983                 {
984                     //e.g. text "f.o.o. word": Now currently considering
985                     //capitalizing word but second last character of
986                     //previous word is a .  So probably last word is an
987                     //anagram that ends in . and not truly the end of a
988                     //previous sentence, so don't autocapitalize this word
989                     return;
990                 }
991                 if (nFlag & Flags::FullStop)
992                     return; // no valid separator -> no replacement
993                 nFlag |= Flags::FullStop;
994                 pExceptStt = pStr;
995             }
996             break;
997             case '!':
998             case 0xFF01:
999             {
1000                 if (nFlag & Flags::ExclamationMark)
1001                     return; // no valid separator -> no replacement
1002                 nFlag |= Flags::ExclamationMark;
1003             }
1004             break;
1005             case '?':
1006             case 0xFF1F:
1007             {
1008                 if (nFlag & Flags::QuestionMark)
1009                     return; // no valid separator -> no replacement
1010                 nFlag |= Flags::QuestionMark;
1011             }
1012             break;
1013             default:
1014                 if (nFlag == Flags::NONE)
1015                     return; // no valid separator -> no replacement
1016                 else
1017                     bContinue = false;
1018                 break;
1019         }
1020 
1021         if (bContinue && pStr-- == pStart)
1022         {
1023             return; // no valid separator -> no replacement
1024         }
1025     } while (bContinue);
1026     if (Flags::FullStop != nFlag)
1027         pExceptStt = nullptr;
1028 
1029     // Only capitalize, if string is long enough
1030     if( 2 > ( pStr - pStart ) )
1031         return;
1032 
1033     if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1034     {
1035         bool bValid = false, bAlphaFnd = false;
1036         const sal_Unicode* pTmpStr = pStr;
1037         while( !bValid )
1038         {
1039             if( rCC.isDigit( aText, pTmpStr - pStart ) )
1040             {
1041                 bValid = true;
1042                 pStr = pTmpStr - 1;
1043             }
1044             else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1045             {
1046                 if( bAlphaFnd )
1047                 {
1048                     bValid = true;
1049                     pStr = pTmpStr;
1050                 }
1051                 else
1052                     bAlphaFnd = true;
1053             }
1054             else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1055                 break;
1056 
1057             if( pTmpStr == pStart )
1058                 break;
1059 
1060             --pTmpStr;
1061         }
1062 
1063         if( !bValid )
1064             return;       // no valid separator -> no replacement
1065     }
1066 
1067     bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1068 
1069     // Search for the beginning of the word
1070     while (!NonFieldWordDelim(*pStr))
1071     {
1072         if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1073             bNumericOnly = false;
1074 
1075         if( pStart == pStr )
1076             break;
1077 
1078         --pStr;
1079     }
1080 
1081     if( bNumericOnly )      // consists of only numbers, then not
1082         return;
1083 
1084     if (NonFieldWordDelim(*pStr))
1085         ++pStr;
1086 
1087     OUString sWord;
1088 
1089     // check on the basis of the exception list
1090     if( pExceptStt )
1091     {
1092         sWord = OUString(pStr, pExceptStt - pStr + 1);
1093         if( FindInCplSttExceptList(eLang, sWord) )
1094             return;
1095 
1096         // Delete all non alphanumeric. Test the characters at the
1097         // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1098         OUString sTmp( sWord );
1099         while( !sTmp.isEmpty() &&
1100                 !rCC.isLetterNumeric( sTmp, 0 ) )
1101             sTmp = sTmp.copy(1);
1102 
1103         // Remove all non alphanumeric characters towards the end up until
1104         // the last one.
1105         sal_Int32 nLen = sTmp.getLength();
1106         while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1107             --nLen;
1108         if( nLen + 1 < sTmp.getLength() )
1109             sTmp = sTmp.copy( 0, nLen + 1 );
1110 
1111         if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1112             FindInCplSttExceptList(eLang, sTmp))
1113             return;
1114 
1115         if(FindInCplSttExceptList(eLang, sWord, true))
1116             return;
1117     }
1118 
1119     // Ok, then replace
1120     sal_Unicode cSave = *pWordStt;
1121     nSttPos = pWordStt - rTxt.getStr();
1122     OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1123     bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1124 
1125     // Perhaps someone wants to have the word
1126     if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1127         rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1128 }
1129 
1130 // Correct accidental use of cAPS LOCK key
1131 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1132                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1133                                         LanguageType eLang )
1134 {
1135     if (nEndPos - nSttPos < 2)
1136         // string must be at least 2-character long.
1137         return false;
1138 
1139     CharClass& rCC = GetCharClass( eLang );
1140 
1141     // Check the first 2 letters.
1142     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1143         return false;
1144 
1145     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1146         return false;
1147 
1148     OUStringBuffer aConverted;
1149     aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1150     aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1151 
1152     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1153     if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1154         return false;
1155 
1156     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1157     {
1158         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1159             // A lowercase letter disqualifies the whole text.
1160             return false;
1161 
1162         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1163             // Another uppercase letter.  Convert it.
1164             aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1165         else
1166             // This is not an alphabetic letter.  Leave it as-is.
1167             aConverted.append( rTxt[i] );
1168     }
1169 
1170     // Replace the word.
1171     rDoc.Delete(nSttPos, nEndPos);
1172     rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1173 
1174     return true;
1175 }
1176 
1177 
1178 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1179                                         LanguageType eLang ) const
1180 {
1181     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1182                                     ? GetStartDoubleQuote()
1183                                     : GetStartSingleQuote() )
1184                                    : ( '\"' == cInsChar
1185                                     ? GetEndDoubleQuote()
1186                                     : GetEndSingleQuote() );
1187     if( !cRet )
1188     {
1189         // then through the Language find the right character
1190         if( LANGUAGE_NONE == eLang )
1191             cRet = cInsChar;
1192         else
1193         {
1194             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1195             OUString sRet( bSttQuote
1196                             ? ( '\"' == cInsChar
1197                                 ? rLcl.getDoubleQuotationMarkStart()
1198                                 : rLcl.getQuotationMarkStart() )
1199                             : ( '\"' == cInsChar
1200                                 ? rLcl.getDoubleQuotationMarkEnd()
1201                                 : rLcl.getQuotationMarkEnd() ));
1202             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1203         }
1204     }
1205     return cRet;
1206 }
1207 
1208 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1209                                     sal_Unicode cInsChar, bool bSttQuote,
1210                                     bool bIns, LanguageType eLang, ACQuotes eType ) const
1211 {
1212     sal_Unicode cRet;
1213 
1214     if ( eType == ACQuotes::DoubleAngleQuote )
1215     {
1216         bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1217         // pressing " inside a quotation -> use second level angle quotes
1218         bool bLeftQuote = '\"' == cInsChar &&
1219                 // start position and Romanian OR
1220                 // not start position and Hungarian
1221                 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1222         cRet = ( '<' == cInsChar || bLeftQuote )
1223                 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1224                 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1225     }
1226     else if ( eType == ACQuotes::UseApostrophe )
1227         cRet = cApostrophe;
1228     else
1229         cRet = GetQuote( cInsChar, bSttQuote, eLang );
1230 
1231     OUString sChg( cInsChar );
1232     if( bIns )
1233         rDoc.Insert( nInsPos, sChg );
1234     else
1235         rDoc.Replace( nInsPos, sChg );
1236 
1237     sChg = OUString(cRet);
1238 
1239     if( eType == ACQuotes::NonBreakingSpace )
1240     {
1241         if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1242         {
1243             if( !bSttQuote )
1244                 ++nInsPos;
1245         }
1246     }
1247     else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1248     {
1249         rDoc.Delete( nInsPos-1, nInsPos);
1250         --nInsPos;
1251     }
1252 
1253     rDoc.Replace( nInsPos, sChg );
1254 
1255     // i' -> I' in English (last step for the Undo)
1256     if( eType == ACQuotes::CapitalizeIAm )
1257         rDoc.Replace( nInsPos-1, "I" );
1258 }
1259 
1260 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1261                                 sal_Unicode cInsChar, bool bSttQuote )
1262 {
1263     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1264     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1265 
1266     OUString sRet(cRet);
1267 
1268     if( '\"' == cInsChar )
1269     {
1270         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1271         {
1272             if( bSttQuote )
1273                 sRet += " ";
1274             else
1275                 sRet = " " + sRet;
1276         }
1277     }
1278     return sRet;
1279 }
1280 
// search preceding opening quote in the paragraph before the insert position
//
// Scans rTxt backwards, starting with the character at nPos-1 and ending at
// index 0.
//
//  rTxt            text to search
//  nPos            position in front of which the search starts
//  sPrecedingChar  character to look for
//  aStopChars      zero-terminated list of characters that end the search
//
// Returns true if sPrecedingChar is found before any of the stop characters.
// Returns false if a stop character is met first, if the start of the text
// is reached, or if nPos does not have at least one character of rTxt in
// front of it (nPos <= 0 or nPos greater than the text length).
static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars )
{
    // Nothing precedes position 0, and a position behind the end of the text
    // cannot be scanned: without this check the first read below would index
    // the view out of range.
    if ( nPos <= 0 ||
         static_cast<std::u16string_view::size_type>(nPos) > rTxt.size() )
        return false;

    while ( nPos > 0 )
    {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;
    }

    return false;
}
1300 
1301 // WARNING: rText may become invalid, see comment below
1302 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1303                                     sal_Int32 nInsPos, sal_Unicode cChar,
1304                                     bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1305 {
1306     bool bIsNextRun = io_bNbspRunNext;
1307     io_bNbspRunNext = false;  // if it was set, then it has to be turned off
1308 
1309     do{                                 // only for middle check loop !!
1310         if( cChar )
1311         {
1312             // Prevent double space
1313             if( nInsPos && ' ' == cChar &&
1314                 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1315                 ' ' == rTxt[ nInsPos - 1 ])
1316             {
1317                 break;
1318             }
1319 
1320             bool bSingle = '\'' == cChar;
1321             bool bIsReplaceQuote =
1322                         (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1323                         (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1324             if( bIsReplaceQuote )
1325             {
1326                 bool bSttQuote = !nInsPos;
1327                 ACQuotes eType = ACQuotes::NONE;
1328                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1329                 if (!bSttQuote)
1330                 {
1331                     sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1332                     bSttQuote = NonFieldWordDelim(cPrev) ||
1333                         lcl_IsInAsciiArr( "([{", cPrev ) ||
1334                         ( cEmDash == cPrev ) ||
1335                         ( cEnDash == cPrev );
1336                     // tdf#38394 use opening quotation mark << in French l'<<word>>
1337                     if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1338                         primary(eLang) == primary(LANGUAGE_FRENCH) &&
1339                         ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1340                                // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1341                                OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1342                           ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1343                                // abbreviated form of que
1344                                ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1345                                ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1346                     {
1347                         bSttQuote = true;
1348                     }
1349                     // tdf#108423 for capitalization of English i'm
1350                     else if ( bSingle && ( cPrev == 'i' ) &&
1351                         primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1352                         ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1353                     {
1354                         eType = ACQuotes::CapitalizeIAm;
1355                     }
1356                     // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1357                     else if ( !bSingle && nInsPos &&
1358                         ( ( eLang == LANGUAGE_HUNGARIAN &&
1359                             lcl_HasPrecedingChar( rTxt, nInsPos,
1360                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1361                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) ||
1362                           ( eLang.anyOf(
1363                                 LANGUAGE_ROMANIAN,
1364                                 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1365                             lcl_HasPrecedingChar( rTxt, nInsPos,
1366                                 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1367                                 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) )
1368                     {
1369                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1370                         // only if the opening double quotation mark is the default one
1371                         if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1372                             eType = ACQuotes::DoubleAngleQuote;
1373                     }
1374                     else if ( bSingle && nInsPos && !bSttQuote &&
1375                         // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1376                         // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1377                         // tdf#123786 the same for Russian and Ukrainian
1378                         ( ( eLang.anyOf (
1379                                  LANGUAGE_CZECH,
1380                                  LANGUAGE_GERMAN,
1381                                  LANGUAGE_GERMAN_SWISS,
1382                                  LANGUAGE_GERMAN_AUSTRIAN,
1383                                  LANGUAGE_GERMAN_LUXEMBOURG,
1384                                  LANGUAGE_GERMAN_LIECHTENSTEIN,
1385                                  LANGUAGE_ICELANDIC,
1386                                  LANGUAGE_SLOVAK,
1387                                  LANGUAGE_SLOVENIAN ) &&
1388                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0],  aStopSingleQuoteEnd + 1 ) ) ||
1389                           ( eLang.anyOf (
1390                                  LANGUAGE_RUSSIAN,
1391                                  LANGUAGE_UKRAINIAN ) &&
1392                             !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0],  aStopSingleQuoteEndRuUa + 1 ) ) ) )
1393                     {
1394                         LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1395                         CharClass& rCC = GetCharClass( eLang );
1396                         if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) ||
1397                              rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) &&
1398                              // use apostrophe only after letters, not after digits or punctuation
1399                              rCC.isLetter(rTxt, nInsPos-1) )
1400                         {
1401                             eType = ACQuotes::UseApostrophe;
1402                         }
1403                     }
1404                 }
1405 
1406                 if ( eType == ACQuotes::NONE && !bSingle &&
1407                     ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1408                     eType = ACQuotes::NonBreakingSpace;
1409 
1410                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1411                 break;
1412             }
1413             // tdf#133524 change "<<" and ">>" to double angle quotation marks
1414             else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1415                 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1416                 ('<' == cChar || '>' == cChar) &&
1417                 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1418             {
1419                 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1420                 if ( eLang.anyOf(
1421                         LANGUAGE_CATALAN,              // primary level
1422                         LANGUAGE_CATALAN_VALENCIAN,    // primary level
1423                         LANGUAGE_FINNISH,              // alternative primary level
1424                         LANGUAGE_FRENCH_SWISS,         // second level
1425                         LANGUAGE_GALICIAN,             // primary level
1426                         LANGUAGE_HUNGARIAN,            // second level
1427                         LANGUAGE_POLISH,               // second level
1428                         LANGUAGE_PORTUGUESE,           // primary level
1429                         LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1430                         LANGUAGE_ROMANIAN,             // second level
1431                         LANGUAGE_ROMANIAN_MOLDOVA,     // second level
1432                         LANGUAGE_SWEDISH,              // alternative primary level
1433                         LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
1434                         LANGUAGE_UKRAINIAN,            // primary level
1435                         LANGUAGE_USER_ARAGONESE,       // primary level
1436                         LANGUAGE_USER_ASTURIAN ) ||    // primary level
1437                     primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
1438                     primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
1439                 {
1440                     InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1441                     break;
1442                 }
1443             }
1444 
1445             if( bInsert )
1446                 rDoc.Insert( nInsPos, OUString(cChar) );
1447             else
1448                 rDoc.Replace( nInsPos, OUString(cChar) );
1449 
1450             // Hardspaces autocorrection
1451             if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1452             {
1453                 if ( NeedsHardspaceAutocorr( cChar ) &&
1454                     FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
1455                 {
1456                     ;
1457                 }
1458                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1459                 {
1460                     // Remove the NBSP if it wasn't an autocorrection
1461                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1462                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1463                     {
1464                         // Look for the last HARD_SPACE
1465                         sal_Int32 nPos = nInsPos - 1;
1466                         bool bContinue = true;
1467                         while ( bContinue )
1468                         {
1469                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1470                             if ( cTmpChar == cNonBreakingSpace )
1471                             {
1472                                 rDoc.Delete( nPos, nPos + 1 );
1473                                 bContinue = false;
1474                             }
1475                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1476                                 bContinue = false;
1477                             nPos--;
1478                         }
1479                     }
1480                 }
1481             }
1482         }
1483 
1484         if( !nInsPos )
1485             break;
1486 
1487         sal_Int32 nPos = nInsPos - 1;
1488 
1489         if( IsWordDelim( rTxt[ nPos ]))
1490             break;
1491 
1492         // Set bold or underline automatically?
1493         if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1494         {
1495             if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1496             {
1497                 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1498             }
1499             break;
1500         }
1501 
1502         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1503             ;
1504 
1505         // Found a Paragraph-start or a Blank, search for the word shortcut in
1506         // auto.
1507         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1508         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1509             --nCapLttrPos;          // begin of paragraph and no blank
1510 
1511         const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1512         CharClass& rCC = GetCharClass( eLang );
1513 
1514         // no symbol characters
1515         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1516             break;
1517 
1518         if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1519             // tdf#134940 fix regression of arrow "-->" resulted by premature
1520             // replacement of "--" since '>' was added to IsAutoCorrectChar()
1521             '>' != cChar )
1522         {
1523             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1524             // and becomes INVALID if ChgAutoCorrWord returns true!
1525             // => use aPara/pPara to create a valid copy of the string!
1526             OUString aPara;
1527             OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1528 
1529             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1530                                                     *this, pPara );
1531             if( !bChgWord )
1532             {
1533                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1534                 while( nCapLttrPos1 < nInsPos &&
1535                         lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1536                         )
1537                         ++nCapLttrPos1;
1538                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1539                         lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1540                         )
1541                         --nInsPos1;
1542 
1543                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1544                     nCapLttrPos1 < nInsPos1 &&
1545                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1546                 {
1547                     bChgWord = true;
1548                     nCapLttrPos = nCapLttrPos1;
1549                 }
1550             }
1551 
1552             if( bChgWord )
1553             {
1554                 if( !aPara.isEmpty() )
1555                 {
1556                     sal_Int32 nEnd = nCapLttrPos;
1557                     while( nEnd < aPara.getLength() &&
1558                             !IsWordDelim( aPara[ nEnd ]))
1559                         ++nEnd;
1560 
1561                     // Capital letter at beginning of paragraph?
1562                     if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1563                     {
1564                         FnCapitalStartSentence( rDoc, aPara, false,
1565                                                 nCapLttrPos, nEnd, eLang );
1566                     }
1567 
1568                     if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1569                     {
1570                         FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1571                     }
1572                 }
1573                 break;
1574             }
1575         }
1576 
1577         if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1578         {
1579             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1580             // and becomes INVALID if TransliterateRTLWord returns true!
1581             if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1582                 break;
1583         }
1584 
1585         if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1586                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1587                 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1588                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1589             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1590                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1591                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1592             ;
1593         else
1594         {
1595             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1596             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1597 
1598             if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1599                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1600             {
1601                 // Correct accidental use of cAPS LOCK key (do this only when
1602                 // the caps or shift lock key is pressed). Turn off the caps
1603                 // lock afterwards.
1604                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1605             }
1606 
1607             // Capital letter at beginning of paragraph ?
1608             if( !bUnsupported &&
1609                 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1610             {
1611                 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1612             }
1613 
1614             // Two capital letters at beginning of word ??
1615             if( !bUnsupported &&
1616                 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1617             {
1618                 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1619             }
1620 
1621             if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1622             {
1623                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1624             }
1625         }
1626 
1627     } while( false );
1628 }
1629 
1630 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1631                                                         LanguageType eLang )
1632 {
1633     LanguageTag aLanguageTag( eLang);
1634     if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1635         (void)CreateLanguageFile(aLanguageTag);
1636     return *(m_aLangTable.find(aLanguageTag)->second);
1637 }
1638 
1639 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1640 {
1641     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1642     if (iter != m_aLangTable.end() && iter->second)
1643         iter->second->SaveCplSttExceptList();
1644     else
1645     {
1646         SAL_WARN("editeng", "Save an empty list? ");
1647     }
1648 }
1649 
1650 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1651 {
1652     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1653     if (iter != m_aLangTable.end() && iter->second)
1654         iter->second->SaveWrdSttExceptList();
1655     else
1656     {
1657         SAL_WARN("editeng", "Save an empty list? ");
1658     }
1659 }
1660 
1661 // Adds a single word. The list will immediately be written to the file!
1662 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1663                                         LanguageType eLang )
1664 {
1665     SvxAutoCorrectLanguageLists* pLists = nullptr;
1666     // either the right language is present or it will be this in the general list
1667     auto iter = m_aLangTable.find(LanguageTag(eLang));
1668     if (iter != m_aLangTable.end())
1669         pLists = iter->second.get();
1670     else
1671     {
1672         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1673         iter = m_aLangTable.find(aLangTagUndetermined);
1674         if (iter != m_aLangTable.end())
1675             pLists = iter->second.get();
1676         else if(CreateLanguageFile(aLangTagUndetermined))
1677             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1678     }
1679     OSL_ENSURE(pLists, "No auto correction data");
1680     return pLists && pLists->AddToCplSttExceptList(rNew);
1681 }
1682 
1683 // Adds a single word. The list will immediately be written to the file!
1684 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1685                                          LanguageType eLang )
1686 {
1687     SvxAutoCorrectLanguageLists* pLists = nullptr;
1688     //either the right language is present or it is set in the general list
1689     auto iter = m_aLangTable.find(LanguageTag(eLang));
1690     if (iter != m_aLangTable.end())
1691         pLists = iter->second.get();
1692     else
1693     {
1694         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1695         iter = m_aLangTable.find(aLangTagUndetermined);
1696         if (iter != m_aLangTable.end())
1697             pLists = iter->second.get();
1698         else if(CreateLanguageFile(aLangTagUndetermined))
1699             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1700     }
1701     OSL_ENSURE(pLists, "No auto correction file!");
1702     return pLists && pLists->AddToWrdSttExceptList(rNew);
1703 }
1704 
1705 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1706                                              sal_Int32 nPos)
1707 {
1708     OUString sRet;
1709     if( !nPos )
1710         return sRet;
1711 
1712     sal_Int32 nEnd = nPos;
1713 
1714     // it must be followed by a blank or tab!
1715     if( ( nPos < rTxt.getLength() &&
1716         !IsWordDelim( rTxt[ nPos ])) ||
1717         IsWordDelim( rTxt[ --nPos ]))
1718         return sRet;
1719 
1720     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1721         ;
1722 
1723     // Found a Paragraph-start or a Blank, search for the word shortcut in
1724     // auto.
1725     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1726     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1727         --nCapLttrPos;          // Beginning of paragraph and no Blank!
1728 
1729     while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1730         if( ++nCapLttrPos >= nEnd )
1731             return sRet;
1732 
1733     if( 3 > nEnd - nCapLttrPos )
1734         return sRet;
1735 
1736     const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1737 
1738     CharClass& rCC = GetCharClass(eLang);
1739 
1740     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1741         return sRet;
1742 
1743     sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1744     return sRet;
1745 }
1746 
1747 // static
1748 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt,
1749                                                           const sal_Int32 nPos)
1750 {
1751     constexpr sal_Int32 nMinLen = 3;
1752     constexpr sal_Int32 nMaxLen = 9;
1753     std::vector<OUString> aRes;
1754     if (nPos >= nMinLen)
1755     {
1756         sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1757         // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1758         if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1759         {
1760             while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1761                 ++nBegin;
1762         }
1763         if (nBegin + nMinLen <= nPos)
1764         {
1765             OUString sRes = rTxt.copy(nBegin, nPos - nBegin);
1766             aRes.push_back(sRes);
1767             bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1768             for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1769             {
1770                 bool bAdd = bLastStartedWithDelim;
1771                 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1772                 bAdd = bAdd || bLastStartedWithDelim;
1773                 if (bAdd)
1774                     aRes.push_back(sRes.copy(i));
1775             }
1776         }
1777     }
1778     return aRes;
1779 }
1780 
1781 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1782 {
1783     OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1784 
1785     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1786     OUString sShareDirFile( sUserDirFile );
1787 
1788     SvxAutoCorrectLanguageLists* pLists = nullptr;
1789 
1790     tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1791 
1792     auto nFndPos = aLastFileTable.find(rLanguageTag);
1793     if(nFndPos != aLastFileTable.end() &&
1794        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1795        nAktTime - nLastCheckTime < nMinTime)
1796     {
1797         // no need to test the file, because the last check is not older then
1798         // 2 minutes.
1799         if( bNewFile )
1800         {
1801             sShareDirFile = sUserDirFile;
1802             pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1803             LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1804             m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1805             aLastFileTable.erase(nFndPos);
1806         }
1807     }
1808     else if(
1809              ( FStatHelper::IsDocument( sUserDirFile ) ||
1810                FStatHelper::IsDocument( sShareDirFile =
1811                    GetAutoCorrFileName( rLanguageTag ) ) ||
1812                FStatHelper::IsDocument( sShareDirFile =
1813                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1814              ) ||
1815         ( sShareDirFile = sUserDirFile, bNewFile )
1816           )
1817     {
1818         pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1819         LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1820         m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1821         if (nFndPos != aLastFileTable.end())
1822             aLastFileTable.erase(nFndPos);
1823     }
1824     else if( !bNewFile )
1825     {
1826         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1827     }
1828     return pLists != nullptr;
1829 }
1830 
1831 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1832                                 LanguageType eLang )
1833 {
1834     LanguageTag aLanguageTag( eLang);
1835     auto const iter = m_aLangTable.find(aLanguageTag);
1836     if (iter != m_aLangTable.end())
1837         return iter->second->PutText(rShort, rLong);
1838     if(CreateLanguageFile(aLanguageTag))
1839         return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong);
1840     return false;
1841 }
1842 
1843 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1844                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1845                                               LanguageType eLang )
1846 {
1847     LanguageTag aLanguageTag( eLang);
1848     auto const iter = m_aLangTable.find(aLanguageTag);
1849     if (iter != m_aLangTable.end())
1850     {
1851         iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1852     }
1853     else if(CreateLanguageFile( aLanguageTag ))
1854     {
1855         m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1856     }
1857 }
1858 
1859 //  - return the replacement text (only for SWG-Format, all other
1860 //    can be taken from the word list!)
1861 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1862 {
1863     return false;
1864 }
1865 
1866 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1867 {
1868 }
1869 
1870 // Text with attribution (only the SWG - SWG format!)
1871 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1872                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1873 {
1874     return false;
1875 }
1876 
1877 OUString EncryptBlockName_Imp(const OUString& rName)
1878 {
1879     OUStringBuffer aName;
1880     aName.append('#').append(rName);
1881     for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1882     {
1883         if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1884             aName[nPos] &= 0x0f;
1885     }
1886     return aName.makeStringAndClear();
1887 }
1888 
1889 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1890 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
1891 {
1892     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1893     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1894 
1895     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1896     {
1897         switch (aBuf[nPos])
1898         {
1899             case '!':
1900             case '/':
1901             case ':':
1902             case '.':
1903             case '\\':
1904                 aBuf[nPos] = '_';
1905                 break;
1906             default:
1907                 break;
1908         }
1909     }
1910 
1911     rPackageName = aBuf.makeStringAndClear();
1912 }
1913 
1914 static const SvxAutocorrWord* lcl_SearchWordsInList(
1915                 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt,
1916                 sal_Int32& rStt, sal_Int32 nEndPos)
1917 {
1918     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1919     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1920 }
1921 
1922 // the search for the words in the substitution table
1923 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1924                 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1925                 SvxAutoCorrDoc&, LanguageTag& rLang )
1926 {
1927     const SvxAutocorrWord* pRet = nullptr;
1928     LanguageTag aLanguageTag( rLang);
1929     if( aLanguageTag.isSystemLocale() )
1930         aLanguageTag.reset( MsLangId::getSystemLanguage());
1931 
1932     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1933      * list instead? */
1934 
1935     // First search for eLang, then US-English -> English
1936     // and last in LANGUAGE_UNDETERMINED
1937     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1938     {
1939         //the language is available - so bring it on
1940         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1941         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1942         if( pRet )
1943         {
1944             rLang = aLanguageTag;
1945             return pRet;
1946         }
1947         else
1948             return nullptr;
1949     }
1950 
1951     // If it still could not be found here, then keep on searching
1952     LanguageType eLang = aLanguageTag.getLanguageType();
1953     // the primary language for example EN
1954     aLanguageTag.reset(aLanguageTag.getLanguage());
1955     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1956     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1957                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1958                  CreateLanguageFile(aLanguageTag, false)))
1959     {
1960         //the language is available - so bring it on
1961         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1962         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1963         if( pRet )
1964         {
1965             rLang = aLanguageTag;
1966             return pRet;
1967         }
1968     }
1969 
1970     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1971             CreateLanguageFile(aLanguageTag, false))
1972     {
1973         //the language is available - so bring it on
1974         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1975         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1976         if( pRet )
1977         {
1978             rLang = aLanguageTag;
1979             return pRet;
1980         }
1981     }
1982     return nullptr;
1983 }
1984 
1985 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1986                                              const OUString& sWord )
1987 {
1988     LanguageTag aLanguageTag( eLang);
1989 
1990     /* TODO-BCP47: again horrible ugliness */
1991 
1992     // First search for eLang, then primary language of eLang
1993     // and last in LANGUAGE_UNDETERMINED
1994 
1995     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1996     {
1997         //the language is available - so bring it on
1998         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1999         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2000             return true;
2001     }
2002 
2003     // If it still could not be found here, then keep on searching
2004     // the primary language for example EN
2005     aLanguageTag.reset(aLanguageTag.getLanguage());
2006     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2007     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2008                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2009                  CreateLanguageFile(aLanguageTag, false)))
2010     {
2011         //the language is available - so bring it on
2012         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2013         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2014             return true;
2015     }
2016 
2017     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2018             CreateLanguageFile(aLanguageTag, false))
2019     {
2020         //the language is available - so bring it on
2021         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
2022         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
2023             return true;
2024     }
2025     return false;
2026 }
2027 
2028 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2029 {
2030     SvStringsISortDtor::const_iterator it = pList->find( "~" );
2031     SvStringsISortDtor::size_type nPos = it - pList->begin();
2032     if( nPos < pList->size() )
2033     {
2034         OUString sLowerWord(sWord.toAsciiLowerCase());
2035         OUString sAbr;
2036         for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2037         {
2038             sAbr = (*pList)[ n ];
2039             if (sAbr[0] != '~')
2040                 break;
2041             // ~ and ~. are not allowed!
2042             if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2043             {
2044                 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2045                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2046                 {
2047                     if( !--i )      // agrees
2048                         return true;
2049 
2050                     if( sLowerAbk[i] != sLowerWord[--ii])
2051                         break;
2052                 }
2053             }
2054         }
2055     }
2056     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2057             "Wrongly sorted exception list?" );
2058     return false;
2059 }
2060 
2061 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2062                                 const OUString& sWord, bool bAbbreviation)
2063 {
2064     LanguageTag aLanguageTag( eLang);
2065 
2066     /* TODO-BCP47: did I mention terrible horrible ugliness? */
2067 
2068     // First search for eLang, then primary language of eLang
2069     // and last in LANGUAGE_UNDETERMINED
2070 
2071     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2072     {
2073         //the language is available - so bring it on
2074         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2075         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2076             return true;
2077     }
2078 
2079     // If it still could not be found here, then keep on searching
2080     // the primary language for example EN
2081     aLanguageTag.reset(aLanguageTag.getLanguage());
2082     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2083     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2084                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2085                  CreateLanguageFile(aLanguageTag, false)))
2086     {
2087         //the language is available - so bring it on
2088         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2089         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2090             return true;
2091     }
2092 
2093     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2094             CreateLanguageFile(aLanguageTag, false))
2095     {
2096         //the language is available - so bring it on
2097         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
2098         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2099             return true;
2100     }
2101     return false;
2102 }
2103 
2104 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2105                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
2106 {
2107     OUString sRet, sExt( rLanguageTag.getBcp47() );
2108     if (bUnlocalized)
2109     {
2110         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2111         std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2112         if (!vecFallBackStrings.empty())
2113            sExt = vecFallBackStrings[0];
2114     }
2115 
2116     sExt = "_" + sExt + ".dat";
2117     if( bNewFile )
2118         sRet = sUserAutoCorrFile + sExt;
2119     else if( !bTst )
2120         sRet = sShareAutoCorrFile + sExt;
2121     else
2122     {
2123         // test first in the user directory - if not exist, then
2124         sRet = sUserAutoCorrFile + sExt;
2125         if( !FStatHelper::IsDocument( sRet ))
2126             sRet = sShareAutoCorrFile + sExt;
2127     }
2128     return sRet;
2129 }
2130 
2131 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2132                 SvxAutoCorrect& rParent,
2133                 const OUString& rShareAutoCorrectFile,
2134                 const OUString& rUserAutoCorrectFile)
2135 :   sShareAutoCorrFile( rShareAutoCorrectFile ),
2136     sUserAutoCorrFile( rUserAutoCorrectFile ),
2137     aModifiedDate( Date::EMPTY ),
2138     aModifiedTime( tools::Time::EMPTY ),
2139     aLastCheckTime( tools::Time::EMPTY ),
2140     rAutoCorrect(rParent),
2141     nFlags(ACFlags::NONE)
2142 {
2143 }
2144 
2145 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2146 {
2147 }
2148 
2149 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2150 {
2151     // Access the file system only every 2 minutes to check the date stamp
2152     bool bRet = false;
2153 
2154     tools::Time nMinTime( 0, 2 );
2155     tools::Time nAktTime( tools::Time::SYSTEM );
2156     if( aLastCheckTime <= nAktTime) // overflow?
2157         return false;
2158     nAktTime -= aLastCheckTime;
2159     if( nAktTime > nMinTime )     // min time past
2160     {
2161         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2162         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2163                                             &aTstDate, &aTstTime ) &&
2164             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2165         {
2166             bRet = true;
2167             // then remove all the lists fast!
2168             if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2169             {
2170                 pCplStt_ExcptLst.reset();
2171             }
2172             if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst )
2173             {
2174                 pWrdStt_ExcptLst.reset();
2175             }
2176             if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2177             {
2178                 pAutocorr_List.reset();
2179             }
2180             nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad );
2181         }
2182         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2183     }
2184     return bRet;
2185 }
2186 
2187 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2188                                         std::unique_ptr<SvStringsISortDtor>& rpLst,
2189                                         const OUString& sStrmName,
2190                                         tools::SvRef<SotStorage>& rStg)
2191 {
2192     if( rpLst )
2193         rpLst->clear();
2194     else
2195         rpLst.reset( new SvStringsISortDtor );
2196 
2197     {
2198         if( rStg.is() && rStg->IsStream( sStrmName ) )
2199         {
2200             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2201                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2202             if( ERRCODE_NONE != xStrm->GetError())
2203             {
2204                 xStrm.clear();
2205                 rStg.clear();
2206                 RemoveStream_Imp( sStrmName );
2207             }
2208             else
2209             {
2210                 uno::Reference< uno::XComponentContext > xContext =
2211                     comphelper::getProcessComponentContext();
2212 
2213                 xml::sax::InputSource aParserInput;
2214                 aParserInput.sSystemId = sStrmName;
2215 
2216                 xStrm->Seek( 0 );
2217                 xStrm->SetBufferSize( 8 * 1024 );
2218                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2219 
2220                 // get filter
2221                 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2222 
2223                 // connect parser and filter
2224                 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2225                 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2226                 xParser->setFastDocumentHandler( xFilter );
2227                 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2228                 xParser->setTokenHandler( xTokenHandler );
2229 
2230                 // parse
2231                 try
2232                 {
2233                     xParser->parseStream( aParserInput );
2234                 }
2235                 catch( const xml::sax::SAXParseException& )
2236                 {
2237                     // re throw ?
2238                 }
2239                 catch( const xml::sax::SAXException& )
2240                 {
2241                     // re throw ?
2242                 }
2243                 catch( const io::IOException& )
2244                 {
2245                     // re throw ?
2246                 }
2247             }
2248         }
2249 
2250         // Set time stamp
2251         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2252                                         &aModifiedDate, &aModifiedTime );
2253         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2254     }
2255 
2256 }
2257 
2258 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2259                             const SvStringsISortDtor& rLst,
2260                             const OUString& sStrmName,
2261                             tools::SvRef<SotStorage> const &rStg,
2262                             bool bConvert )
2263 {
2264     if( !rStg.is() )
2265         return;
2266 
2267     if( rLst.empty() )
2268     {
2269         rStg->Remove( sStrmName );
2270         rStg->Commit();
2271     }
2272     else
2273     {
2274         tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2275                 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2276         if( xStrm.is() )
2277         {
2278             xStrm->SetSize( 0 );
2279             xStrm->SetBufferSize( 8192 );
2280             xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2281 
2282 
2283             uno::Reference< uno::XComponentContext > xContext =
2284                 comphelper::getProcessComponentContext();
2285 
2286             uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2287             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2288             xWriter->setOutputStream(xOut);
2289 
2290             uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2291             rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2292 
2293             xExp->exportDoc( XML_BLOCK_LIST );
2294 
2295             xStrm->Commit();
2296             if( xStrm->GetError() == ERRCODE_NONE )
2297             {
2298                 xStrm.clear();
2299                 if (!bConvert)
2300                 {
2301                     rStg->Commit();
2302                     if( ERRCODE_NONE != rStg->GetError() )
2303                     {
2304                         rStg->Remove( sStrmName );
2305                         rStg->Commit();
2306                     }
2307                 }
2308             }
2309         }
2310     }
2311 }
2312 
2313 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2314 {
2315     if( pAutocorr_List )
2316         pAutocorr_List->DeleteAndDestroyAll();
2317     else
2318         pAutocorr_List.reset( new SvxAutocorrWordList() );
2319 
2320     try
2321     {
2322         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2323         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2324         uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2325 
2326         xml::sax::InputSource aParserInput;
2327         aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2328         aParserInput.aInputStream = xStrm->getInputStream();
2329 
2330         // get parser
2331         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2332         SAL_INFO("editeng", "AutoCorrect Import" );
2333         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2334         uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2335 
2336         // connect parser and filter
2337         xParser->setFastDocumentHandler( xFilter );
2338         xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2339         xParser->setTokenHandler(xTokenHandler);
2340 
2341         // parse
2342         xParser->parseStream( aParserInput );
2343     }
2344     catch ( const uno::Exception& )
2345     {
2346         TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2347     }
2348 
2349     // Set time stamp
2350     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2351                                     &aModifiedDate, &aModifiedTime );
2352     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2353 
2354     return pAutocorr_List.get();
2355 }
2356 
2357 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2358 {
2359     if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2360     {
2361         LoadAutocorrWordList();
2362         if( !pAutocorr_List )
2363         {
2364             OSL_ENSURE( false, "No valid list" );
2365             pAutocorr_List.reset( new SvxAutocorrWordList() );
2366         }
2367         nFlags |= ACFlags::ChgWordLstLoad;
2368     }
2369     return pAutocorr_List.get();
2370 }
2371 
2372 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2373 {
2374     if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2375     {
2376         LoadCplSttExceptList();
2377         if( !pCplStt_ExcptLst )
2378         {
2379             OSL_ENSURE( false, "No valid list" );
2380             pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2381         }
2382         nFlags |= ACFlags::CplSttLstLoad;
2383     }
2384     return pCplStt_ExcptLst.get();
2385 }
2386 
2387 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2388 {
2389     bool bRet = false;
2390     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2391     {
2392         MakeUserStorage_Impl();
2393         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2394 
2395         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2396 
2397         xStg = nullptr;
2398         // Set time stamp
2399         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2400                                             &aModifiedDate, &aModifiedTime );
2401         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2402         bRet = true;
2403     }
2404     return bRet;
2405 }
2406 
2407 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2408 {
2409     bool bRet = false;
2410     SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2411     if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2412     {
2413         MakeUserStorage_Impl();
2414         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2415 
2416         SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2417 
2418         xStg = nullptr;
2419         // Set time stamp
2420         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2421                                             &aModifiedDate, &aModifiedTime );
2422         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2423         bRet = true;
2424     }
2425     return bRet;
2426 }
2427 
2428 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2429 {
2430     try
2431     {
2432         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2433         if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2434             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2435     }
2436     catch (const css::ucb::ContentCreationException&)
2437     {
2438     }
2439     return pCplStt_ExcptLst.get();
2440 }
2441 
2442 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2443 {
2444     MakeUserStorage_Impl();
2445     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2446 
2447     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2448 
2449     xStg = nullptr;
2450 
2451     // Set time stamp
2452     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2453                                             &aModifiedDate, &aModifiedTime );
2454     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2455 }
2456 
2457 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2458 {
2459     try
2460     {
2461         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2462         if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2463             LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2464     }
2465     catch (const css::ucb::ContentCreationException &)
2466     {
2467         TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList");
2468     }
2469     return pWrdStt_ExcptLst.get();
2470 }
2471 
2472 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2473 {
2474     MakeUserStorage_Impl();
2475     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2476 
2477     SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2478 
2479     xStg = nullptr;
2480     // Set time stamp
2481     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2482                                             &aModifiedDate, &aModifiedTime );
2483     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2484 }
2485 
2486 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2487 {
2488     if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2489     {
2490         LoadWrdSttExceptList();
2491         if( !pWrdStt_ExcptLst )
2492         {
2493             OSL_ENSURE( false, "No valid list" );
2494             pWrdStt_ExcptLst.reset( new SvStringsISortDtor );
2495         }
2496         nFlags |= ACFlags::WrdSttLstLoad;
2497     }
2498     return pWrdStt_ExcptLst.get();
2499 }
2500 
2501 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2502 {
2503     if( sShareAutoCorrFile != sUserAutoCorrFile )
2504     {
2505         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2506         if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2507             xStg->IsStream( rName ) )
2508         {
2509             xStg->Remove( rName );
2510             xStg->Commit();
2511 
2512             xStg = nullptr;
2513         }
2514     }
2515 }
2516 
2517 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2518 {
2519     // The conversion needs to happen if the file is already in the user
2520     // directory and is in the old format. Additionally it needs to
2521     // happen when the file is being copied from share to user.
2522 
2523     bool bError = false, bConvert = false, bCopy = false;
2524     INetURLObject aDest;
2525     INetURLObject aSource;
2526 
2527     if (sUserAutoCorrFile != sShareAutoCorrFile )
2528     {
2529         aSource = INetURLObject ( sShareAutoCorrFile );
2530         aDest = INetURLObject ( sUserAutoCorrFile );
2531         if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2532         {
2533             aDest.SetExtension ( "bak" );
2534             bConvert = true;
2535         }
2536         bCopy = true;
2537     }
2538     else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2539     {
2540         aSource = INetURLObject ( sUserAutoCorrFile );
2541         aDest = INetURLObject ( sUserAutoCorrFile );
2542         aDest.SetExtension ( "bak" );
2543         bCopy = bConvert = true;
2544     }
2545     if (bCopy)
2546     {
2547         try
2548         {
2549             OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2550             sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2551             sMain = sMain.copy(0, nSlashPos);
2552             ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2553             TransferInfo aInfo;
2554             aInfo.NameClash = NameClash::OVERWRITE;
2555             aInfo.NewTitle = aDest.GetLastName();
2556             aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2557             aInfo.MoveData  = false;
2558             aNewContent.executeCommand( "transfer", Any(aInfo));
2559         }
2560         catch (...)
2561         {
2562             bError = true;
2563         }
2564     }
2565     if (bConvert && !bError)
2566     {
2567         tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2568         tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2569 
2570         if( xSrcStg.is() && xDstStg.is() )
2571         {
2572             std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2573 
2574             if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) )
2575                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );
2576 
2577             if (pTmpWordList)
2578             {
2579                 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
2580                 pTmpWordList.reset();
2581             }
2582 
2583 
2584             if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2585                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2586 
2587             if (pTmpWordList)
2588             {
2589                 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2590                 pTmpWordList->clear();
2591             }
2592 
2593             GetAutocorrWordList();
2594             MakeBlocklist_Imp( *xDstStg );
2595             sShareAutoCorrFile = sUserAutoCorrFile;
2596             xDstStg = nullptr;
2597             try
2598             {
2599                 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2600                 aContent.executeCommand ( "delete", makeAny ( true ) );
2601             }
2602             catch (...)
2603             {
2604             }
2605         }
2606     }
2607     else if( bCopy && !bError )
2608         sShareAutoCorrFile = sUserAutoCorrFile;
2609 }
2610 
2611 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2612 {
2613     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2614     if( !bRemove )
2615     {
2616         tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2617                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2618         if( refList.is() )
2619         {
2620             refList->SetSize( 0 );
2621             refList->SetBufferSize( 8192 );
2622             refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2623 
2624             uno::Reference< uno::XComponentContext > xContext =
2625                 comphelper::getProcessComponentContext();
2626 
2627             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2628             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2629             xWriter->setOutputStream(xOut);
2630 
2631             rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2632 
2633             xExp->exportDoc( XML_BLOCK_LIST );
2634 
2635             refList->Commit();
2636             bRet = ERRCODE_NONE == refList->GetError();
2637             if( bRet )
2638             {
2639                 refList.clear();
2640                 rStg.Commit();
2641                 if( ERRCODE_NONE != rStg.GetError() )
2642                 {
2643                     bRemove = true;
2644                     bRet = false;
2645                 }
2646             }
2647         }
2648         else
2649             bRet = false;
2650     }
2651 
2652     if( bRemove )
2653     {
2654         rStg.Remove( pXMLImplAutocorr_ListStr );
2655         rStg.Commit();
2656     }
2657 
2658     return bRet;
2659 }
2660 
2661 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2662 {
2663     // First get the current list!
2664     GetAutocorrWordList();
2665 
2666     MakeUserStorage_Impl();
2667     tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2668 
2669     bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2670 
2671     if( bRet )
2672     {
2673         for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2674         {
2675             std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2676             if( xFoundEntry )
2677             {
2678                 if( !xFoundEntry->IsTextOnly() )
2679                 {
2680                     OUString aName( aWordToDelete.GetShort() );
2681                     if (xStorage->IsOLEStorage())
2682                         aName = EncryptBlockName_Imp(aName);
2683                     else
2684                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2685 
2686                     if( xStorage->IsContained( aName ) )
2687                     {
2688                         xStorage->Remove( aName );
2689                         bRet = xStorage->Commit();
2690                     }
2691                 }
2692             }
2693         }
2694 
2695         for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2696         {
2697             SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2698             std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2699             if( xRemoved )
2700             {
2701                 if( !xRemoved->IsTextOnly() )
2702                 {
2703                     // Still have to remove the Storage
2704                     OUString sStorageName( aWordToAdd.GetShort() );
2705                     if (xStorage->IsOLEStorage())
2706                         sStorageName = EncryptBlockName_Imp(sStorageName);
2707                     else
2708                         GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2709 
2710                     if( xStorage->IsContained( sStorageName ) )
2711                         xStorage->Remove( sStorageName );
2712                 }
2713             }
2714             bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2715 
2716             if ( !bRet )
2717             {
2718                 break;
2719             }
2720         }
2721 
2722         if ( bRet )
2723         {
2724             bRet = MakeBlocklist_Imp( *xStorage );
2725         }
2726     }
2727     return bRet;
2728 }
2729 
2730 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2731 {
2732     // First get the current list!
2733     GetAutocorrWordList();
2734 
2735     MakeUserStorage_Impl();
2736     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2737 
2738     bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2739 
2740     // Update the word list
2741     if( bRet )
2742     {
2743         SvxAutocorrWord aNew(rShort, rLong, true );
2744         std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2745         if( xRemove )
2746         {
2747             if( !xRemove->IsTextOnly() )
2748             {
2749                 // Still have to remove the Storage
2750                 OUString sStgNm( rShort );
2751                 if (xStg->IsOLEStorage())
2752                     sStgNm = EncryptBlockName_Imp(sStgNm);
2753                 else
2754                     GeneratePackageName ( rShort, sStgNm);
2755 
2756                 if( xStg->IsContained( sStgNm ) )
2757                     xStg->Remove( sStgNm );
2758             }
2759         }
2760 
2761         if( pAutocorr_List->Insert( std::move(aNew) ) )
2762         {
2763             bRet = MakeBlocklist_Imp( *xStg );
2764             xStg = nullptr;
2765         }
2766         else
2767         {
2768             bRet = false;
2769         }
2770     }
2771     return bRet;
2772 }
2773 
2774 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2775                                                SfxObjectShell& rShell )
2776 {
2777     // First get the current list!
2778     GetAutocorrWordList();
2779 
2780     MakeUserStorage_Impl();
2781 
2782     try
2783     {
2784         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2785         OUString sLong;
2786         bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2787         xStg = nullptr;
2788 
2789         // Update the word list
2790         if( bRet )
2791         {
2792             if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2793             {
2794                 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2795                 MakeBlocklist_Imp( *xStor );
2796             }
2797         }
2798     }
2799     catch ( const uno::Exception& )
2800     {
2801     }
2802 }
2803 
2804 // Keep the list sorted ...
2805 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2806 {
2807     bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2808     {
2809         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2810         return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2811     }
2812 };
2813 
2814 namespace {
2815 
2816 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2817 
2818 }
2819 
2820 struct SvxAutocorrWordList::Impl
2821 {
2822 
2823     // only one of these contains the data
2824     // maSortedVector is manually sorted so we can optimise data movement
2825     mutable AutocorrWordSetType maSortedVector;
2826     mutable AutocorrWordHashType maHash; // key is 'Short'
2827 
2828     void DeleteAndDestroyAll()
2829     {
2830         maHash.clear();
2831         maSortedVector.clear();
2832     }
2833 };
2834 
2835 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2836 
2837 SvxAutocorrWordList::~SvxAutocorrWordList()
2838 {
2839 }
2840 
2841 void SvxAutocorrWordList::DeleteAndDestroyAll()
2842 {
2843     mpImpl->DeleteAndDestroyAll();
2844 }
2845 
2846 // returns true if inserted
2847 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2848 {
2849     if ( mpImpl->maSortedVector.empty() ) // use the hash
2850     {
2851         OUString aShort = aWord.GetShort();
2852         auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2853         if (inserted)
2854             return &(it->second);
2855         return nullptr;
2856     }
2857     else
2858     {
2859         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2860         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2861         if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2862         {
2863             it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2864             return &*it;
2865         }
2866         return nullptr;
2867     }
2868 }
2869 
2870 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2871 {
2872     (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2873 }
2874 
2875 bool SvxAutocorrWordList::empty() const
2876 {
2877     return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2878 }
2879 
2880 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2881 {
2882 
2883     if ( mpImpl->maSortedVector.empty() ) // use the hash
2884     {
2885         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2886         if( it != mpImpl->maHash.end() )
2887         {
2888             SvxAutocorrWord pMatch = std::move(it->second);
2889             mpImpl->maHash.erase (it);
2890             return pMatch;
2891         }
2892     }
2893     else
2894     {
2895         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2896         if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2897         {
2898             SvxAutocorrWord pMatch = std::move(*it);
2899             mpImpl->maSortedVector.erase (it);
2900             return pMatch;
2901         }
2902     }
2903     return std::optional<SvxAutocorrWord>();
2904 }
2905 
2906 // return the sorted contents - defer sorting until we have to.
2907 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
2908 {
2909     // convert from hash to set permanently
2910     if ( mpImpl->maSortedVector.empty() )
2911     {
2912         std::vector<SvxAutocorrWord> tmp;
2913         tmp.reserve(mpImpl->maHash.size());
2914         for (auto & rPair : mpImpl->maHash)
2915             tmp.emplace_back(std::move(rPair.second));
2916         mpImpl->maHash.clear();
2917         // sort twice - this gets the list into mostly-sorted order, which
2918         // reduces the number of times we need to invoke the expensive ICU collate fn.
2919         std::sort(tmp.begin(), tmp.end(),
2920             [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
2921             {
2922                 return lhs.GetShort() < rhs.GetShort();
2923             });
2924         // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
2925         // stable_sort is twice as fast as sort in this situation because it does
2926         // fewer comparison operations.
2927         std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
2928         mpImpl->maSortedVector = std::move(tmp);
2929     }
2930     return mpImpl->maSortedVector;
2931 }
2932 
2933 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
2934                                       const OUString &rTxt,
2935                                       sal_Int32 &rStt,
2936                                       sal_Int32 nEndPos) const
2937 {
2938     const OUString& rChk = pFnd->GetShort();
2939 
2940     sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
2941     sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
2942     sal_Int32 nSttWdPos = nEndPos;
2943 
2944     // direct replacement of keywords surrounded by colons (for example, ":name:")
2945     bool bColonNameColon = rTxt.getLength() > nEndPos &&
2946         rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
2947     if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
2948     {
2949 
2950         bool bWasWordDelim = false;
2951         sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
2952         if (bColonNameColon)
2953             nCalcStt++;
2954         if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
2955               ( nCalcStt < rStt &&
2956                 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
2957         {
2958             TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
2959             OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
2960             if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
2961             {
2962                 rStt = nCalcStt;
2963                 if (!left_wildcard)
2964                 {
2965                     // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
2966                     if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
2967                         return nullptr;
2968                     return pFnd;
2969                 }
2970                 // get the first word delimiter position before the matching ".*word" pattern
2971                 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
2972                     ;
2973                 if (bWasWordDelim) rStt++;
2974                 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
2975                 // avoid double spaces before simple "word" replacement
2976                 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
2977                 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
2978                     return pNew;
2979             }
2980         } else
2981         // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
2982         if ( right_wildcard )
2983         {
2984 
2985             OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
2986             // Get the last word delimiter position
2987             bool not_suffix;
2988 
2989             while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
2990                 ;
2991             // search the first occurrence (with a left word delimitation, if needed)
2992             sal_Int32 nFndPos = -1;
2993             do {
2994                 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
2995                 if (nFndPos == -1)
2996                     break;
2997                 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
2998             } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
2999 
3000             if ( nFndPos != -1 )
3001             {
3002                 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
3003 
3004                 if ( left_wildcard )
3005                 {
3006                     // get the first word delimiter position before the matching ".*word.*" pattern
3007                     while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
3008                         ;
3009                     if (bWasWordDelim) nFndPos++;
3010                 }
3011                 if (nEndPos + extra_repl <= nFndPos)
3012                 {
3013                     return nullptr;
3014                 }
3015                 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
3016                 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);
3017 
3018                 OUString aLong;
3019                 rStt = nFndPos;
3020                 if ( !left_wildcard )
3021                 {
3022                     sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
3023                     aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
3024                 } else {
3025                     OUStringBuffer buf;
3026                     do {
3027                         nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
3028                         if (nSttWdPos != -1)
3029                         {
3030                             sal_Int32 nTmp(nFndPos);
3031                             while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp]))
3032                                 nTmp++;
3033                             if (nTmp < nSttWdPos)
3034                                 break; // word delimiter found
3035                             buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
3036                             nFndPos = nSttWdPos + sTmp.getLength();
3037                         }
3038                     } while (nSttWdPos != -1);
3039                     if (nEndPos - nFndPos > extra_repl)
3040                         buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos));
3041                     aLong = buf.makeStringAndClear();
3042                 }
3043                 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
3044                 {
3045                     if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos )
3046                         return pNew;
3047                 }
3048             }
3049         }
3050     }
3051     return nullptr;
3052 }
3053 
3054 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
3055                                                               sal_Int32 nEndPos) const
3056 {
3057     for (auto const& elem : mpImpl->maHash)
3058     {
3059         if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
3060             return pTmp;
3061     }
3062 
3063     for (auto const& elem : mpImpl->maSortedVector)
3064     {
3065         if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
3066             return pTmp;
3067     }
3068     return nullptr;
3069 }
3070 
3071 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
3072