xref: /core/editeng/source/misc/svxacorr.cxx (revision c73b9868)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <string_view>
22 #include <sal/config.h>
23 
24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
25 #include <com/sun/star/embed/XStorage.hpp>
26 #include <com/sun/star/io/IOException.hpp>
27 #include <com/sun/star/io/XStream.hpp>
28 #include <tools/urlobj.hxx>
29 #include <i18nlangtag/mslangid.hxx>
30 #include <i18nutil/transliteration.hxx>
31 #include <sal/log.hxx>
32 #include <osl/diagnose.h>
33 #include <vcl/svapp.hxx>
34 #include <vcl/settings.hxx>
35 #include <svl/fstathelper.hxx>
36 #include <svl/urihelper.hxx>
37 #include <unotools/charclass.hxx>
38 #include <com/sun/star/i18n/UnicodeType.hpp>
39 #include <unotools/collatorwrapper.hxx>
40 #include <com/sun/star/i18n/UnicodeScript.hpp>
41 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
42 #include <unotools/localedatawrapper.hxx>
43 #include <unotools/transliterationwrapper.hxx>
44 #include <comphelper/processfactory.hxx>
45 #include <comphelper/storagehelper.hxx>
46 #include <comphelper/string.hxx>
47 #include <editeng/editids.hrc>
48 #include <sot/storage.hxx>
49 #include <editeng/udlnitem.hxx>
50 #include <editeng/wghtitem.hxx>
51 #include <editeng/postitem.hxx>
52 #include <editeng/crossedoutitem.hxx>
53 #include <editeng/escapementitem.hxx>
54 #include <editeng/svxacorr.hxx>
55 #include <editeng/unolingu.hxx>
56 #include <vcl/window.hxx>
57 #include <com/sun/star/xml/sax/InputSource.hpp>
58 #include <com/sun/star/xml/sax/FastParser.hpp>
59 #include <com/sun/star/xml/sax/Writer.hpp>
60 #include <com/sun/star/xml/sax/SAXParseException.hpp>
61 #include <unotools/streamwrap.hxx>
62 #include "SvXMLAutoCorrectImport.hxx"
63 #include "SvXMLAutoCorrectExport.hxx"
64 #include "SvXMLAutoCorrectTokenHandler.hxx"
65 #include <ucbhelper/content.hxx>
66 #include <com/sun/star/ucb/ContentCreationException.hpp>
67 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
68 #include <com/sun/star/ucb/TransferInfo.hpp>
69 #include <com/sun/star/ucb/NameClash.hpp>
70 #include <tools/diagnose_ex.h>
71 #include <xmloff/xmltoken.hxx>
72 #include <unordered_map>
73 #include <rtl/character.hxx>
74 
75 using namespace ::com::sun::star::ucb;
76 using namespace ::com::sun::star::uno;
77 using namespace ::com::sun::star::xml::sax;
78 using namespace ::com::sun::star;
79 using namespace ::xmloff::token;
80 using namespace ::utl;
81 
namespace {

/// Bit flags, one bit per punctuation mark named by the enumerator.
/// NONE has no bit set; the remaining values are distinct single bits so
/// they can be OR-ed together.
enum class Flags {
    NONE            = 0,
    FullStop        = 1 << 0,
    ExclamationMark = 1 << 1,
    QuestionMark    = 1 << 2,
};

}
92 
93 namespace o3tl {
94     template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
95 }
96 static const sal_Unicode cNonBreakingSpace = 0xA0;
97 
98 static const char pXMLImplWrdStt_ExcptLstStr[] = "WordExceptList.xml";
99 static const char pXMLImplCplStt_ExcptLstStr[] = "SentenceExceptList.xml";
100 static const char pXMLImplAutocorr_ListStr[]   = "DocumentList.xml";
101 
102 static const char
103     /* also at these beginnings - Brackets and all kinds of begin characters */
104     sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94",
105     /* also at these ends - Brackets and all kinds of begin characters */
106     sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94";
107 
108 static OUString EncryptBlockName_Imp(const OUString& rName);
109 
110 static bool NonFieldWordDelim( const sal_Unicode c )
111 {
112     return ' ' == c || '\t' == c || 0x0a == c ||
113             cNonBreakingSpace == c || 0x2011 == c;
114 }
115 
116 static bool IsWordDelim( const sal_Unicode c )
117 {
118     return c == 0x1 || NonFieldWordDelim(c);
119 }
120 
121 
122 static bool IsLowerLetter( sal_Int32 nCharType )
123 {
124     return CharClass::isLetterType( nCharType ) &&
125            ( css::i18n::KCharacterType::LOWER & nCharType);
126 }
127 
128 static bool IsUpperLetter( sal_Int32 nCharType )
129 {
130     return CharClass::isLetterType( nCharType ) &&
131             ( css::i18n::KCharacterType::UPPER & nCharType);
132 }
133 
134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
135                                    sal_Int32 nStt, sal_Int32 nEnd )
136 {
137     for( ; nStt < nEnd; ++nStt )
138     {
139         css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
140         switch( nScript )
141         {
142             case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
143             case css::i18n::UnicodeScript_kHangulJamo:
144             case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
145             case css::i18n::UnicodeScript_kHiragana:
146             case css::i18n::UnicodeScript_kKatakana:
147             case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
148             case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
149             case css::i18n::UnicodeScript_kCJKCompatibility:
150             case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA:
151             case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
152             case css::i18n::UnicodeScript_kHangulSyllable:
153             case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
154             case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
155                 return true;
156             default: ; //do nothing
157         }
158     }
159     return false;
160 }
161 
162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
163                                   sal_Int32 nStt, sal_Int32 nEnd )
164 {
165     for( ; nStt < nEnd; ++nStt )
166     {
167         if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
168             return true;
169     }
170     return false;
171 }
172 
173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c )
174 {
175     // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
176     if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) )
177         return true;
178 
179     bool bRet = false;
180     for( ; *pArr; ++pArr )
181         if( *pArr == c )
182         {
183             bRet = true;
184             break;
185         }
186     return bRet;
187 }
188 
189 SvxAutoCorrDoc::~SvxAutoCorrDoc()
190 {
191 }
192 
193 // Called by the functions:
194 //  - FnCapitalStartWord
195 //  - FnCapitalStartSentence
196 // after the exchange of characters. Then the words, if necessary, can be inserted
197 // into the exception list.
198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
199                                         sal_Unicode )
200 {
201 }
202 
203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
204 {
205     return LANGUAGE_SYSTEM;
206 }
207 
208 static const LanguageTag& GetAppLang()
209 {
210     return Application::GetSettings().GetLanguageTag();
211 }
212 
213 /// Never use an unresolved LANGUAGE_SYSTEM.
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
215 {
216     LanguageType eLang = rDoc.GetLanguage( nPos );
217     if (eLang == LANGUAGE_SYSTEM)
218         eLang = GetAppLang().getLanguageType();     // the current work locale
219     return eLang;
220 }
221 
222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
223 {
224     static LocaleDataWrapper aLclDtWrp( GetAppLang() );
225     LanguageTag aLcl( nLang );
226     const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag();
227     if( aLcl != rLcl )
228         aLclDtWrp.setLanguageTag( aLcl );
229     return aLclDtWrp;
230 }
231 static TransliterationWrapper& GetIgnoreTranslWrapper()
232 {
233     static int bIsInit = 0;
234     static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
235                 TransliterationFlags::IGNORE_KANA |
236                 TransliterationFlags::IGNORE_WIDTH );
237     if( !bIsInit )
238     {
239         aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
240         bIsInit = 1;
241     }
242     return aWrp;
243 }
244 static CollatorWrapper& GetCollatorWrapper()
245 {
246     static CollatorWrapper aCollWrp = [&]()
247     {
248         CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
249         tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
250         return tmp;
251     }();
252     return aCollWrp;
253 }
254 
255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
256 {
257     return  cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
258             cChar == ' '  || cChar == '\'' || cChar == '\"' ||
259             cChar == '*'  || cChar == '_'  || cChar == '%' ||
260             cChar == '.'  || cChar == ','  || cChar == ';' ||
261             cChar == ':'  || cChar == '?' || cChar == '!' ||
262             cChar == '/'  || cChar == '-';
263 }
264 
265 namespace
266 {
267     bool IsCompoundWordDelimChar(sal_Unicode cChar)
268     {
269         return  cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
270     }
271 }
272 
273 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
274 {
275     return cChar == '%' || cChar == ';' || cChar == ':'  || cChar == '?' || cChar == '!' ||
276         cChar == '/' /*case for the urls exception*/;
277 }
278 
279 ACFlags SvxAutoCorrect::GetDefaultFlags()
280 {
281     ACFlags nRet = ACFlags::Autocorrect
282                     | ACFlags::CapitalStartSentence
283                     | ACFlags::CapitalStartWord
284                     | ACFlags::ChgOrdinalNumber
285                     | ACFlags::ChgToEnEmDash
286                     | ACFlags::AddNonBrkSpace
287                     | ACFlags::ChgWeightUnderl
288                     | ACFlags::SetINetAttr
289                     | ACFlags::ChgQuotes
290                     | ACFlags::SaveWordCplSttLst
291                     | ACFlags::SaveWordWrdSttLst
292                     | ACFlags::CorrectCapsLock;
293     LanguageType eLang = GetAppLang().getLanguageType();
294     if( eLang.anyOf(
295         LANGUAGE_ENGLISH,
296         LANGUAGE_ENGLISH_US,
297         LANGUAGE_ENGLISH_UK,
298         LANGUAGE_ENGLISH_AUS,
299         LANGUAGE_ENGLISH_CAN,
300         LANGUAGE_ENGLISH_NZ,
301         LANGUAGE_ENGLISH_EIRE,
302         LANGUAGE_ENGLISH_SAFRICA,
303         LANGUAGE_ENGLISH_JAMAICA,
304         LANGUAGE_ENGLISH_CARIBBEAN))
305         nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
306     return nRet;
307 }
308 
309 static constexpr sal_Unicode cEmDash = 0x2014;
310 static constexpr sal_Unicode cEnDash = 0x2013;
311 static constexpr sal_Unicode cApostrophe = 0x2019;
312 
313 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
314                                 const OUString& rUserAutocorrFile )
315     : sShareAutoCorrFile( rShareAutocorrFile )
316     , sUserAutoCorrFile( rUserAutocorrFile )
317     , eCharClassLang( LANGUAGE_DONTKNOW )
318     , nFlags(SvxAutoCorrect::GetDefaultFlags())
319     , cStartDQuote( 0 )
320     , cEndDQuote( 0 )
321     , cStartSQuote( 0 )
322     , cEndSQuote( 0 )
323 {
324 }
325 
326 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
327     : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
328     , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
329     , aSwFlags( rCpy.aSwFlags )
330     , eCharClassLang(rCpy.eCharClassLang)
331     , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad))
332     , cStartDQuote( rCpy.cStartDQuote )
333     , cEndDQuote( rCpy.cEndDQuote )
334     , cStartSQuote( rCpy.cStartSQuote )
335     , cEndSQuote( rCpy.cEndSQuote )
336 {
337 }
338 
339 
340 SvxAutoCorrect::~SvxAutoCorrect()
341 {
342 }
343 
344 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
345 {
346     pCharClass.reset( new CharClass( LanguageTag( eLang)) );
347     eCharClassLang = eLang;
348 }
349 
350 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
351 {
352     ACFlags nOld = nFlags;
353     nFlags = bOn ? nFlags | nFlag
354                  : nFlags & ~nFlag;
355 
356     if( !bOn )
357     {
358         if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
359             nFlags &= ~ACFlags::CplSttLstLoad;
360         if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
361             nFlags &= ~ACFlags::WrdSttLstLoad;
362         if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
363             nFlags &= ~ACFlags::ChgWordLstLoad;
364     }
365 }
366 
367 
368 // Correct TWo INitial CApitals
369 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
370                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
371                                     LanguageType eLang )
372 {
373     CharClass& rCC = GetCharClass( eLang );
374 
375     // Delete all non alphanumeric. Test the characters at the beginning/end of
376     // the word ( recognizes: "(min.", "/min.", and so on.)
377     for( ; nSttPos < nEndPos; ++nSttPos )
378         if( rCC.isLetterNumeric( rTxt, nSttPos ))
379             break;
380     for( ; nSttPos < nEndPos; --nEndPos )
381         if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
382             break;
383 
384     // Is the word a compounded word separated by delimiters?
385     // If so, keep track of all delimiters so each constituent
386     // word can be checked for two initial capital letters.
387     std::deque<sal_Int32> aDelimiters;
388 
389     // Always check for two capitals at the beginning
390     // of the entire word, so start at nSttPos.
391     aDelimiters.push_back(nSttPos);
392 
393     // Find all compound word delimiters
394     for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
395     {
396         if (IsCompoundWordDelimChar(rTxt[ n ]))
397         {
398             aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
399         }
400     }
401 
402     // Decide where to put the terminating delimiter.
403     // If the last AutoCorrect char was a newline, then the AutoCorrect
404     // char will not be included in rTxt.
405     // If the last AutoCorrect char was not a newline, then the AutoCorrect
406     // character will be the last character in rTxt.
407     if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
408         aDelimiters.push_back(nEndPos);
409 
410     // Iterate through the word and all words that compose it.
411     // Two capital letters at the beginning of word?
412     for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
413     {
414         nSttPos = aDelimiters[nI];
415         nEndPos = aDelimiters[nI + 1];
416 
417         if( nSttPos+2 < nEndPos &&
418             IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
419             IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
420             // Is the third character a lower case
421             IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
422             // Do not replace special attributes
423             0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
424         {
425             // test if the word is in an exception list
426             OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
427             if( !FindInWrdSttExceptList(eLang, sWord) )
428             {
429                 // Check that word isn't correctly spelled before correcting:
430                 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
431                     LinguMgr::GetSpellChecker();
432                 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
433                 {
434                     Sequence< css::beans::PropertyValue > aEmptySeq;
435                     if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
436                     {
437                         return;
438                     }
439                 }
440                 sal_Unicode cSave = rTxt[ nSttPos ];
441                 OUString sChar = rCC.lowercase( OUString(cSave) );
442                 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
443                 {
444                     if( ACFlags::SaveWordWrdSttLst & nFlags )
445                         rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
446                 }
447             }
448         }
449     }
450 }
451 
452 // Format ordinal numbers suffixes (1st -> 1^st)
453 bool SvxAutoCorrect::FnChgOrdinalNumber(
454     SvxAutoCorrDoc& rDoc, const OUString& rTxt,
455     sal_Int32 nSttPos, sal_Int32 nEndPos,
456     LanguageType eLang)
457 {
458     // 1st, 2nd, 3rd, 4 - 0th
459     // 201th or 201st
460     // 12th or 12nd
461     bool bChg = false;
462 
463     // In some languages ordinal suffixes should never be
464     // changed to superscript. Let's break for those languages.
465     if (!eLang.anyOf(
466          LANGUAGE_SWEDISH,
467          LANGUAGE_SWEDISH_FINLAND))
468     {
469         CharClass& rCC = GetCharClass(eLang);
470 
471         for (; nSttPos < nEndPos; ++nSttPos)
472             if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos]))
473                 break;
474         for (; nSttPos < nEndPos; --nEndPos)
475             if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
476                 break;
477 
478 
479         // Get the last number in the string to check
480         sal_Int32 nNumEnd = nEndPos;
481         bool bFoundEnd = false;
482         bool isValidNumber = true;
483         sal_Int32 i = nEndPos;
484         while (i > nSttPos)
485         {
486             i--;
487             bool isDigit = rCC.isDigit(rTxt, i);
488             if (bFoundEnd)
489                 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
490 
491             if (isDigit && !bFoundEnd)
492             {
493                 bFoundEnd = true;
494                 nNumEnd = i;
495             }
496         }
497 
498         if (bFoundEnd && isValidNumber) {
499             sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32();
500 
501             // Check if the characters after that number correspond to the ordinal suffix
502             uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
503                 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
504 
505             uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
506             for (sal_Int32 nSuff = 0; nSuff < aSuffixes.getLength(); nSuff++)
507             {
508                 OUString sSuffix(aSuffixes[nSuff]);
509                 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1);
510 
511                 if (sSuffix == sEnd)
512                 {
513                     // Check if the ordinal suffix has to be set as super script
514                     if (rCC.isLetter(sSuffix))
515                     {
516                         // Do the change
517                         SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
518                             DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
519                         rDoc.SetAttr(nNumEnd + 1, nEndPos,
520                             SID_ATTR_CHAR_ESCAPEMENT,
521                             aSvxEscapementItem);
522                         bChg = true;
523                     }
524                 }
525             }
526         }
527     }
528     return bChg;
529 }
530 
531 // Replace dashes
532 bool SvxAutoCorrect::FnChgToEnEmDash(
533                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
534                                 sal_Int32 nSttPos, sal_Int32 nEndPos,
535                                 LanguageType eLang )
536 {
537     bool bRet = false;
538     CharClass& rCC = GetCharClass( eLang );
539     if (eLang == LANGUAGE_SYSTEM)
540         eLang = GetAppLang().getLanguageType();
541     bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);
542 
543     // replace " - " or " --" with "enDash"
544     if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
545     {
546         sal_Unicode cCh = rTxt[ nSttPos ];
547         if( '-' == cCh )
548         {
549             if( 1 < nEndPos - nSttPos &&
550                 ' ' == rTxt[ nSttPos-1 ] &&
551                 '-' == rTxt[ nSttPos+1 ])
552             {
553                 sal_Int32 n;
554                 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr(
555                             sImplSttSkipChars,(cCh = rTxt[ n ]));
556                         ++n )
557                     ;
558 
559                 // found: " --[<AnySttChars>][A-z0-9]
560                 if( rCC.isLetterNumeric( OUString(cCh) ) )
561                 {
562                     for( n = nSttPos-1; n && lcl_IsInAsciiArr(
563                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
564                         ;
565 
566                     // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
567                     if( rCC.isLetterNumeric( OUString(cCh) ))
568                     {
569                         rDoc.Delete( nSttPos, nSttPos + 2 );
570                         rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
571                         bRet = true;
572                     }
573                 }
574             }
575         }
576         else if( 3 < nSttPos &&
577                  ' ' == rTxt[ nSttPos-1 ] &&
578                  '-' == rTxt[ nSttPos-2 ])
579         {
580             sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
581             if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
582             {
583                 --nTmpPos;
584                 ++nLen;
585                 cCh = rTxt[ nTmpPos-1 ];
586             }
587             if( ' ' == cCh )
588             {
589                 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr(
590                             sImplSttSkipChars,(cCh = rTxt[ n ]));
591                         ++n )
592                     ;
593 
594                 // found: " - [<AnySttChars>][A-z0-9]
595                 if( rCC.isLetterNumeric( OUString(cCh) ) )
596                 {
597                     cCh = ' ';
598                     for( n = nTmpPos-1; n && lcl_IsInAsciiArr(
599                             sImplEndSkipChars,(cCh = rTxt[ --n ])); )
600                             ;
601                     // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
602                     if( rCC.isLetterNumeric( OUString(cCh) ))
603                     {
604                         rDoc.Delete( nTmpPos, nTmpPos + nLen );
605                         rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) );
606                         bRet = true;
607                     }
608                 }
609             }
610         }
611     }
612 
613     // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
614     // [0-9]--[0-9] double dash always replaced with "enDash"
615     // Finnish and Hungarian use enDash instead of emDash.
616     bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
617     if( 4 <= nEndPos - nSttPos )
618     {
619         OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) );
620         sal_Int32 nFndPos = sTmp.indexOf("--");
621         if( nFndPos != -1 && nFndPos &&
622             nFndPos + 2 < sTmp.getLength() &&
623             ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) ||
624               lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) &&
625             ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) ||
626             lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] )))
627         {
628             nSttPos = nSttPos + nFndPos;
629             rDoc.Delete( nSttPos, nSttPos + 2 );
630             rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) &&
631                 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) );
632             bRet = true;
633         }
634     }
635     return bRet;
636 }
637 
638 // Add non-breaking space before specific punctuation marks in French text
639 bool SvxAutoCorrect::FnAddNonBrkSpace(
640                                 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
641                                 sal_Int32 nEndPos,
642                                 LanguageType eLang, bool& io_bNbspRunNext )
643 {
644     bool bRet = false;
645 
646     CharClass& rCC = GetCharClass( eLang );
647 
648     if ( rCC.getLanguageTag().getLanguage() == "fr" )
649     {
650         bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
651         OUString allChars = ":;?!%";
652         OUString chars( allChars );
653         if ( bFrCA )
654             chars = ":";
655 
656         sal_Unicode cChar = rTxt[ nEndPos ];
657         bool bHasSpace = chars.indexOf( cChar ) != -1;
658         bool bIsSpecial = allChars.indexOf( cChar ) != -1;
659         if ( bIsSpecial )
660         {
661             // Get the last word delimiter position
662             sal_Int32 nSttWdPos = nEndPos;
663             bool bWasWordDelim = false;
664             while( nSttWdPos )
665             {
666                 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
667                 if (bWasWordDelim)
668                     break;
669             }
670 
671             //See if the text is the start of a protocol string, e.g. have text of
672             //"http" see if it is the start of "http:" and if so leave it alone
673             sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
674             sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1;
675             if (nIndex + nProtocolLen <= rTxt.getLength())
676             {
677                 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
678                     return false;
679             }
680 
681             // Check the presence of "://" in the word
682             sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 );
683             if ( nStrPos == -1 && nEndPos > 0 )
684             {
685                 // Check the previous char
686                 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
687                 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
688                 {
689                     // Remove any previous normal space
690                     sal_Int32 nPos = nEndPos - 1;
691                     while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
692                     {
693                         if ( nPos == 0 ) break;
694                         nPos--;
695                         cPrevChar = rTxt[ nPos ];
696                     }
697 
698                     nPos++;
699                     if ( nEndPos - nPos > 0 )
700                         rDoc.Delete( nPos, nEndPos );
701 
702                     // Add the non-breaking space at the end pos
703                     if ( bHasSpace )
704                         rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
705                     io_bNbspRunNext = true;
706                     bRet = true;
707                 }
708                 else if ( chars.indexOf( cPrevChar ) != -1 )
709                     io_bNbspRunNext = true;
710             }
711         }
712         else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) )
713         {
714             // Remove the hardspace right before to avoid formatting URLs
715             sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
716             sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
717             if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
718             {
719                 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
720                 bRet = true;
721             }
722         }
723     }
724 
725     return bRet;
726 }
727 
728 // URL recognition
729 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
730                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
731                                     LanguageType eLang )
732 {
733     OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
734                                                 GetCharClass( eLang ) ));
735     bool bRet = !sURL.isEmpty();
736     if( bRet )          // so, set attribute:
737         rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
738     return bRet;
739 }
740 
741 // Automatic *bold*, /italic/, -strikeout- and _underline_
742 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
743                                         sal_Int32 nEndPos )
744 {
745     // Condition:
746     //  at the beginning:   _, *, / or ~ after Space with the following !Space
747     //  at the end:         _, *, / or ~ before Space (word delimiter?)
748 
749     sal_Unicode cInsChar = rTxt[ nEndPos ];  // underline, bold, italic or strikeout
750     if( ++nEndPos != rTxt.getLength() &&
751         !IsWordDelim( rTxt[ nEndPos ] ) )
752         return false;
753 
754     --nEndPos;
755 
756     bool bAlphaNum = false;
757     sal_Int32 nPos = nEndPos;
758     sal_Int32  nFndPos = -1;
759     CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
760 
761     while( nPos )
762     {
763         switch( sal_Unicode c = rTxt[ --nPos ] )
764         {
765         case '_':
766         case '-':
767         case '/':
768         case '*':
769             if( c == cInsChar )
770             {
771                 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
772                     IsWordDelim( rTxt[ nPos-1 ])) &&
773                     !IsWordDelim( rTxt[ nPos+1 ]))
774                         nFndPos = nPos;
775                 else
776                     // Condition is not satisfied, so cancel
777                     nFndPos = -1;
778                 nPos = 0;
779             }
780             break;
781         default:
782             if( !bAlphaNum )
783                 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
784         }
785     }
786 
787     if( -1 != nFndPos )
788     {
789         // first delete the Character at the end - this allows insertion
790         // of an empty hint in SetAttr which would be removed by Delete
791         // (fdo#62536, AUTOFMT in Writer)
792         rDoc.Delete( nEndPos, nEndPos + 1 );
793         rDoc.Delete( nFndPos, nFndPos + 1 );
794         // Span the Attribute over the area
795         // the end.
796         if( '*' == cInsChar )           // Bold
797         {
798             SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
799             rDoc.SetAttr( nFndPos, nEndPos - 1,
800                           SID_ATTR_CHAR_WEIGHT,
801                           aSvxWeightItem);
802         }
803         else if( '/' == cInsChar )           // Italic
804         {
805             SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
806             rDoc.SetAttr( nFndPos, nEndPos - 1,
807                           SID_ATTR_CHAR_POSTURE,
808                           aSvxPostureItem);
809         }
810         else if( '-' == cInsChar )           // Strikeout
811         {
812             SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
813             rDoc.SetAttr( nFndPos, nEndPos - 1,
814                           SID_ATTR_CHAR_STRIKEOUT,
815                           aSvxCrossedOutItem);
816         }
817         else                            // Underline
818         {
819             SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
820             rDoc.SetAttr( nFndPos, nEndPos - 1,
821                           SID_ATTR_CHAR_UNDERLINE,
822                           aSvxUnderlineItem);
823         }
824       }
825 
826     return -1 != nFndPos;
827 }
828 
829 // Capitalize first letter of every sentence
830 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
831                                     const OUString& rTxt, bool bNormalPos,
832                                     sal_Int32 nSttPos, sal_Int32 nEndPos,
833                                     LanguageType eLang )
834 {
835 
836     if( rTxt.isEmpty() || nEndPos <= nSttPos )
837         return;
838 
839     CharClass& rCC = GetCharClass( eLang );
840     OUString aText( rTxt );
841     const sal_Unicode *pStart = aText.getStr(),
842                       *pStr = pStart + nEndPos,
843                       *pWordStt = nullptr,
844                       *pDelim = nullptr;
845 
846     bool bAtStart = false;
847     do {
848         --pStr;
849         if (rCC.isLetter(aText, pStr - pStart))
850         {
851             if( !pWordStt )
852                 pDelim = pStr+1;
853             pWordStt = pStr;
854         }
855         else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
856         {
857             if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
858                 pWordStt - 1 == pStr &&
859                 // Installation at beginning of paragraph. Replaced < by <= (#i38971#)
860                 (pStart + 1) <= pStr &&
861                 rCC.isLetter(aText, pStr-1 - pStart))
862                 pWordStt = --pStr;
863             else
864                 break;
865         }
866         bAtStart = (pStart == pStr);
867     } while( !bAtStart );
868 
869     if (!pWordStt)
870         return;    // no character to be replaced
871 
872 
873     if (rCC.isDigit(aText, pStr - pStart))
874         return; // already ok
875 
876     if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
877         return; // already ok
878 
879     //See if the text is the start of a protocol string, e.g. have text of
880     //"http" see if it is the start of "http:" and if so leave it alone
881     sal_Int32 nIndex = pWordStt - pStart;
882     sal_Int32 nProtocolLen = pDelim - pWordStt + 1;
883     if (nIndex + nProtocolLen <= rTxt.getLength())
884     {
885         if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid)
886             return; // already ok
887     }
888 
889     if (0x1 == *pWordStt || 0x2 == *pWordStt)
890         return; // already ok
891 
892     // Only capitalize, if string before specified characters is long enough
893     if( *pDelim && 2 >= pDelim - pWordStt &&
894         lcl_IsInAsciiArr( ".-)>", *pDelim ) )
895         return;
896 
897     // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
898     if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
899         return;
900 
901     if( !bAtStart ) // Still no beginning of a paragraph?
902     {
903         if (NonFieldWordDelim(*pStr))
904         {
905             for (;;)
906             {
907                 bAtStart = (pStart == pStr--);
908                 if (bAtStart || !NonFieldWordDelim(*pStr))
909                     break;
910             }
911         }
912         // Asian full stop, full width full stop, full width exclamation mark
913         // and full width question marks are treated as word delimiters
914         else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
915                   0xFF1F != *pStr )
916             return; // no valid separator -> no replacement
917     }
918 
919     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
920     if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
921         return;
922 
923     if( bAtStart )  // at the beginning of a paragraph?
924     {
925         // Check out the previous paragraph, if it exists.
926         // If so, then check to paragraph separator at the end.
927         OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
928         if (!pPrevPara)
929         {
930             // valid separator -> replace
931             OUString sChar( *pWordStt );
932             sChar = rCC.titlecase(sChar); //see fdo#56740
933             if (!comphelper::string::equals(sChar, *pWordStt))
934                rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
935             return;
936         }
937 
938         aText = *pPrevPara;
939         bAtStart = false;
940         pStart = aText.getStr();
941         pStr = pStart + aText.getLength();
942 
943         do {            // overwrite all blanks
944             --pStr;
945             if (!NonFieldWordDelim(*pStr))
946                 break;
947             bAtStart = (pStart == pStr);
948         } while( !bAtStart );
949 
950         if( bAtStart )
951             return;  // no valid separator -> no replacement
952     }
953 
954     // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator.
955     // all three can happen, but not more than once!
956     const sal_Unicode* pExceptStt = nullptr;
957     bool bContinue = true;
958     Flags nFlag = Flags::NONE;
959     do
960     {
961         switch (*pStr)
962         {
963             // Western and Asian full stop
964             case '.':
965             case 0x3002:
966             case 0xFF0E:
967             {
968                 if (pStr >= pStart + 2 && *(pStr - 2) == '.')
969                 {
970                     //e.g. text "f.o.o. word": Now currently considering
971                     //capitalizing word but second last character of
972                     //previous word is a .  So probably last word is an
973                     //anagram that ends in . and not truly the end of a
974                     //previous sentence, so don't autocapitalize this word
975                     return;
976                 }
977                 if (nFlag & Flags::FullStop)
978                     return; // no valid separator -> no replacement
979                 nFlag |= Flags::FullStop;
980                 pExceptStt = pStr;
981             }
982             break;
983             case '!':
984             case 0xFF01:
985             {
986                 if (nFlag & Flags::ExclamationMark)
987                     return; // no valid separator -> no replacement
988                 nFlag |= Flags::ExclamationMark;
989             }
990             break;
991             case '?':
992             case 0xFF1F:
993             {
994                 if (nFlag & Flags::QuestionMark)
995                     return; // no valid separator -> no replacement
996                 nFlag |= Flags::QuestionMark;
997             }
998             break;
999             default:
1000                 if (nFlag == Flags::NONE)
1001                     return; // no valid separator -> no replacement
1002                 else
1003                     bContinue = false;
1004                 break;
1005         }
1006 
1007         if (bContinue && pStr-- == pStart)
1008         {
1009             return; // no valid separator -> no replacement
1010         }
1011     } while (bContinue);
1012     if (Flags::FullStop != nFlag)
1013         pExceptStt = nullptr;
1014 
1015     // Only capitalize, if string is long enough
1016     if( 2 > ( pStr - pStart ) )
1017         return;
1018 
1019     if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
1020     {
1021         bool bValid = false, bAlphaFnd = false;
1022         const sal_Unicode* pTmpStr = pStr;
1023         while( !bValid )
1024         {
1025             if( rCC.isDigit( aText, pTmpStr - pStart ) )
1026             {
1027                 bValid = true;
1028                 pStr = pTmpStr - 1;
1029             }
1030             else if( rCC.isLetter( aText, pTmpStr - pStart ) )
1031             {
1032                 if( bAlphaFnd )
1033                 {
1034                     bValid = true;
1035                     pStr = pTmpStr;
1036                 }
1037                 else
1038                     bAlphaFnd = true;
1039             }
1040             else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
1041                 break;
1042 
1043             if( pTmpStr == pStart )
1044                 break;
1045 
1046             --pTmpStr;
1047         }
1048 
1049         if( !bValid )
1050             return;       // no valid separator -> no replacement
1051     }
1052 
1053     bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';
1054 
1055     // Search for the beginning of the word
1056     while (!NonFieldWordDelim(*pStr))
1057     {
1058         if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
1059             bNumericOnly = false;
1060 
1061         if( pStart == pStr )
1062             break;
1063 
1064         --pStr;
1065     }
1066 
1067     if( bNumericOnly )      // consists of only numbers, then not
1068         return;
1069 
1070     if (NonFieldWordDelim(*pStr))
1071         ++pStr;
1072 
1073     OUString sWord;
1074 
1075     // check on the basis of the exception list
1076     if( pExceptStt )
1077     {
1078         sWord = OUString(pStr, pExceptStt - pStr + 1);
1079         if( FindInCplSttExceptList(eLang, sWord) )
1080             return;
1081 
1082         // Delete all non alphanumeric. Test the characters at the
1083         // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
1084         OUString sTmp( sWord );
1085         while( !sTmp.isEmpty() &&
1086                 !rCC.isLetterNumeric( sTmp, 0 ) )
1087             sTmp = sTmp.copy(1);
1088 
1089         // Remove all non alphanumeric characters towards the end up until
1090         // the last one.
1091         sal_Int32 nLen = sTmp.getLength();
1092         while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
1093             --nLen;
1094         if( nLen + 1 < sTmp.getLength() )
1095             sTmp = sTmp.copy( 0, nLen + 1 );
1096 
1097         if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
1098             FindInCplSttExceptList(eLang, sTmp))
1099             return;
1100 
1101         if(FindInCplSttExceptList(eLang, sWord, true))
1102             return;
1103     }
1104 
1105     // Ok, then replace
1106     sal_Unicode cSave = *pWordStt;
1107     nSttPos = pWordStt - rTxt.getStr();
1108     OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
1109     bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );
1110 
1111     // Perhaps someone wants to have the word
1112     if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
1113         rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
1114 }
1115 
1116 // Correct accidental use of cAPS LOCK key
1117 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1118                                         sal_Int32 nSttPos, sal_Int32 nEndPos,
1119                                         LanguageType eLang )
1120 {
1121     if (nEndPos - nSttPos < 2)
1122         // string must be at least 2-character long.
1123         return false;
1124 
1125     CharClass& rCC = GetCharClass( eLang );
1126 
1127     // Check the first 2 letters.
1128     if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1129         return false;
1130 
1131     if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1132         return false;
1133 
1134     OUStringBuffer aConverted;
1135     aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1136     aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1137 
1138     // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1139     if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1140         return false;
1141 
1142     for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1143     {
1144         if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1145             // A lowercase letter disqualifies the whole text.
1146             return false;
1147 
1148         if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1149             // Another uppercase letter.  Convert it.
1150             aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1151         else
1152             // This is not an alphabetic letter.  Leave it as-is.
1153             aConverted.append( rTxt[i] );
1154     }
1155 
1156     // Replace the word.
1157     rDoc.Delete(nSttPos, nEndPos);
1158     rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1159 
1160     return true;
1161 }
1162 
1163 
1164 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1165                                         LanguageType eLang ) const
1166 {
1167     sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1168                                     ? GetStartDoubleQuote()
1169                                     : GetStartSingleQuote() )
1170                                    : ( '\"' == cInsChar
1171                                     ? GetEndDoubleQuote()
1172                                     : GetEndSingleQuote() );
1173     if( !cRet )
1174     {
1175         // then through the Language find the right character
1176         if( LANGUAGE_NONE == eLang )
1177             cRet = cInsChar;
1178         else
1179         {
1180             LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1181             OUString sRet( bSttQuote
1182                             ? ( '\"' == cInsChar
1183                                 ? rLcl.getDoubleQuotationMarkStart()
1184                                 : rLcl.getQuotationMarkStart() )
1185                             : ( '\"' == cInsChar
1186                                 ? rLcl.getDoubleQuotationMarkEnd()
1187                                 : rLcl.getQuotationMarkEnd() ));
1188             cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1189         }
1190     }
1191     return cRet;
1192 }
1193 
1194 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1195                                     sal_Unicode cInsChar, bool bSttQuote,
1196                                     bool bIns, bool b_iApostrophe ) const
1197 {
1198     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1199     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1200 
1201     OUString sChg( cInsChar );
1202     if( bIns )
1203         rDoc.Insert( nInsPos, sChg );
1204     else
1205         rDoc.Replace( nInsPos, sChg );
1206 
1207     sChg = OUString(cRet);
1208 
1209     if( '\"' == cInsChar )
1210     {
1211         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1212         {
1213             OUString s( cNonBreakingSpace ); // UNICODE code for no break space
1214             if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, s ))
1215             {
1216                 if( !bSttQuote )
1217                     ++nInsPos;
1218             }
1219         }
1220     }
1221 
1222     rDoc.Replace( nInsPos, sChg );
1223 
1224     // i' -> I' in English (last step for the undo)
1225     if( b_iApostrophe && eLang.anyOf(
1226         LANGUAGE_ENGLISH,
1227         LANGUAGE_ENGLISH_US,
1228         LANGUAGE_ENGLISH_UK,
1229         LANGUAGE_ENGLISH_AUS,
1230         LANGUAGE_ENGLISH_CAN,
1231         LANGUAGE_ENGLISH_NZ,
1232         LANGUAGE_ENGLISH_EIRE,
1233         LANGUAGE_ENGLISH_SAFRICA,
1234         LANGUAGE_ENGLISH_JAMAICA,
1235         LANGUAGE_ENGLISH_CARIBBEAN))
1236     {
1237         rDoc.Replace( nInsPos-1, "I" );
1238     }
1239 }
1240 
1241 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1242                                 sal_Unicode cInsChar, bool bSttQuote )
1243 {
1244     const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1245     sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1246 
1247     OUString sRet(cRet);
1248 
1249     if( '\"' == cInsChar )
1250     {
1251         if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1252         {
1253             if( bSttQuote )
1254                 sRet += " ";
1255             else
1256                 sRet = " " + sRet;
1257         }
1258     }
1259     return sRet;
1260 }
1261 
1262 // WARNING: rText may become invalid, see comment below
1263 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1264                                     sal_Int32 nInsPos, sal_Unicode cChar,
1265                                     bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1266 {
1267     bool bIsNextRun = io_bNbspRunNext;
1268     io_bNbspRunNext = false;  // if it was set, then it has to be turned off
1269 
1270     do{                                 // only for middle check loop !!
1271         if( cChar )
1272         {
1273             // Prevent double space
1274             if( nInsPos && ' ' == cChar &&
1275                 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1276                 ' ' == rTxt[ nInsPos - 1 ])
1277             {
1278                 break;
1279             }
1280 
1281             bool bSingle = '\'' == cChar;
1282             bool bIsReplaceQuote =
1283                         (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1284                         (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1285             if( bIsReplaceQuote )
1286             {
1287                 sal_Unicode cPrev;
1288                 bool bSttQuote = !nInsPos;
1289                 bool b_iApostrophe = false;
1290                 if (!bSttQuote)
1291                 {
1292                     cPrev = rTxt[ nInsPos-1 ];
1293                     bSttQuote = NonFieldWordDelim(cPrev) ||
1294                         lcl_IsInAsciiArr( "([{", cPrev ) ||
1295                         ( cEmDash == cPrev ) ||
1296                         ( cEnDash == cPrev );
1297                     // tdf#38394 use opening quotation mark << in French l'<<word>>
1298                     if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1299                         (nInsPos == 2 || (nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ))) )
1300                     {
1301                         const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1302                         if ( primary(eLang) == primary(LANGUAGE_FRENCH) )
1303                             bSttQuote = true;
1304                     }
1305                     // tdf#108423 for capitalization of English i'm
1306                     b_iApostrophe = bSingle && ( cPrev == 'i' ) &&
1307                         (( nInsPos == 1 ) || IsWordDelim( rTxt[ nInsPos-2 ] ));
1308                 }
1309                 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, b_iApostrophe );
1310                 break;
1311             }
1312 
1313             if( bInsert )
1314                 rDoc.Insert( nInsPos, OUString(cChar) );
1315             else
1316                 rDoc.Replace( nInsPos, OUString(cChar) );
1317 
1318             // Hardspaces autocorrection
1319             if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1320             {
1321                 if ( NeedsHardspaceAutocorr( cChar ) &&
1322                     FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
1323                 {
1324                     ;
1325                 }
1326                 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1327                 {
1328                     // Remove the NBSP if it wasn't an autocorrection
1329                     if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1330                             cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1331                     {
1332                         // Look for the last HARD_SPACE
1333                         sal_Int32 nPos = nInsPos - 1;
1334                         bool bContinue = true;
1335                         while ( bContinue )
1336                         {
1337                             const sal_Unicode cTmpChar = rTxt[ nPos ];
1338                             if ( cTmpChar == cNonBreakingSpace )
1339                             {
1340                                 rDoc.Delete( nPos, nPos + 1 );
1341                                 bContinue = false;
1342                             }
1343                             else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1344                                 bContinue = false;
1345                             nPos--;
1346                         }
1347                     }
1348                 }
1349             }
1350         }
1351 
1352         if( !nInsPos )
1353             break;
1354 
1355         sal_Int32 nPos = nInsPos - 1;
1356 
1357         if( IsWordDelim( rTxt[ nPos ]))
1358             break;
1359 
1360         // Set bold or underline automatically?
1361         if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1362         {
1363             if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1364             {
1365                 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1366             }
1367             break;
1368         }
1369 
1370         while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1371             ;
1372 
1373         // Found a Paragraph-start or a Blank, search for the word shortcut in
1374         // auto.
1375         sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1376         if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1377             --nCapLttrPos;          // begin of paragraph and no blank
1378 
1379         const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1380         CharClass& rCC = GetCharClass( eLang );
1381 
1382         // no symbol characters
1383         if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1384             break;
1385 
1386         if( IsAutoCorrFlag( ACFlags::Autocorrect ) )
1387         {
1388             // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1389             // and becomes INVALID if ChgAutoCorrWord returns true!
1390             // => use aPara/pPara to create a valid copy of the string!
1391             OUString aPara;
1392             OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1393 
1394             bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1395                                                     *this, pPara );
1396             if( !bChgWord )
1397             {
1398                 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1399                 while( nCapLttrPos1 < nInsPos &&
1400                         lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1401                         )
1402                         ++nCapLttrPos1;
1403                 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1404                         lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1405                         )
1406                         --nInsPos1;
1407 
1408                 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1409                     nCapLttrPos1 < nInsPos1 &&
1410                     rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1411                 {
1412                     bChgWord = true;
1413                     nCapLttrPos = nCapLttrPos1;
1414                 }
1415             }
1416 
1417             if( bChgWord )
1418             {
1419                 if( !aPara.isEmpty() )
1420                 {
1421                     sal_Int32 nEnd = nCapLttrPos;
1422                     while( nEnd < aPara.getLength() &&
1423                             !IsWordDelim( aPara[ nEnd ]))
1424                         ++nEnd;
1425 
1426                     // Capital letter at beginning of paragraph?
1427                     if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1428                     {
1429                         FnCapitalStartSentence( rDoc, aPara, false,
1430                                                 nCapLttrPos, nEnd, eLang );
1431                     }
1432 
1433                     if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1434                     {
1435                         FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1436                     }
1437                 }
1438                 break;
1439             }
1440         }
1441 
1442         if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1443                 (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
1444                 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1445                 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1446             ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1447                 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1448                 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1449             ;
1450         else
1451         {
1452             bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1453             bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1454 
1455             if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1456                  FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1457             {
1458                 // Correct accidental use of cAPS LOCK key (do this only when
1459                 // the caps or shift lock key is pressed). Turn off the caps
1460                 // lock afterwards.
1461                 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1462             }
1463 
1464             // Capital letter at beginning of paragraph ?
1465             if( !bUnsupported &&
1466                 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1467             {
1468                 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1469             }
1470 
1471             // Two capital letters at beginning of word ??
1472             if( !bUnsupported &&
1473                 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1474             {
1475                 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1476             }
1477 
1478             if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1479             {
1480                 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1481             }
1482         }
1483 
1484     } while( false );
1485 }
1486 
1487 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1488                                                         LanguageType eLang )
1489 {
1490     LanguageTag aLanguageTag( eLang);
1491     if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1492         (void)CreateLanguageFile(aLanguageTag);
1493     return *(m_aLangTable.find(aLanguageTag)->second);
1494 }
1495 
1496 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1497 {
1498     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1499     if (iter != m_aLangTable.end() && iter->second)
1500         iter->second->SaveCplSttExceptList();
1501     else
1502     {
1503         SAL_WARN("editeng", "Save an empty list? ");
1504     }
1505 }
1506 
1507 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
1508 {
1509     auto const iter = m_aLangTable.find(LanguageTag(eLang));
1510     if (iter != m_aLangTable.end() && iter->second)
1511         iter->second->SaveWrdSttExceptList();
1512     else
1513     {
1514         SAL_WARN("editeng", "Save an empty list? ");
1515     }
1516 }
1517 
1518 // Adds a single word. The list will immediately be written to the file!
1519 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1520                                         LanguageType eLang )
1521 {
1522     SvxAutoCorrectLanguageLists* pLists = nullptr;
1523     // either the right language is present or it will be this in the general list
1524     auto iter = m_aLangTable.find(LanguageTag(eLang));
1525     if (iter != m_aLangTable.end())
1526         pLists = iter->second.get();
1527     else
1528     {
1529         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1530         iter = m_aLangTable.find(aLangTagUndetermined);
1531         if (iter != m_aLangTable.end())
1532             pLists = iter->second.get();
1533         else if(CreateLanguageFile(aLangTagUndetermined))
1534             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1535     }
1536     OSL_ENSURE(pLists, "No auto correction data");
1537     return pLists && pLists->AddToCplSttExceptList(rNew);
1538 }
1539 
1540 // Adds a single word. The list will immediately be written to the file!
1541 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew,
1542                                          LanguageType eLang )
1543 {
1544     SvxAutoCorrectLanguageLists* pLists = nullptr;
1545     //either the right language is present or it is set in the general list
1546     auto iter = m_aLangTable.find(LanguageTag(eLang));
1547     if (iter != m_aLangTable.end())
1548         pLists = iter->second.get();
1549     else
1550     {
1551         LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1552         iter = m_aLangTable.find(aLangTagUndetermined);
1553         if (iter != m_aLangTable.end())
1554             pLists = iter->second.get();
1555         else if(CreateLanguageFile(aLangTagUndetermined))
1556             pLists = m_aLangTable.find(aLangTagUndetermined)->second.get();
1557     }
1558     OSL_ENSURE(pLists, "No auto correction file!");
1559     return pLists && pLists->AddToWrdSttExceptList(rNew);
1560 }
1561 
1562 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1563                                              sal_Int32 nPos)
1564 {
1565     OUString sRet;
1566     if( !nPos )
1567         return sRet;
1568 
1569     sal_Int32 nEnd = nPos;
1570 
1571     // it must be followed by a blank or tab!
1572     if( ( nPos < rTxt.getLength() &&
1573         !IsWordDelim( rTxt[ nPos ])) ||
1574         IsWordDelim( rTxt[ --nPos ]))
1575         return sRet;
1576 
1577     while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1578         ;
1579 
1580     // Found a Paragraph-start or a Blank, search for the word shortcut in
1581     // auto.
1582     sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
1583     if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1584         --nCapLttrPos;          // Beginning of pargraph and no Blank!
1585 
1586     while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1587         if( ++nCapLttrPos >= nEnd )
1588             return sRet;
1589 
1590     if( 3 > nEnd - nCapLttrPos )
1591         return sRet;
1592 
1593     const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1594 
1595     CharClass& rCC = GetCharClass(eLang);
1596 
1597     if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1598         return sRet;
1599 
1600     sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1601     return sRet;
1602 }
1603 
1604 // static
1605 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt,
1606                                                           const sal_Int32 nPos)
1607 {
1608     constexpr sal_Int32 nMinLen = 3;
1609     constexpr sal_Int32 nMaxLen = 9;
1610     std::vector<OUString> aRes;
1611     if (nPos >= nMinLen)
1612     {
1613         sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1614         // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1615         if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1616         {
1617             while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1618                 ++nBegin;
1619         }
1620         if (nBegin + nMinLen <= nPos)
1621         {
1622             OUString sRes = rTxt.copy(nBegin, nPos - nBegin);
1623             aRes.push_back(sRes);
1624             bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1625             for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1626             {
1627                 bool bAdd = bLastStartedWithDelim;
1628                 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1629                 bAdd = bAdd || bLastStartedWithDelim;
1630                 if (bAdd)
1631                     aRes.push_back(sRes.copy(i));
1632             }
1633         }
1634     }
1635     return aRes;
1636 }
1637 
1638 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1639 {
1640     OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1641 
1642     OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1643     OUString sShareDirFile( sUserDirFile );
1644 
1645     SvxAutoCorrectLanguageLists* pLists = nullptr;
1646 
1647     tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1648 
1649     auto nFndPos = aLastFileTable.find(rLanguageTag);
1650     if(nFndPos != aLastFileTable.end() &&
1651        (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1652        nAktTime - nLastCheckTime < nMinTime)
1653     {
1654         // no need to test the file, because the last check is not older then
1655         // 2 minutes.
1656         if( bNewFile )
1657         {
1658             sShareDirFile = sUserDirFile;
1659             pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1660             LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1661             m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1662             aLastFileTable.erase(nFndPos);
1663         }
1664     }
1665     else if(
1666              ( FStatHelper::IsDocument( sUserDirFile ) ||
1667                FStatHelper::IsDocument( sShareDirFile =
1668                    GetAutoCorrFileName( rLanguageTag ) ) ||
1669                FStatHelper::IsDocument( sShareDirFile =
1670                    GetAutoCorrFileName( rLanguageTag, false, false, true) )
1671              ) ||
1672         ( sShareDirFile = sUserDirFile, bNewFile )
1673           )
1674     {
1675         pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile );
1676         LanguageTag aTmp(rLanguageTag);     // this insert() needs a non-const reference
1677         m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists)));
1678         if (nFndPos != aLastFileTable.end())
1679             aLastFileTable.erase(nFndPos);
1680     }
1681     else if( !bNewFile )
1682     {
1683         aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1684     }
1685     return pLists != nullptr;
1686 }
1687 
1688 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1689                                 LanguageType eLang )
1690 {
1691     LanguageTag aLanguageTag( eLang);
1692     auto const iter = m_aLangTable.find(aLanguageTag);
1693     if (iter != m_aLangTable.end())
1694         return iter->second->PutText(rShort, rLong);
1695     if(CreateLanguageFile(aLanguageTag))
1696         return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong);
1697     return false;
1698 }
1699 
1700 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1701                                               std::vector<SvxAutocorrWord>& aDeleteEntries,
1702                                               LanguageType eLang )
1703 {
1704     LanguageTag aLanguageTag( eLang);
1705     auto const iter = m_aLangTable.find(aLanguageTag);
1706     if (iter != m_aLangTable.end())
1707     {
1708         iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1709     }
1710     else if(CreateLanguageFile( aLanguageTag ))
1711     {
1712         m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries );
1713     }
1714 }
1715 
1716 //  - return the replacement text (only for SWG-Format, all other
1717 //    can be taken from the word list!)
1718 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1719 {
1720     return false;
1721 }
1722 
1723 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1724 {
1725 }
1726 
1727 // Text with attribution (only the SWG - SWG format!)
1728 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1729                               const OUString&, const OUString&, SfxObjectShell&, OUString& )
1730 {
1731     return false;
1732 }
1733 
1734 OUString EncryptBlockName_Imp(const OUString& rName)
1735 {
1736     OUStringBuffer aName;
1737     aName.append('#').append(rName);
1738     for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos)
1739     {
1740         if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos]))
1741             aName[nPos] &= 0x0f;
1742     }
1743     return aName.makeStringAndClear();
1744 }
1745 
1746 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
1747 static void GeneratePackageName ( const OUString& rShort, OUString& rPackageName )
1748 {
1749     OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
1750     OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
1751 
1752     for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
1753     {
1754         switch (aBuf[nPos])
1755         {
1756             case '!':
1757             case '/':
1758             case ':':
1759             case '.':
1760             case '\\':
1761                 aBuf[nPos] = '_';
1762                 break;
1763             default:
1764                 break;
1765         }
1766     }
1767 
1768     rPackageName = aBuf.makeStringAndClear();
1769 }
1770 
1771 static const SvxAutocorrWord* lcl_SearchWordsInList(
1772                 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt,
1773                 sal_Int32& rStt, sal_Int32 nEndPos)
1774 {
1775     const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
1776     return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
1777 }
1778 
1779 // the search for the words in the substitution table
1780 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
1781                 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
1782                 SvxAutoCorrDoc&, LanguageTag& rLang )
1783 {
1784     const SvxAutocorrWord* pRet = nullptr;
1785     LanguageTag aLanguageTag( rLang);
1786     if( aLanguageTag.isSystemLocale() )
1787         aLanguageTag.reset( MsLangId::getSystemLanguage());
1788 
1789     /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
1790      * list instead? */
1791 
1792     // First search for eLang, then US-English -> English
1793     // and last in LANGUAGE_UNDETERMINED
1794     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1795     {
1796         //the language is available - so bring it on
1797         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1798         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1799         if( pRet )
1800         {
1801             rLang = aLanguageTag;
1802             return pRet;
1803         }
1804     }
1805 
1806     // If it still could not be found here, then keep on searching
1807     LanguageType eLang = aLanguageTag.getLanguageType();
1808     // the primary language for example EN
1809     aLanguageTag.reset(aLanguageTag.getLanguage());
1810     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1811     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1812                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1813                  CreateLanguageFile(aLanguageTag, false)))
1814     {
1815         //the language is available - so bring it on
1816         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1817         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1818         if( pRet )
1819         {
1820             rLang = aLanguageTag;
1821             return pRet;
1822         }
1823     }
1824 
1825     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1826             CreateLanguageFile(aLanguageTag, false))
1827     {
1828         //the language is available - so bring it on
1829         std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
1830         pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
1831         if( pRet )
1832         {
1833             rLang = aLanguageTag;
1834             return pRet;
1835         }
1836     }
1837     return nullptr;
1838 }
1839 
1840 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
1841                                              const OUString& sWord )
1842 {
1843     LanguageTag aLanguageTag( eLang);
1844 
1845     /* TODO-BCP47: again horrible ugliness */
1846 
1847     // First search for eLang, then primary language of eLang
1848     // and last in LANGUAGE_UNDETERMINED
1849 
1850     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1851     {
1852         //the language is available - so bring it on
1853         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1854         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
1855             return true;
1856     }
1857 
1858     // If it still could not be found here, then keep on searching
1859     // the primary language for example EN
1860     aLanguageTag.reset(aLanguageTag.getLanguage());
1861     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1862     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1863                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1864                  CreateLanguageFile(aLanguageTag, false)))
1865     {
1866         //the language is available - so bring it on
1867         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1868         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
1869             return true;
1870     }
1871 
1872     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1873             CreateLanguageFile(aLanguageTag, false))
1874     {
1875         //the language is available - so bring it on
1876         auto const& pList = m_aLangTable.find(aLanguageTag)->second;
1877         if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
1878             return true;
1879     }
1880     return false;
1881 }
1882 
1883 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
1884 {
1885     OUString sAbk('~');
1886     SvStringsISortDtor::const_iterator it = pList->find( sAbk );
1887     SvStringsISortDtor::size_type nPos = it - pList->begin();
1888     if( nPos < pList->size() )
1889     {
1890         OUString sLowerWord(sWord.toAsciiLowerCase());
1891         OUString sAbr;
1892         for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
1893         {
1894             sAbr = (*pList)[ n ];
1895             if (sAbr[0] != '~')
1896                 break;
1897             // ~ and ~. are not allowed!
1898             if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
1899             {
1900                 OUString sLowerAbk(sAbr.toAsciiLowerCase());
1901                 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
1902                 {
1903                     if( !--i )      // agrees
1904                         return true;
1905 
1906                     if( sLowerAbk[i] != sLowerWord[--ii])
1907                         break;
1908                 }
1909             }
1910         }
1911     }
1912     OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
1913             "Wrongly sorted exception list?" );
1914     return false;
1915 }
1916 
1917 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
1918                                 const OUString& sWord, bool bAbbreviation)
1919 {
1920     LanguageTag aLanguageTag( eLang);
1921 
1922     /* TODO-BCP47: did I mention terrible horrible ugliness? */
1923 
1924     // First search for eLang, then primary language of eLang
1925     // and last in LANGUAGE_UNDETERMINED
1926 
1927     if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
1928     {
1929         //the language is available - so bring it on
1930         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
1931         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
1932             return true;
1933     }
1934 
1935     // If it still could not be found here, then keep on searching
1936     // the primary language for example EN
1937     aLanguageTag.reset(aLanguageTag.getLanguage());
1938     LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
1939     if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
1940                 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
1941                  CreateLanguageFile(aLanguageTag, false)))
1942     {
1943         //the language is available - so bring it on
1944         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
1945         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
1946             return true;
1947     }
1948 
1949     if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
1950             CreateLanguageFile(aLanguageTag, false))
1951     {
1952         //the language is available - so bring it on
1953         const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
1954         if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
1955             return true;
1956     }
1957     return false;
1958 }
1959 
1960 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
1961                                             bool bNewFile, bool bTst, bool bUnlocalized ) const
1962 {
1963     OUString sRet, sExt( rLanguageTag.getBcp47() );
1964     if (bUnlocalized)
1965     {
1966         // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
1967         std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
1968         if (!vecFallBackStrings.empty())
1969            sExt = vecFallBackStrings[0];
1970     }
1971 
1972     sExt = "_" + sExt + ".dat";
1973     if( bNewFile )
1974         sRet = sUserAutoCorrFile + sExt;
1975     else if( !bTst )
1976         sRet = sShareAutoCorrFile + sExt;
1977     else
1978     {
1979         // test first in the user directory - if not exist, then
1980         sRet = sUserAutoCorrFile + sExt;
1981         if( !FStatHelper::IsDocument( sRet ))
1982             sRet = sShareAutoCorrFile + sExt;
1983     }
1984     return sRet;
1985 }
1986 
1987 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
1988                 SvxAutoCorrect& rParent,
1989                 const OUString& rShareAutoCorrectFile,
1990                 const OUString& rUserAutoCorrectFile)
1991 :   sShareAutoCorrFile( rShareAutoCorrectFile ),
1992     sUserAutoCorrFile( rUserAutoCorrectFile ),
1993     aModifiedDate( Date::EMPTY ),
1994     aModifiedTime( tools::Time::EMPTY ),
1995     aLastCheckTime( tools::Time::EMPTY ),
1996     rAutoCorrect(rParent),
1997     nFlags(ACFlags::NONE)
1998 {
1999 }
2000 
2001 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2002 {
2003 }
2004 
2005 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2006 {
2007     // Access the file system only every 2 minutes to check the date stamp
2008     bool bRet = false;
2009 
2010     tools::Time nMinTime( 0, 2 );
2011     tools::Time nAktTime( tools::Time::SYSTEM );
2012     if( aLastCheckTime <= nAktTime) // overflow?
2013         return false;
2014     nAktTime -= aLastCheckTime;
2015     if( nAktTime > nMinTime )     // min time past
2016     {
2017         Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2018         if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2019                                             &aTstDate, &aTstTime ) &&
2020             ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2021         {
2022             bRet = true;
2023             // then remove all the lists fast!
2024             if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2025             {
2026                 pCplStt_ExcptLst.reset();
2027             }
2028             if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst )
2029             {
2030                 pWrdStt_ExcptLst.reset();
2031             }
2032             if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2033             {
2034                 pAutocorr_List.reset();
2035             }
2036             nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad );
2037         }
2038         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2039     }
2040     return bRet;
2041 }
2042 
2043 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2044                                         std::unique_ptr<SvStringsISortDtor>& rpLst,
2045                                         const char* pStrmName,
2046                                         tools::SvRef<SotStorage>& rStg)
2047 {
2048     if( rpLst )
2049         rpLst->clear();
2050     else
2051         rpLst.reset( new SvStringsISortDtor );
2052 
2053     {
2054         const OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2055 
2056         if( rStg.is() && rStg->IsStream( sStrmName ) )
2057         {
2058             tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2059                 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2060             if( ERRCODE_NONE != xStrm->GetError())
2061             {
2062                 xStrm.clear();
2063                 rStg.clear();
2064                 RemoveStream_Imp( sStrmName );
2065             }
2066             else
2067             {
2068                 uno::Reference< uno::XComponentContext > xContext =
2069                     comphelper::getProcessComponentContext();
2070 
2071                 xml::sax::InputSource aParserInput;
2072                 aParserInput.sSystemId = sStrmName;
2073 
2074                 xStrm->Seek( 0 );
2075                 xStrm->SetBufferSize( 8 * 1024 );
2076                 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2077 
2078                 // get filter
2079                 rtl::Reference< SvXMLExceptionListImport > xImport = new SvXMLExceptionListImport ( xContext, *rpLst );
2080 
2081                 // connect parser and filter
2082                 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2083                 xImport->setTokenHandler( xTokenHandler );
2084 
2085                 // parse
2086                 try
2087                 {
2088                     xImport->parseStream( aParserInput );
2089                 }
2090                 catch( const xml::sax::SAXParseException& )
2091                 {
2092                     // re throw ?
2093                 }
2094                 catch( const xml::sax::SAXException& )
2095                 {
2096                     // re throw ?
2097                 }
2098                 catch( const io::IOException& )
2099                 {
2100                     // re throw ?
2101                 }
2102             }
2103         }
2104 
2105         // Set time stamp
2106         FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2107                                         &aModifiedDate, &aModifiedTime );
2108         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2109     }
2110 
2111 }
2112 
2113 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2114                             const SvStringsISortDtor& rLst,
2115                             const char* pStrmName,
2116                             tools::SvRef<SotStorage> const &rStg,
2117                             bool bConvert )
2118 {
2119     if( !rStg.is() )
2120         return;
2121 
2122     OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
2123     if( rLst.empty() )
2124     {
2125         rStg->Remove( sStrmName );
2126         rStg->Commit();
2127     }
2128     else
2129     {
2130         tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2131                 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2132         if( xStrm.is() )
2133         {
2134             xStrm->SetSize( 0 );
2135             xStrm->SetBufferSize( 8192 );
2136             xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2137 
2138 
2139             uno::Reference< uno::XComponentContext > xContext =
2140                 comphelper::getProcessComponentContext();
2141 
2142             uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
2143             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2144             xWriter->setOutputStream(xOut);
2145 
2146             uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2147             rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2148 
2149             xExp->exportDoc( XML_BLOCK_LIST );
2150 
2151             xStrm->Commit();
2152             if( xStrm->GetError() == ERRCODE_NONE )
2153             {
2154                 xStrm.clear();
2155                 if (!bConvert)
2156                 {
2157                     rStg->Commit();
2158                     if( ERRCODE_NONE != rStg->GetError() )
2159                     {
2160                         rStg->Remove( sStrmName );
2161                         rStg->Commit();
2162                     }
2163                 }
2164             }
2165         }
2166     }
2167 }
2168 
2169 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2170 {
2171     if( pAutocorr_List )
2172         pAutocorr_List->DeleteAndDestroyAll();
2173     else
2174         pAutocorr_List.reset( new SvxAutocorrWordList() );
2175 
2176     try
2177     {
2178         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2179         uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2180         uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2181 
2182         xml::sax::InputSource aParserInput;
2183         aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2184         aParserInput.aInputStream = xStrm->getInputStream();
2185 
2186         // get parser
2187         uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2188         SAL_INFO("editeng", "AutoCorrect Import" );
2189         uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2190         uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2191 
2192         // connect parser and filter
2193         xParser->setFastDocumentHandler( xFilter );
2194         xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
2195         xParser->setTokenHandler(xTokenHandler);
2196 
2197         // parse
2198         xParser->parseStream( aParserInput );
2199     }
2200     catch ( const uno::Exception& )
2201     {
2202         TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2203     }
2204 
2205     // Set time stamp
2206     FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2207                                     &aModifiedDate, &aModifiedTime );
2208     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2209 
2210     return pAutocorr_List.get();
2211 }
2212 
2213 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2214 {
2215     if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2216     {
2217         LoadAutocorrWordList();
2218         if( !pAutocorr_List )
2219         {
2220             OSL_ENSURE( false, "No valid list" );
2221             pAutocorr_List.reset( new SvxAutocorrWordList() );
2222         }
2223         nFlags |= ACFlags::ChgWordLstLoad;
2224     }
2225     return pAutocorr_List.get();
2226 }
2227 
2228 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2229 {
2230     if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2231     {
2232         LoadCplSttExceptList();
2233         if( !pCplStt_ExcptLst )
2234         {
2235             OSL_ENSURE( false, "No valid list" );
2236             pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2237         }
2238         nFlags |= ACFlags::CplSttLstLoad;
2239     }
2240     return pCplStt_ExcptLst.get();
2241 }
2242 
2243 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2244 {
2245     bool bRet = false;
2246     if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2247     {
2248         MakeUserStorage_Impl();
2249         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2250 
2251         SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2252 
2253         xStg = nullptr;
2254         // Set time stamp
2255         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2256                                             &aModifiedDate, &aModifiedTime );
2257         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2258         bRet = true;
2259     }
2260     return bRet;
2261 }
2262 
2263 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew)
2264 {
2265     bool bRet = false;
2266     SvStringsISortDtor* pExceptList = LoadWrdSttExceptList();
2267     if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second )
2268     {
2269         MakeUserStorage_Impl();
2270         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2271 
2272         SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2273 
2274         xStg = nullptr;
2275         // Set time stamp
2276         FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2277                                             &aModifiedDate, &aModifiedTime );
2278         aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2279         bRet = true;
2280     }
2281     return bRet;
2282 }
2283 
2284 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2285 {
2286     try
2287     {
2288         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2289         OUString sTemp ( pXMLImplCplStt_ExcptLstStr );
2290         if( xStg.is() && xStg->IsContained( sTemp ) )
2291             LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2292     }
2293     catch (const css::ucb::ContentCreationException&)
2294     {
2295     }
2296     return pCplStt_ExcptLst.get();
2297 }
2298 
2299 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2300 {
2301     MakeUserStorage_Impl();
2302     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2303 
2304     SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2305 
2306     xStg = nullptr;
2307 
2308     // Set time stamp
2309     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2310                                             &aModifiedDate, &aModifiedTime );
2311     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2312 }
2313 
2314 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList()
2315 {
2316     try
2317     {
2318         tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2319         OUString sTemp ( pXMLImplWrdStt_ExcptLstStr );
2320         if( xStg.is() && xStg->IsContained( sTemp ) )
2321             LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2322     }
2323     catch (const css::ucb::ContentCreationException &)
2324     {
2325         TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList");
2326     }
2327     return pWrdStt_ExcptLst.get();
2328 }
2329 
2330 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList()
2331 {
2332     MakeUserStorage_Impl();
2333     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2334 
2335     SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg );
2336 
2337     xStg = nullptr;
2338     // Set time stamp
2339     FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2340                                             &aModifiedDate, &aModifiedTime );
2341     aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2342 }
2343 
2344 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList()
2345 {
2346     if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2347     {
2348         LoadWrdSttExceptList();
2349         if( !pWrdStt_ExcptLst )
2350         {
2351             OSL_ENSURE( false, "No valid list" );
2352             pWrdStt_ExcptLst.reset( new SvStringsISortDtor );
2353         }
2354         nFlags |= ACFlags::WrdSttLstLoad;
2355     }
2356     return pWrdStt_ExcptLst.get();
2357 }
2358 
2359 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2360 {
2361     if( sShareAutoCorrFile != sUserAutoCorrFile )
2362     {
2363         tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2364         if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2365             xStg->IsStream( rName ) )
2366         {
2367             xStg->Remove( rName );
2368             xStg->Commit();
2369 
2370             xStg = nullptr;
2371         }
2372     }
2373 }
2374 
2375 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2376 {
2377     // The conversion needs to happen if the file is already in the user
2378     // directory and is in the old format. Additionally it needs to
2379     // happen when the file is being copied from share to user.
2380 
2381     bool bError = false, bConvert = false, bCopy = false;
2382     INetURLObject aDest;
2383     INetURLObject aSource;
2384 
2385     if (sUserAutoCorrFile != sShareAutoCorrFile )
2386     {
2387         aSource = INetURLObject ( sShareAutoCorrFile );
2388         aDest = INetURLObject ( sUserAutoCorrFile );
2389         if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2390         {
2391             aDest.SetExtension ( "bak" );
2392             bConvert = true;
2393         }
2394         bCopy = true;
2395     }
2396     else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2397     {
2398         aSource = INetURLObject ( sUserAutoCorrFile );
2399         aDest = INetURLObject ( sUserAutoCorrFile );
2400         aDest.SetExtension ( "bak" );
2401         bCopy = bConvert = true;
2402     }
2403     if (bCopy)
2404     {
2405         try
2406         {
2407             OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2408             sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2409             sMain = sMain.copy(0, nSlashPos);
2410             ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2411             TransferInfo aInfo;
2412             aInfo.NameClash = NameClash::OVERWRITE;
2413             aInfo.NewTitle = aDest.GetLastName();
2414             aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2415             aInfo.MoveData  = false;
2416             aNewContent.executeCommand( "transfer", Any(aInfo));
2417         }
2418         catch (...)
2419         {
2420             bError = true;
2421         }
2422     }
2423     if (bConvert && !bError)
2424     {
2425         tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2426         tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE );
2427 
2428         if( xSrcStg.is() && xDstStg.is() )
2429         {
2430             OUString sXMLWord     ( pXMLImplWrdStt_ExcptLstStr );
2431             OUString sXMLSentence ( pXMLImplCplStt_ExcptLstStr );
2432             std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2433 
2434             if (xSrcStg->IsContained( sXMLWord ) )
2435                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg );
2436 
2437             if (pTmpWordList)
2438             {
2439                 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true );
2440                 pTmpWordList.reset();
2441             }
2442 
2443 
2444             if (xSrcStg->IsContained( sXMLSentence ) )
2445                 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2446 
2447             if (pTmpWordList)
2448             {
2449                 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2450                 pTmpWordList->clear();
2451             }
2452 
2453             GetAutocorrWordList();
2454             MakeBlocklist_Imp( *xDstStg );
2455             sShareAutoCorrFile = sUserAutoCorrFile;
2456             xDstStg = nullptr;
2457             try
2458             {
2459                 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2460                 aContent.executeCommand ( "delete", makeAny ( true ) );
2461             }
2462             catch (...)
2463             {
2464             }
2465         }
2466     }
2467     else if( bCopy && !bError )
2468         sShareAutoCorrFile = sUserAutoCorrFile;
2469 }
2470 
2471 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2472 {
2473     bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2474     if( !bRemove )
2475     {
2476         tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2477                     ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2478         if( refList.is() )
2479         {
2480             refList->SetSize( 0 );
2481             refList->SetBufferSize( 8192 );
2482             refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) );
2483 
2484             uno::Reference< uno::XComponentContext > xContext =
2485                 comphelper::getProcessComponentContext();
2486 
2487             uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2488             uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2489             xWriter->setOutputStream(xOut);
2490 
2491             rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2492 
2493             xExp->exportDoc( XML_BLOCK_LIST );
2494 
2495             refList->Commit();
2496             bRet = ERRCODE_NONE == refList->GetError();
2497             if( bRet )
2498             {
2499                 refList.clear();
2500                 rStg.Commit();
2501                 if( ERRCODE_NONE != rStg.GetError() )
2502                 {
2503                     bRemove = true;
2504                     bRet = false;
2505                 }
2506             }
2507         }
2508         else
2509             bRet = false;
2510     }
2511 
2512     if( bRemove )
2513     {
2514         rStg.Remove( pXMLImplAutocorr_ListStr );
2515         rStg.Commit();
2516     }
2517 
2518     return bRet;
2519 }
2520 
2521 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2522 {
2523     // First get the current list!
2524     GetAutocorrWordList();
2525 
2526     MakeUserStorage_Impl();
2527     tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2528 
2529     bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2530 
2531     if( bRet )
2532     {
2533         for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2534         {
2535             std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2536             if( xFoundEntry )
2537             {
2538                 if( !xFoundEntry->IsTextOnly() )
2539                 {
2540                     OUString aName( aWordToDelete.GetShort() );
2541                     if (xStorage->IsOLEStorage())
2542                         aName = EncryptBlockName_Imp(aName);
2543                     else
2544                         GeneratePackageName ( aWordToDelete.GetShort(), aName );
2545 
2546                     if( xStorage->IsContained( aName ) )
2547                     {
2548                         xStorage->Remove( aName );
2549                         bRet = xStorage->Commit();
2550                     }
2551                 }
2552             }
2553         }
2554 
2555         for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2556         {
2557             SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2558             std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2559             if( xRemoved )
2560             {
2561                 if( !xRemoved->IsTextOnly() )
2562                 {
2563                     // Still have to remove the Storage
2564                     OUString sStorageName( aWordToAdd.GetShort() );
2565                     if (xStorage->IsOLEStorage())
2566                         sStorageName = EncryptBlockName_Imp(sStorageName);
2567                     else
2568                         GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2569 
2570                     if( xStorage->IsContained( sStorageName ) )
2571                         xStorage->Remove( sStorageName );
2572                 }
2573             }
2574             bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2575 
2576             if ( !bRet )
2577             {
2578                 break;
2579             }
2580         }
2581 
2582         if ( bRet )
2583         {
2584             bRet = MakeBlocklist_Imp( *xStorage );
2585         }
2586     }
2587     return bRet;
2588 }
2589 
2590 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2591 {
2592     // First get the current list!
2593     GetAutocorrWordList();
2594 
2595     MakeUserStorage_Impl();
2596     tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2597 
2598     bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2599 
2600     // Update the word list
2601     if( bRet )
2602     {
2603         SvxAutocorrWord aNew(rShort, rLong, true );
2604         std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2605         if( xRemove )
2606         {
2607             if( !xRemove->IsTextOnly() )
2608             {
2609                 // Still have to remove the Storage
2610                 OUString sStgNm( rShort );
2611                 if (xStg->IsOLEStorage())
2612                     sStgNm = EncryptBlockName_Imp(sStgNm);
2613                 else
2614                     GeneratePackageName ( rShort, sStgNm);
2615 
2616                 if( xStg->IsContained( sStgNm ) )
2617                     xStg->Remove( sStgNm );
2618             }
2619         }
2620 
2621         if( pAutocorr_List->Insert( std::move(aNew) ) )
2622         {
2623             bRet = MakeBlocklist_Imp( *xStg );
2624             xStg = nullptr;
2625         }
2626         else
2627         {
2628             bRet = false;
2629         }
2630     }
2631     return bRet;
2632 }
2633 
2634 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2635                                                SfxObjectShell& rShell )
2636 {
2637     // First get the current list!
2638     GetAutocorrWordList();
2639 
2640     MakeUserStorage_Impl();
2641 
2642     OUString sLong;
2643     try
2644     {
2645         uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2646         bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2647         xStg = nullptr;
2648 
2649         // Update the word list
2650         if( bRet )
2651         {
2652             if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2653             {
2654                 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2655                 MakeBlocklist_Imp( *xStor );
2656             }
2657         }
2658     }
2659     catch ( const uno::Exception& )
2660     {
2661     }
2662 }
2663 
2664 // Keep the list sorted ...
2665 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2666 {
2667     bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2668     {
2669         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2670         return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2671     }
2672 };
2673 
2674 namespace {
2675 
2676 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2677 
2678 }
2679 
2680 struct SvxAutocorrWordList::Impl
2681 {
2682 
2683     // only one of these contains the data
2684     // maSortedVector is manually sorted so we can optimise data movement
2685     mutable AutocorrWordSetType maSortedVector;
2686     mutable AutocorrWordHashType maHash; // key is 'Short'
2687 
2688     void DeleteAndDestroyAll()
2689     {
2690         maHash.clear();
2691         maSortedVector.clear();
2692     }
2693 };
2694 
2695 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2696 
2697 SvxAutocorrWordList::~SvxAutocorrWordList()
2698 {
2699 }
2700 
2701 void SvxAutocorrWordList::DeleteAndDestroyAll()
2702 {
2703     mpImpl->DeleteAndDestroyAll();
2704 }
2705 
2706 // returns true if inserted
2707 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
2708 {
2709     if ( mpImpl->maSortedVector.empty() ) // use the hash
2710     {
2711         OUString aShort = aWord.GetShort();
2712         auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
2713         if (inserted)
2714             return &(it->second);
2715         return nullptr;
2716     }
2717     else
2718     {
2719         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
2720         CollatorWrapper& rCmp = ::GetCollatorWrapper();
2721         if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
2722         {
2723             it = mpImpl->maSortedVector.insert(it, std::move(aWord));
2724             return &*it;
2725         }
2726         return nullptr;
2727     }
2728 }
2729 
2730 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
2731 {
2732     (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
2733 }
2734 
2735 bool SvxAutocorrWordList::empty() const
2736 {
2737     return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
2738 }
2739 
2740 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
2741 {
2742 
2743     if ( mpImpl->maSortedVector.empty() ) // use the hash
2744     {
2745         AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
2746         if( it != mpImpl->maHash.end() )
2747         {
2748             SvxAutocorrWord pMatch = std::move(it->second);
2749             mpImpl->maHash.erase (it);
2750             return pMatch;
2751         }
2752     }
2753     else
2754     {
2755         auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
2756         if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
2757         {
2758             SvxAutocorrWord pMatch = std::move(*it);
2759             mpImpl->maSortedVector.erase (it);
2760             return pMatch;
2761         }
2762     }
2763     return std::optional<SvxAutocorrWord>();
2764 }
2765 
2766 // return the sorted contents - defer sorting until we have to.
2767 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
2768 {
2769     // convert from hash to set permanently
2770     if ( mpImpl->maSortedVector.empty() )
2771     {
2772         std::vector<SvxAutocorrWord> tmp;
2773         tmp.reserve(mpImpl->maHash.size());
2774         for (auto & rPair : mpImpl->maHash)
2775             tmp.emplace_back(std::move(rPair.second));
2776         mpImpl->maHash.clear();
2777         // sort twice - this gets the list into mostly-sorted order, which
2778         // reduces the number of times we need to invoke the expensive ICU collate fn.
2779         std::sort(tmp.begin(), tmp.end(),
2780             [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
2781             {
2782                 return lhs.GetShort() < rhs.GetShort();
2783             });
2784         // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
2785         // stable_sort is twice as fast as sort in this situation because it does
2786         // fewer comparison operations.
2787         std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
2788         mpImpl->maSortedVector = std::move(tmp);
2789     }
2790     return mpImpl->maSortedVector;
2791 }
2792 
2793 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
2794                                       const OUString &rTxt,
2795                                       sal_Int32 &rStt,
2796                                       sal_Int32 nEndPos) const
2797 {
2798     const OUString& rChk = pFnd->GetShort();
2799 
2800     sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
2801     sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
2802     sal_Int32 nSttWdPos = nEndPos;
2803 
2804     // direct replacement of keywords surrounded by colons (for example, ":name:")
2805     bool bColonNameColon = rTxt.getLength() > nEndPos &&
2806         rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
2807     if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
2808     {
2809 
2810         bool bWasWordDelim = false;
2811         sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
2812         if (bColonNameColon)
2813             nCalcStt++;
2814         if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
2815               ( nCalcStt < rStt &&
2816                 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
2817         {
2818             TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
2819             OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
2820             if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
2821             {
2822                 rStt = nCalcStt;
2823                 if (!left_wildcard)
2824                 {
2825                     // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
2826                     if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
2827                         return nullptr;
2828                     return pFnd;
2829                 }
2830                 // get the first word delimiter position before the matching ".*word" pattern
2831                 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
2832                     ;
2833                 if (bWasWordDelim) rStt++;
2834                 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard);
2835                 // avoid double spaces before simple "word" replacement
2836                 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong();
2837                 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) )
2838                     return pNew;
2839             }
2840         } else
2841         // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
2842         if ( right_wildcard )
2843         {
2844 
2845             OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
2846             // Get the last word delimiter position
2847             bool not_suffix;
2848 
2849             while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
2850                 ;
2851             // search the first occurrence (with a left word delimitation, if needed)
2852             sal_Int32 nFndPos = -1;
2853             do {
2854                 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1);
2855                 if (nFndPos == -1)
2856                     break;
2857                 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
2858             } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );
2859 
2860             if ( nFndPos != -1 )
2861             {
2862                 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"
2863 
2864                 if ( left_wildcard )
2865                 {
2866                     // get the first word delimiter position before the matching ".*word.*" pattern
2867                     while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
2868                         ;
2869                     if (bWasWordDelim) nFndPos++;
2870                 }
2871                 if (nEndPos + extra_repl <= nFndPos)
2872                 {
2873                     return nullptr;
2874                 }
2875                 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
2876                 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl);
2877 
2878                 OUString aLong;
2879                 rStt = nFndPos;
2880                 if ( !left_wildcard )
2881                 {
2882                     sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
2883                     aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : "");
2884                 } else {
2885                     OUStringBuffer buf;
2886                     do {
2887                         nSttWdPos = rTxt.indexOf( sTmp, nFndPos);
2888                         if (nSttWdPos != -1)
2889                         {
2890                             sal_Int32 nTmp(nFndPos);
2891                             while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp]))
2892                                 nTmp++;
2893                             if (nTmp < nSttWdPos)
2894                                 break; // word delimiter found
2895                             buf.append(std::u16string_view(rTxt).substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
2896                             nFndPos = nSttWdPos + sTmp.getLength();
2897                         }
2898                     } while (nSttWdPos != -1);
2899                     if (nEndPos - nFndPos > extra_repl)
2900                         buf.append(std::u16string_view(rTxt).substr(nFndPos, nEndPos - nFndPos));
2901                     aLong = buf.makeStringAndClear();
2902                 }
2903                 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) )
2904                 {
2905                     if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos )
2906                         return pNew;
2907                 }
2908             }
2909         }
2910     }
2911     return nullptr;
2912 }
2913 
2914 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt,
2915                                                               sal_Int32 nEndPos) const
2916 {
2917     for (auto const& elem : mpImpl->maHash)
2918     {
2919         if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) )
2920             return pTmp;
2921     }
2922 
2923     for (auto const& elem : mpImpl->maSortedVector)
2924     {
2925         if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) )
2926             return pTmp;
2927     }
2928     return nullptr;
2929 }
2930 
2931 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2932