1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <memory>
21 #include <utility>
22 #include <algorithm>
23 #include <string_view>
24 #include <sal/config.h>
25
26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
27 #include <com/sun/star/embed/XStorage.hpp>
28 #include <com/sun/star/io/IOException.hpp>
29 #include <com/sun/star/io/XStream.hpp>
30 #include <tools/urlobj.hxx>
31 #include <i18nlangtag/mslangid.hxx>
32 #include <i18nutil/transliteration.hxx>
33 #include <sal/log.hxx>
34 #include <osl/diagnose.h>
35 #include <vcl/svapp.hxx>
36 #include <vcl/settings.hxx>
37 #include <svl/fstathelper.hxx>
38 #include <svl/urihelper.hxx>
39 #include <unotools/charclass.hxx>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <unotools/collatorwrapper.hxx>
42 #include <com/sun/star/i18n/UnicodeScript.hpp>
43 #include <com/sun/star/i18n/OrdinalSuffix.hpp>
44 #include <unotools/localedatawrapper.hxx>
45 #include <unotools/transliterationwrapper.hxx>
46 #include <comphelper/processfactory.hxx>
47 #include <comphelper/sequence.hxx>
48 #include <comphelper/storagehelper.hxx>
49 #include <o3tl/string_view.hxx>
50 #include <editeng/editids.hrc>
51 #include <sot/storage.hxx>
52 #include <editeng/udlnitem.hxx>
53 #include <editeng/wghtitem.hxx>
54 #include <editeng/postitem.hxx>
55 #include <editeng/crossedoutitem.hxx>
56 #include <editeng/escapementitem.hxx>
57 #include <editeng/svxacorr.hxx>
58 #include <editeng/unolingu.hxx>
59 #include <vcl/window.hxx>
60 #include <com/sun/star/xml/sax/InputSource.hpp>
61 #include <com/sun/star/xml/sax/FastParser.hpp>
62 #include <com/sun/star/xml/sax/Writer.hpp>
63 #include <com/sun/star/xml/sax/SAXParseException.hpp>
64 #include <unotools/streamwrap.hxx>
65 #include "SvXMLAutoCorrectImport.hxx"
66 #include "SvXMLAutoCorrectExport.hxx"
67 #include "SvXMLAutoCorrectTokenHandler.hxx"
68 #include <ucbhelper/content.hxx>
69 #include <com/sun/star/ucb/ContentCreationException.hpp>
70 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
71 #include <com/sun/star/ucb/TransferInfo.hpp>
72 #include <com/sun/star/ucb/NameClash.hpp>
73 #include <comphelper/diagnose_ex.hxx>
74 #include <xmloff/xmltoken.hxx>
75 #include <unordered_map>
76 #include <rtl/character.hxx>
77
78 using namespace ::com::sun::star::ucb;
79 using namespace ::com::sun::star::uno;
80 using namespace ::com::sun::star::xml::sax;
81 using namespace ::com::sun::star;
82 using namespace ::xmloff::token;
83 using namespace ::utl;
84
namespace {

/// Bit flags, one bit per punctuation mark named by the enumerator, so that
/// several of them can be combined into a single value.
enum class Flags {
    NONE            = 0,
    FullStop        = 1 << 0,
    ExclamationMark = 1 << 1,
    QuestionMark    = 1 << 2,
};

}
95
namespace o3tl {
    // Register Flags with o3tl's typed_flags machinery. The mask 0x07 is the
    // union of all declared bits (0x01 | 0x02 | 0x04).
    template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {};
}
99 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space
100
101 constexpr OUString pXMLImplWordStart_ExcptLstStr = u"WordExceptList.xml"_ustr;
102 constexpr OUString pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"_ustr;
103 constexpr OUString pXMLImplAutocorr_ListStr = u"DocumentList.xml"_ustr;
104
105 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks
106 // Curious, why these \u0083\u0084\u0089\u0091\u0092\u0093\u0094 are handled as "begin characters"?
107 constexpr std::u16string_view
108 /* also at these beginnings - Brackets and all kinds of begin characters */
109 sImplSttSkipChars = u"\"'([{\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094",
110 /* also at these ends - Brackets and all kinds of begin characters */
111 sImplEndSkipChars = u"\"')]}\u2018\u2019\u201a\u201b\u201c\u201d\u201e\u201f\u0083\u0084\u0089\u0091\u0092\u0093\u0094";
112
113 static OUString EncryptBlockName_Imp(std::u16string_view rName);
114
NonFieldWordDelim(const sal_Unicode c)115 static bool NonFieldWordDelim( const sal_Unicode c )
116 {
117 return ' ' == c || '\t' == c || 0x0a == c ||
118 cNonBreakingSpace == c || 0x2011 == c;
119 }
120
IsWordDelim(const sal_Unicode c)121 static bool IsWordDelim( const sal_Unicode c )
122 {
123 return c == 0x1 || NonFieldWordDelim(c);
124 }
125
126
IsLowerLetter(sal_Int32 nCharType)127 static bool IsLowerLetter( sal_Int32 nCharType )
128 {
129 return CharClass::isLetterType( nCharType ) &&
130 ( css::i18n::KCharacterType::LOWER & nCharType);
131 }
132
IsUpperLetter(sal_Int32 nCharType)133 static bool IsUpperLetter( sal_Int32 nCharType )
134 {
135 return CharClass::isLetterType( nCharType ) &&
136 ( css::i18n::KCharacterType::UPPER & nCharType);
137 }
138
lcl_IsUnsupportedUnicodeChar(CharClass const & rCC,const OUString & rTxt,sal_Int32 nStt,sal_Int32 nEnd)139 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt,
140 sal_Int32 nStt, sal_Int32 nEnd )
141 {
142 for( ; nStt < nEnd; ++nStt )
143 {
144 css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt );
145 switch( nScript )
146 {
147 case css::i18n::UnicodeScript_kCJKRadicalsSupplement:
148 case css::i18n::UnicodeScript_kHangulJamo:
149 case css::i18n::UnicodeScript_kCJKSymbolPunctuation:
150 case css::i18n::UnicodeScript_kHiragana:
151 case css::i18n::UnicodeScript_kKatakana:
152 case css::i18n::UnicodeScript_kHangulCompatibilityJamo:
153 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth:
154 case css::i18n::UnicodeScript_kCJKCompatibility:
155 case css::i18n::UnicodeScript_kCJKUnifiedIdeographsExtensionA:
156 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph:
157 case css::i18n::UnicodeScript_kHangulSyllable:
158 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph:
159 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm:
160 return true;
161 default: ; //do nothing
162 }
163 }
164 return false;
165 }
166
lcl_IsSymbolChar(CharClass const & rCC,const OUString & rTxt,sal_Int32 nStt,sal_Int32 nEnd)167 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt,
168 sal_Int32 nStt, sal_Int32 nEnd )
169 {
170 for( ; nStt < nEnd; ++nStt )
171 {
172 if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt ))
173 return true;
174 }
175 return false;
176 }
177
/// Returns true when the code point c equals one of the UTF-16 code units
/// in arr. c is compared as a full 32-bit value, so code points above the
/// BMP never match.
static bool lcl_IsInArr(std::u16string_view arr, const sal_uInt32 c)
{
    for (const char16_t cCandidate : arr)
    {
        if (cCandidate == c)
            return true;
    }
    return false;
}
182
~SvxAutoCorrDoc()183 SvxAutoCorrDoc::~SvxAutoCorrDoc()
184 {
185 }
186
// Called by the functions:
//  - FnCapitalStartWord
//  - FnCapitalStartSentence
// after the exchange of characters. Then the words, if necessary, can be inserted
// into the exception list.
//
// This base implementation intentionally does nothing and ignores all of
// its arguments.
void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&,
                                      sal_Unicode )
{
}
196
// Base implementation: ignores the position argument and always reports
// LANGUAGE_SYSTEM (callers such as GetDocLanguage() resolve that value).
LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const
{
    return LANGUAGE_SYSTEM;
}
201
GetAppLang()202 static const LanguageTag& GetAppLang()
203 {
204 return Application::GetSettings().GetLanguageTag();
205 }
206
207 /// Never use an unresolved LANGUAGE_SYSTEM.
GetDocLanguage(const SvxAutoCorrDoc & rDoc,sal_Int32 nPos)208 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos )
209 {
210 LanguageType eLang = rDoc.GetLanguage( nPos );
211 if (eLang == LANGUAGE_SYSTEM)
212 eLang = GetAppLang().getLanguageType(); // the current work locale
213 return eLang;
214 }
215
GetLocaleDataWrapper(LanguageType nLang)216 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang )
217 {
218 static std::unique_ptr<LocaleDataWrapper> xLclDtWrp;
219 LanguageTag aLcl( nLang );
220 if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl)
221 xLclDtWrp.reset(new LocaleDataWrapper(std::move(aLcl)));
222 return *xLclDtWrp;
223 }
GetIgnoreTranslWrapper()224 static TransliterationWrapper& GetIgnoreTranslWrapper()
225 {
226 static int bIsInit = 0;
227 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(),
228 TransliterationFlags::IGNORE_KANA |
229 TransliterationFlags::IGNORE_WIDTH );
230 if( !bIsInit )
231 {
232 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() );
233 bIsInit = 1;
234 }
235 return aWrp;
236 }
GetCollatorWrapper()237 static CollatorWrapper& GetCollatorWrapper()
238 {
239 static CollatorWrapper aCollWrp = []()
240 {
241 CollatorWrapper tmp( ::comphelper::getProcessComponentContext() );
242 tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 );
243 return tmp;
244 }();
245 return aCollWrp;
246 }
247
IsAutoCorrectChar(sal_Unicode cChar)248 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
249 {
250 return cChar == '\0' || cChar == '\t' || cChar == 0x0a ||
251 cChar == ' ' || cChar == '\'' || cChar == '\"' ||
252 cChar == '*' || cChar == '_' || cChar == '%' ||
253 cChar == '.' || cChar == ',' || cChar == ';' ||
254 cChar == ':' || cChar == '?' || cChar == '!' ||
255 cChar == '<' || cChar == '>' ||
256 cChar == '/' || cChar == '-';
257 }
258
259 namespace
260 {
IsCompoundWordDelimChar(sal_Unicode cChar)261 bool IsCompoundWordDelimChar(sal_Unicode cChar)
262 {
263 return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar);
264 }
265 }
266
NeedsHardspaceAutocorr(sal_Unicode cChar)267 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar )
268 {
269 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' ||
270 cChar == '/' /*case for the urls exception*/;
271 }
272
GetDefaultFlags()273 ACFlags SvxAutoCorrect::GetDefaultFlags()
274 {
275 ACFlags nRet = ACFlags::Autocorrect
276 | ACFlags::CapitalStartSentence
277 | ACFlags::CapitalStartWord
278 | ACFlags::ChgOrdinalNumber
279 | ACFlags::ChgToEnEmDash
280 | ACFlags::AddNonBrkSpace
281 | ACFlags::TransliterateRTL
282 | ACFlags::ChgAngleQuotes
283 | ACFlags::ChgWeightUnderl
284 | ACFlags::SetINetAttr
285 | ACFlags::SetDOIAttr
286 | ACFlags::ChgQuotes
287 | ACFlags::SaveWordCplSttLst
288 | ACFlags::SaveWordWordStartLst
289 | ACFlags::CorrectCapsLock;
290 LanguageType eLang = GetAppLang().getLanguageType();
291 if( eLang.anyOf(
292 LANGUAGE_ENGLISH,
293 LANGUAGE_ENGLISH_US,
294 LANGUAGE_ENGLISH_UK,
295 LANGUAGE_ENGLISH_AUS,
296 LANGUAGE_ENGLISH_CAN,
297 LANGUAGE_ENGLISH_NZ,
298 LANGUAGE_ENGLISH_EIRE,
299 LANGUAGE_ENGLISH_SAFRICA,
300 LANGUAGE_ENGLISH_JAMAICA,
301 LANGUAGE_ENGLISH_CARIBBEAN))
302 nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes);
303 return nRet;
304 }
305
306 constexpr sal_Unicode cEmDash = 0x2014;
307 constexpr sal_Unicode cEnDash = 0x2013;
308 constexpr OUString sEmDash(u"\u2014"_ustr);
309 constexpr OUString sEnDash(u"\u2013"_ustr);
310 constexpr sal_Unicode cApostrophe = 0x2019;
311 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
312 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
313 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039;
314 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A;
315 // stop characters for searching preceding quotes
316 // (the first character is also the opening quote we are looking for)
317 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,,
318 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >>
319 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C
320 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 };
321 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 };
322 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 };
323
SvxAutoCorrect(OUString aShareAutocorrFile,OUString aUserAutocorrFile)324 SvxAutoCorrect::SvxAutoCorrect( OUString aShareAutocorrFile,
325 OUString aUserAutocorrFile )
326 : sShareAutoCorrFile(std::move( aShareAutocorrFile ))
327 , sUserAutoCorrFile(std::move( aUserAutocorrFile ))
328 , eCharClassLang( LANGUAGE_DONTKNOW )
329 , nFlags(SvxAutoCorrect::GetDefaultFlags())
330 , cStartDQuote( 0 )
331 , cEndDQuote( 0 )
332 , cStartSQuote( 0 )
333 , cEndSQuote( 0 )
334 {
335 }
336
SvxAutoCorrect(const SvxAutoCorrect & rCpy)337 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy )
338 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile )
339 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile )
340 , aSwFlags( rCpy.aSwFlags )
341 , eCharClassLang(rCpy.eCharClassLang)
342 , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WordStartLstLoad))
343 , cStartDQuote( rCpy.cStartDQuote )
344 , cEndDQuote( rCpy.cEndDQuote )
345 , cStartSQuote( rCpy.cStartSQuote )
346 , cEndSQuote( rCpy.cEndSQuote )
347 {
348 }
349
350
~SvxAutoCorrect()351 SvxAutoCorrect::~SvxAutoCorrect()
352 {
353 }
354
GetCharClass_(LanguageType eLang)355 void SvxAutoCorrect::GetCharClass_( LanguageType eLang )
356 {
357 moCharClass.emplace( LanguageTag( eLang) );
358 eCharClassLang = eLang;
359 }
360
SetAutoCorrFlag(ACFlags nFlag,bool bOn)361 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn )
362 {
363 ACFlags nOld = nFlags;
364 nFlags = bOn ? nFlags | nFlag
365 : nFlags & ~nFlag;
366
367 if( !bOn )
368 {
369 if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) )
370 nFlags &= ~ACFlags::CplSttLstLoad;
371 if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) )
372 nFlags &= ~ACFlags::WordStartLstLoad;
373 if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) )
374 nFlags &= ~ACFlags::ChgWordLstLoad;
375 }
376 }
377
378
379 // Correct TWo INitial CApitals
FnCapitalStartWord(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)380 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
381 sal_Int32 nSttPos, sal_Int32 nEndPos,
382 LanguageType eLang )
383 {
384 CharClass& rCC = GetCharClass( eLang );
385
386 // Delete all non alphanumeric. Test the characters at the beginning/end of
387 // the word ( recognizes: "(min.", "/min.", and so on.)
388 for( ; nSttPos < nEndPos; ++nSttPos )
389 if( rCC.isLetterNumeric( rTxt, nSttPos ))
390 break;
391 for( ; nSttPos < nEndPos; --nEndPos )
392 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 ))
393 break;
394
395 // Is the word a compounded word separated by delimiters?
396 // If so, keep track of all delimiters so each constituent
397 // word can be checked for two initial capital letters.
398 std::deque<sal_Int32> aDelimiters;
399
400 // Always check for two capitals at the beginning
401 // of the entire word, so start at nSttPos.
402 aDelimiters.push_back(nSttPos);
403
404 // Find all compound word delimiters
405 for (sal_Int32 n = nSttPos; n < nEndPos; ++n)
406 {
407 if (IsCompoundWordDelimChar(rTxt[ n ]))
408 {
409 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter
410 }
411 }
412
413 // Decide where to put the terminating delimiter.
414 // If the last AutoCorrect char was a newline, then the AutoCorrect
415 // char will not be included in rTxt.
416 // If the last AutoCorrect char was not a newline, then the AutoCorrect
417 // character will be the last character in rTxt.
418 if (!IsCompoundWordDelimChar(rTxt[nEndPos-1]))
419 aDelimiters.push_back(nEndPos);
420
421 // Iterate through the word and all words that compose it.
422 // Two capital letters at the beginning of word?
423 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI)
424 {
425 nSttPos = aDelimiters[nI];
426 nEndPos = aDelimiters[nI + 1];
427
428 if( nSttPos+2 < nEndPos &&
429 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) &&
430 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) &&
431 // Is the third character a lower case
432 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) &&
433 // Do not replace special attributes
434 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ])
435 {
436 // test if the word is in an exception list
437 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 ));
438 if( !FindInWordStartExceptList(eLang, sWord) )
439 {
440 // Check that word isn't correctly spelt before correcting:
441 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller =
442 LinguMgr::GetSpellChecker();
443 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) )
444 {
445 Sequence< css::beans::PropertyValue > aEmptySeq;
446 if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq))
447 {
448 return;
449 }
450 }
451 sal_Unicode cSave = rTxt[ nSttPos ];
452 OUString sChar = rCC.lowercase( OUString(cSave) );
453 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ))
454 {
455 if( ACFlags::SaveWordWordStartLst & nFlags )
456 rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave );
457 }
458 }
459 }
460 }
461 }
462
463 // Format ordinal numbers suffixes (1st -> 1^st)
FnChgOrdinalNumber(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)464 bool SvxAutoCorrect::FnChgOrdinalNumber(
465 SvxAutoCorrDoc& rDoc, const OUString& rTxt,
466 sal_Int32 nSttPos, sal_Int32 nEndPos,
467 LanguageType eLang)
468 {
469 // 1st, 2nd, 3rd, 4 - 0th
470 // 201th or 201st
471 // 12th or 12nd
472 bool bChg = false;
473
474 // In some languages ordinal suffixes should never be
475 // changed to superscript. Let's break for those languages.
476 if (!eLang.anyOf(
477 LANGUAGE_CATALAN, // tdf#156792
478 LANGUAGE_CATALAN_VALENCIAN,
479 LANGUAGE_SWEDISH,
480 LANGUAGE_SWEDISH_FINLAND))
481 {
482 CharClass& rCC = GetCharClass(eLang);
483
484 for (; nSttPos < nEndPos; ++nSttPos)
485 if (!lcl_IsInArr(sImplSttSkipChars, rTxt[nSttPos]))
486 break;
487 for (; nSttPos < nEndPos; --nEndPos)
488 if (!lcl_IsInArr(sImplEndSkipChars, rTxt[nEndPos - 1]))
489 break;
490
491
492 // Get the last number in the string to check
493 sal_Int32 nNumEnd = nEndPos;
494 bool bFoundEnd = false;
495 bool isValidNumber = true;
496 sal_Int32 i = nEndPos;
497 while (i > nSttPos)
498 {
499 i--;
500 bool isDigit = rCC.isDigit(rTxt, i);
501 if (bFoundEnd)
502 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i));
503
504 if (isDigit && !bFoundEnd)
505 {
506 bFoundEnd = true;
507 nNumEnd = i;
508 }
509 }
510
511 if (bFoundEnd && isValidNumber) {
512 sal_Int32 nNum = o3tl::toInt32(rTxt.subView(nSttPos, nNumEnd - nSttPos + 1));
513 std::u16string_view sEnd = rTxt.subView(nNumEnd + 1, nEndPos - nNumEnd - 1);
514
515 // Check if the characters after that number correspond to the ordinal suffix
516 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix
517 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext());
518
519 uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale());
520
521 // add extra suffixes for languages not handled by i18npool/ICU
522 if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) &&
523 ( nEndPos == nNumEnd + 3 || nEndPos == nNumEnd + 4 ) &&
524 ( sEnd[0] == 'a' || sEnd[0] == 'o' || sEnd[0] == 'r' ) )
525 {
526 auto aExtendedSuffixes = comphelper::sequenceToContainer< std::vector<OUString> >(aSuffixes);
527 aExtendedSuffixes.push_back(u"as"_ustr); // plural form of 'a'
528 aExtendedSuffixes.push_back(u"os"_ustr); // plural form of 'o'
529 aExtendedSuffixes.push_back(u"ra"_ustr); // alternative form of 'a'
530 aExtendedSuffixes.push_back(u"ro"_ustr); // alternative form of 'o'
531 aExtendedSuffixes.push_back(u"ras"_ustr); // alternative form of "as"
532 aExtendedSuffixes.push_back(u"ros"_ustr); // alternative form of "os"
533 aSuffixes = comphelper::containerToSequence(aExtendedSuffixes);
534 }
535
536 for (OUString const & sSuffix : aSuffixes)
537 {
538 if (sSuffix == sEnd)
539 {
540 // Check if the ordinal suffix has to be set as super script
541 if (rCC.isLetter(sSuffix))
542 {
543 sal_Int32 nNumberChanged = 0;
544 sal_Int32 nSuffixChanged = 0;
545 // exceptions for Portuguese
546 // add missing dot: 1a -> 1.ª
547 // and remove optional 'r': 1ro -> 1.º
548 if ( primary(eLang) == primary(LANGUAGE_PORTUGUESE) )
549 {
550 if ( sSuffix.startsWith("r") )
551 {
552 rDoc.Delete( nNumEnd + 1, nNumEnd + 2 );
553 nSuffixChanged = -1;
554 }
555 rDoc.Insert( nNumEnd + 1, u"."_ustr );
556 nNumberChanged = 1;
557 }
558
559 // Do the change
560 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER,
561 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT);
562 rDoc.SetAttr(nNumEnd + 1 + nNumberChanged,
563 nEndPos + nNumberChanged + nSuffixChanged,
564 SID_ATTR_CHAR_ESCAPEMENT,
565 aSvxEscapementItem);
566 bChg = true;
567 break;
568 }
569 }
570 }
571 }
572 }
573 return bChg;
574 }
575
// Replace dashes
//
// Two independent replacements are tried on the word in [nSttPos, nEndPos):
//  1. a " - " or " --" between two words becomes a single dash character
//     (an em dash for Russian and Ukrainian, otherwise an en dash);
//  2. a "--" inside the word (not at its very start or end) becomes an em
//     dash, or an en dash for Hungarian, Finnish, or between two digits.
// Returns true when at least one replacement was made in rDoc.
bool SvxAutoCorrect::FnChgToEnEmDash(
                                SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                sal_Int32 nSttPos, sal_Int32 nEndPos,
                                LanguageType eLang )
{
    bool bRet = false;
    CharClass& rCC = GetCharClass( eLang );
    // Resolve LANGUAGE_SYSTEM to the application language before the
    // per-language decisions below.
    if (eLang == LANGUAGE_SYSTEM)
        eLang = GetAppLang().getLanguageType();
    bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN);

    // rTxt may refer to the frame text that will change in the calls to rDoc.Delete / rDoc.Insert;
    // keep a local copy for later use
    OUString aOrigTxt = rTxt;
    // Length difference caused by the first replacement; used to adjust the
    // position of the second replacement, which is computed on aOrigTxt.
    sal_Int32 nFirstReplacementTextLengthChange = 0;

    // replace " - " or " --" with "enDash"
    // (or with "emDash" when bAlwaysUseEmDash is set)
    if( 1 < nSttPos && 1 <= nEndPos - nSttPos )
    {
        sal_Unicode cCh = rTxt[ nSttPos ];
        if( '-' == cCh )
        {
            // Case: the word itself starts with "--" and is preceded by a space
            if( 1 < nEndPos - nSttPos &&
                ' ' == rTxt[ nSttPos-1 ] &&
                '-' == rTxt[ nSttPos+1 ])
            {
                sal_Int32 n;
                // Skip begin characters after the "--"; cCh ends up as the
                // first character that is not skipped (or stays '-' when
                // nothing follows).
                for( n = nSttPos+2; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " --[<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    // Walk backwards over end characters in front of the space
                    for( n = nSttPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                        ;

                    // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9]
                    if( rCC.isLetterNumeric( OUString(cCh) ))
                    {
                        rDoc.Delete( nSttPos, nSttPos + 2 );
                        rDoc.Insert( nSttPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = -1; // 2 ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
        // Case: the word is preceded by "- " or "-- "
        else if( 3 < nSttPos &&
                 ' ' == rTxt[ nSttPos-1 ] &&
                 '-' == rTxt[ nSttPos-2 ])
        {
            // nTmpPos: position of the (first) dash, nLen: number of dashes
            sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2;
            if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) )
            {
                --nTmpPos;
                ++nLen;
                cCh = rTxt[ nTmpPos-1 ];
            }
            // The dash(es) must themselves be preceded by a space
            if( ' ' == cCh )
            {
                for( n = nSttPos; n < nEndPos && lcl_IsInArr(
                            sImplSttSkipChars,(cCh = rTxt[ n ]));
                        ++n )
                    ;

                // found: " - [<AnySttChars>][A-z0-9]
                if( rCC.isLetterNumeric( OUString(cCh) ) )
                {
                    cCh = ' ';
                    for( n = nTmpPos-1; n && lcl_IsInArr(
                            sImplEndSkipChars,(cCh = rTxt[ --n ])); )
                        ;
                    // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9]
                    // (sentence-ending '.', '!' and '?' are accepted as well)
                    if (rCC.isLetterNumeric(OUString(cCh)) || lcl_IsInArr(u".!?", cCh))
                    {
                        rDoc.Delete( nTmpPos, nTmpPos + nLen );
                        rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? sEmDash : sEnDash );
                        nFirstReplacementTextLengthChange = 1 - nLen; // nLen ch -> 1 ch
                        bRet = true;
                    }
                }
            }
        }
    }

    // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash"
    // [0-9]--[0-9] double dash always replaced with "enDash"
    // Finnish and Hungarian use enDash instead of emDash.
    bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH);
    if( 4 <= nEndPos - nSttPos )
    {
        std::u16string_view sTmpView( aOrigTxt.subView( nSttPos, nEndPos - nSttPos ) );
        size_t nFndPos = sTmpView.find(u"--");
        // The "--" must have at least one character before and after it
        // inside the word (npos also fails the upper bound test).
        if (nFndPos > 0 && nFndPos < sTmpView.size() - 2)
        {
            // Use proper codepoints. Currently, CharClass::isLetterNumeric is broken, it
            // uses the index *both* as code unit index (when checking it as ASCII), *and*
            // as code point index (when passes to css::i18n::XCharacterClassification).
            // Oh well... Anyway, single-codepoint strings will workaround it.
            sal_Int32 nStart = nSttPos + nFndPos;
            // Code point immediately before the "--"
            sal_uInt32 chStart = aOrigTxt.iterateCodePoints(&nStart, -1);
            OUString sStart(&chStart, 1);
            // No idea why sImplEndSkipChars is checked at start
            if (rCC.isLetterNumeric(sStart, 0) || lcl_IsInArr(sImplEndSkipChars, chStart))
            {
                sal_Int32 nEnd = nSttPos + nFndPos + 2;
                // Code point immediately after the "--"
                sal_uInt32 chEnd = aOrigTxt.iterateCodePoints(&nEnd, 1);
                OUString sEnd(&chEnd, 1);
                // No idea why sImplSttSkipChars is checked at end
                if (rCC.isLetterNumeric(sEnd, 0) || lcl_IsInArr(sImplSttSkipChars, chEnd))
                {
                    // Shift the position by the length change of the first replacement
                    nSttPos = nSttPos + nFndPos + nFirstReplacementTextLengthChange;
                    rDoc.Delete(nSttPos, nSttPos + 2);
                    rDoc.Insert(nSttPos,
                                (bEnDash || (rCC.isDigit(sStart, 0) && rCC.isDigit(sEnd, 0))
                                     ? sEnDash
                                     : sEmDash));
                    bRet = true;
                }
            }
        }
    }
    return bRet;
}
704
705 // Add non-breaking space before specific punctuation marks in French text
FnAddNonBrkSpace(SvxAutoCorrDoc & rDoc,std::u16string_view rTxt,sal_Int32 nEndPos,LanguageType eLang,bool & io_bNbspRunNext)706 sal_Int32 SvxAutoCorrect::FnAddNonBrkSpace(
707 SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
708 sal_Int32 nEndPos,
709 LanguageType eLang, bool& io_bNbspRunNext )
710 {
711 sal_Int32 nRet = -1;
712
713 CharClass& rCC = GetCharClass( eLang );
714
715 if ( rCC.getLanguageTag().getLanguage() == "fr" )
716 {
717 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA");
718 OUString allChars = u":;?!%"_ustr;
719 OUString chars( allChars );
720 if ( bFrCA )
721 chars = ":";
722
723 sal_Unicode cChar = rTxt[ nEndPos ];
724 bool bHasSpace = chars.indexOf( cChar ) != -1;
725 bool bIsSpecial = allChars.indexOf( cChar ) != -1;
726 if ( bIsSpecial )
727 {
728 // Get the last word delimiter position
729 sal_Int32 nSttWdPos = nEndPos;
730 bool bWasWordDelim = false;
731 while( nSttWdPos )
732 {
733 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]);
734 if (bWasWordDelim)
735 break;
736 }
737
738 //See if the text is the start of a protocol string, e.g. have text of
739 //"http" see if it is the start of "http:" and if so leave it alone
740 size_t nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0);
741 size_t nProtocolLen = nEndPos - nSttWdPos + 1;
742 if (nIndex + nProtocolLen <= rTxt.size())
743 {
744 if (INetURLObject::CompareProtocolScheme(rTxt.substr(nIndex, nProtocolLen)) != INetProtocol::NotValid)
745 return -1;
746 }
747
748 // Check the presence of "://" in the word
749 size_t nStrPos = rTxt.find( u"://", nSttWdPos + 1 );
750 if ( nStrPos == std::u16string_view::npos && nEndPos > 0 )
751 {
752 // Check the previous char
753 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
754 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' )
755 {
756 // Remove any previous normal space
757 sal_Int32 nPos = nEndPos - 1;
758 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace )
759 {
760 if ( nPos == 0 ) break;
761 nPos--;
762 cPrevChar = rTxt[ nPos ];
763 }
764
765 nPos++;
766 if ( nEndPos - nPos > 0 )
767 rDoc.Delete( nPos, nEndPos );
768
769 // Add the non-breaking space at the end pos
770 if ( bHasSpace )
771 rDoc.Insert( nPos, OUString(cNonBreakingSpace) );
772 io_bNbspRunNext = true;
773 nRet = nPos;
774 }
775 else if ( chars.indexOf( cPrevChar ) != -1 )
776 io_bNbspRunNext = true;
777 }
778 }
779 else if ( cChar == '/' && nEndPos > 1 && static_cast<sal_Int32>(rTxt.size()) > (nEndPos - 1) )
780 {
781 // Remove the hardspace right before to avoid formatting URLs
782 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ];
783 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ];
784 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace )
785 {
786 rDoc.Delete( nEndPos - 2, nEndPos - 1 );
787 nRet = nEndPos - 1;
788 }
789 }
790 }
791
792 return nRet;
793 }
794
795 // URL recognition
FnSetINetAttr(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)796 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
797 sal_Int32 nSttPos, sal_Int32 nEndPos,
798 LanguageType eLang )
799 {
800 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos,
801 GetCharClass( eLang ) ));
802 bool bRet = !sURL.isEmpty();
803 if( bRet ) // so, set attribute:
804 rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
805 return bRet;
806 }
807
808 // DOI citation recognition
FnSetDOIAttr(SvxAutoCorrDoc & rDoc,std::u16string_view rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)809 bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, std::u16string_view rTxt,
810 sal_Int32 nSttPos, sal_Int32 nEndPos,
811 LanguageType eLang )
812 {
813 OUString sURL( URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
814 bool bRet = !sURL.isEmpty();
815 if( bRet ) // so, set attribute:
816 rDoc.SetINetAttr( nSttPos, nEndPos, sURL );
817 return bRet;
818 }
819
820 // Automatic *bold*, /italic/, -strikeout- and _underline_
FnChgWeightUnderl(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nEndPos)821 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
822 sal_Int32 nEndPos )
823 {
824 // Condition:
825 // at the beginning: _, *, / or ~ after Space with the following !Space
826 // at the end: _, *, / or ~ before Space (word delimiter?)
827
828 sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout
829 if( ++nEndPos != rTxt.getLength() &&
830 !IsWordDelim( rTxt[ nEndPos ] ) )
831 return false;
832
833 --nEndPos;
834
835 bool bAlphaNum = false;
836 sal_Int32 nPos = nEndPos;
837 sal_Int32 nFndPos = -1;
838 CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM );
839
840 while( nPos )
841 {
842 switch( sal_Unicode c = rTxt[ --nPos ] )
843 {
844 case '_':
845 case '-':
846 case '/':
847 case '*':
848 if( c == cInsChar )
849 {
850 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos ||
851 IsWordDelim( rTxt[ nPos-1 ])) &&
852 !IsWordDelim( rTxt[ nPos+1 ]))
853 nFndPos = nPos;
854 else
855 // Condition is not satisfied, so cancel
856 nFndPos = -1;
857 nPos = 0;
858 }
859 break;
860 default:
861 if( !bAlphaNum )
862 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos );
863 }
864 }
865
866 if( -1 != nFndPos )
867 {
868 // first delete the Character at the end - this allows insertion
869 // of an empty hint in SetAttr which would be removed by Delete
870 // (fdo#62536, AUTOFMT in Writer)
871 rDoc.Delete( nEndPos, nEndPos + 1 );
872
873 // Span the Attribute over the area
874 // the end.
875 if( '*' == cInsChar ) // Bold
876 {
877 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT );
878 rDoc.SetAttr( nFndPos + 1, nEndPos,
879 SID_ATTR_CHAR_WEIGHT,
880 aSvxWeightItem);
881 }
882 else if( '/' == cInsChar ) // Italic
883 {
884 SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE );
885 rDoc.SetAttr( nFndPos + 1, nEndPos,
886 SID_ATTR_CHAR_POSTURE,
887 aSvxPostureItem);
888 }
889 else if( '-' == cInsChar ) // Strikeout
890 {
891 SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT );
892 rDoc.SetAttr( nFndPos + 1, nEndPos,
893 SID_ATTR_CHAR_STRIKEOUT,
894 aSvxCrossedOutItem);
895 }
896 else // Underline
897 {
898 SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE );
899 rDoc.SetAttr( nFndPos + 1, nEndPos,
900 SID_ATTR_CHAR_UNDERLINE,
901 aSvxUnderlineItem);
902 }
903 rDoc.Delete( nFndPos, nFndPos + 1 );
904 }
905
906 return -1 != nFndPos;
907 }
908
// Capitalize first letter of every sentence.
//
// Looks at the word that ends at nEndPos in rTxt and, if that word starts a
// sentence, replaces its first letter by its title-case form via
// rDoc.ReplaceRange().
//
// Outline:
//  1. Walk backwards from nEndPos to find the start (pWordStt) and the end
//     (pDelim) of the last word.
//  2. Bail out for words that must not be touched (already upper case,
//     protocol scheme such as "http", very short word followed by one of
//     ".-)>", single Greek letter outside Greek text, word start exceptions).
//  3. Skip the delimiters in front of the word. If the start of the paragraph
//     is reached, continue at the end of the previous paragraph obtained from
//     rDoc.GetPrevPara(bNormalPos); without a previous paragraph the word is
//     capitalized right away.
//  4. Require sentence-ending punctuation ('.', '!', '?' or their Asian /
//     full-width forms), each kind at most once, and check the word in front
//     of it against the exception list for abbreviations.
//  5. Replace the letter and, if ACFlags::SaveWordCplSttLst is set in nFlags,
//     report the word in front of the full stop to rDoc.SaveCpltSttWord().
//
// rDoc       document interface used for all modifications
// rTxt       text of the current paragraph
// bNormalPos only forwarded to rDoc.GetPrevPara()
// nSttPos    only used for the initial range check; overwritten later with
//            the position of the replaced letter
// nEndPos    end (exclusive) of the range to examine
// eLang      language used for character classification and exception lists
void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc,
                                    const OUString& rTxt, bool bNormalPos,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{

    if( rTxt.isEmpty() || nEndPos <= nSttPos )
        return;

    CharClass& rCC = GetCharClass( eLang );
    OUString aText( rTxt );
    const sal_Unicode *pStart = aText.getStr(),
                      *pStr = pStart + nEndPos,
                      *pWordStt = nullptr,
                      *pDelim = nullptr;

    // Step 1: scan backwards for the last word. pDelim is set once, to the
    // position behind the last letter; pWordStt follows the scan to the
    // first letter of the word.
    bool bAtStart = false;
    do {
        --pStr;
        if (rCC.isLetter(aText, pStr - pStart))
        {
            if( !pWordStt )
                pDelim = pStr+1;
            pWordStt = pStr;
        }
        else if (pWordStt && !rCC.isDigit(aText, pStr - pStart))
        {
            if( (lcl_IsInArr( u"-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words
                pWordStt - 1 == pStr &&
                // The character must not be the first one of the paragraph,
                // so that pStr-1 below stays inside the text.
                // Replaced < by <= (#i38971#)
                (pStart + 1) <= pStr &&
                rCC.isLetter(aText, pStr-1 - pStart))
                pWordStt = --pStr;
            else
                break;
        }
        bAtStart = (pStart == pStr);
    } while( !bAtStart );

    if (!pWordStt)
        return;    // no character to be replaced


    // Step 2: cases in which the word is left alone.
    if (rCC.isDigit(aText, pStr - pStart))
        return; // already ok

    if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart)))
        return; // already ok

    //See if the text is the start of a protocol string, e.g. have text of
    //"http" see if it is the start of "http:" and if so leave it alone
    sal_Int32 nIndex = pWordStt - pStart;
    sal_Int32 nProtocolLen = pDelim - pWordStt + 1; // word plus the following character
    if (nIndex + nProtocolLen <= rTxt.getLength())
    {
        if (INetURLObject::CompareProtocolScheme(rTxt.subView(nIndex, nProtocolLen)) != INetProtocol::NotValid)
            return; // already ok
    }

    if (0x1 == *pWordStt || 0x2 == *pWordStt)
        return; // already ok

    // Only capitalize, if string before specified characters is long enough
    // (words of at most two characters followed by one of ".-)>" are skipped)
    if( *pDelim && 2 >= pDelim - pWordStt &&
        lcl_IsInArr( u".-)>", *pDelim ) )
        return;

    // tdf#59666 don't capitalize single Greek letters (except in Greek texts)
    if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK )
        return;

    // Step 3: skip the delimiters in front of the word.
    if( !bAtStart ) // Still no beginning of a paragraph?
    {
        if (NonFieldWordDelim(*pStr))
        {
            for (;;)
            {
                // the post-decrement moves on even when the start has been
                // reached; pStr is re-initialized below in that case
                bAtStart = (pStart == pStr--);
                if (bAtStart || !NonFieldWordDelim(*pStr))
                    break;
            }
        }
        // Asian full stop, full width full stop, full width exclamation mark
        // and full width question marks are treated as word delimiters
        else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr &&
                  0xFF1F != *pStr )
            return; // no valid separator -> no replacement
    }

    // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
    if (FindInWordStartExceptList(eLang, OUString(pWordStt, pDelim - pWordStt)))
        return;

    if( bAtStart )  // at the beginning of a paragraph?
    {
        // Check out the previous paragraph, if it exists.
        // If so, then check the sentence separator at its end.
        OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos);
        if (!pPrevPara)
        {
            // valid separator -> replace
            OUString sChar( *pWordStt );
            sChar = rCC.titlecase(sChar); //see fdo#56740
            if (sChar != OUStringChar(*pWordStt))
                rDoc.ReplaceRange( pWordStt - pStart, 1, sChar );
            return;
        }

        // From here on aText/pStart/pStr refer to the previous paragraph.
        // NOTE(review): pWordStt and pDelim still point into the buffer of
        // the first aText; this presumably relies on rTxt keeping that
        // buffer alive (the final offset is computed against rTxt.getStr())
        // - confirm.
        aText = *pPrevPara;
        bAtStart = false;
        pStart = aText.getStr();
        pStr = pStart + aText.getLength();

        // NOTE(review): if *pPrevPara were empty, the first --pStr would
        // step in front of pStart; presumably GetPrevPara() never returns an
        // empty paragraph - confirm.
        do {    // skip all trailing blanks
            --pStr;
            if (!NonFieldWordDelim(*pStr))
                break;
            bAtStart = (pStart == pStr);
        } while( !bAtStart );

        if( bAtStart )
            return;  // no valid separator -> no replacement
    }

    // Step 4: the delimiters in front of the word have been skipped; now
    // test for the sentence separator. Full stop, exclamation mark and
    // question mark may all occur, but each of them only once.
    const sal_Unicode* pExceptStt = nullptr; // position of the full stop, if any
    bool bContinue = true;
    Flags nFlag = Flags::NONE;
    do
    {
        switch (*pStr)
        {
            // Western and Asian full stop
            case '.':
            case 0x3002:
            case 0xFF0E:
                {
                    if (pStr >= pStart + 2 && *(pStr - 2) == '.')
                    {
                        //e.g. text "f.o.o. word": Now currently considering
                        //capitalizing word but second last character of
                        //previous word is a . So probably last word is an
                        //acronym that ends in . and not truly the end of a
                        //previous sentence, so don't autocapitalize this word
                        return;
                    }
                    if (nFlag & Flags::FullStop)
                        return; // no valid separator -> no replacement
                    nFlag |= Flags::FullStop;
                    pExceptStt = pStr;
                }
                break;
            case '!':
            case 0xFF01:
                {
                    if (nFlag & Flags::ExclamationMark)
                        return; // no valid separator -> no replacement
                    nFlag |= Flags::ExclamationMark;
                }
                break;
            case '?':
            case 0xFF1F:
                {
                    if (nFlag & Flags::QuestionMark)
                        return; // no valid separator -> no replacement
                    nFlag |= Flags::QuestionMark;
                }
                break;
            default:
                if (nFlag == Flags::NONE)
                    return; // no valid separator -> no replacement
                else
                    bContinue = false;
                break;
        }

        if (bContinue && pStr-- == pStart)
        {
            return; // no valid separator -> no replacement
        }
    } while (bContinue);
    // The abbreviation exception list is only consulted when a full stop
    // was the sole punctuation found.
    if (Flags::FullStop != nFlag)
        pExceptStt = nullptr;

    // Only capitalize, if string is long enough
    if( 2 > ( pStr - pStart ) )
        return;

    // The character in front of the punctuation is not alphanumeric: search
    // further back for a digit or for a second letter before accepting.
    if (!rCC.isLetterNumeric(aText, pStr-- - pStart))
    {
        bool bValid = false, bAlphaFnd = false;
        const sal_Unicode* pTmpStr = pStr;
        while( !bValid )
        {
            if( rCC.isDigit( aText, pTmpStr - pStart ) )
            {
                bValid = true;
                pStr = pTmpStr - 1;
            }
            else if( rCC.isLetter( aText, pTmpStr - pStart ) )
            {
                if( bAlphaFnd )
                {
                    bValid = true;
                    pStr = pTmpStr;
                }
                else
                    bAlphaFnd = true;
            }
            else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr))
                break;

            if( pTmpStr == pStart )
                break;

            --pTmpStr;
        }

        if( !bValid )
            return; // no valid separator -> no replacement
    }

    bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9';

    // Search for the beginning of the word
    while (!NonFieldWordDelim(*pStr))
    {
        if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) )
            bNumericOnly = false;

        if( pStart == pStr )
            break;

        --pStr;
    }

    if( bNumericOnly )      // consists of only numbers, then not
        return;

    if (NonFieldWordDelim(*pStr))
        ++pStr;

    OUString sWord;

    // check on the basis of the exception list
    if( pExceptStt )
    {
        // the word in front of the full stop, including the full stop
        sWord = OUString(pStr, pExceptStt - pStr + 1);
        if( FindInCplSttExceptList(eLang, sWord) )
            return;

        // Delete all non alphanumeric. Test the characters at the
        // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.)
        OUString sTmp( sWord );
        while( !sTmp.isEmpty() &&
                !rCC.isLetterNumeric( sTmp, 0 ) )
            sTmp = sTmp.copy(1);

        // Remove all non alphanumeric characters towards the end up until
        // the last one.
        sal_Int32 nLen = sTmp.getLength();
        while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) )
            --nLen;
        if( nLen + 1 < sTmp.getLength() )
            sTmp = sTmp.copy( 0, nLen + 1 );

        if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() &&
            FindInCplSttExceptList(eLang, sTmp))
            return;

        if(FindInCplSttExceptList(eLang, sWord, true))
            return;
    }

    // Step 5: Ok, then replace
    sal_Unicode cSave = *pWordStt;
    nSttPos = pWordStt - rTxt.getStr();
    OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740
    bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar );

    // Perhaps someone wants to have the word
    if( bRet && ACFlags::SaveWordCplSttLst & nFlags )
        rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave );
}
1195
1196 // Correct accidental use of cAPS LOCK key
FnCorrectCapsLock(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nSttPos,sal_Int32 nEndPos,LanguageType eLang)1197 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1198 sal_Int32 nSttPos, sal_Int32 nEndPos,
1199 LanguageType eLang )
1200 {
1201 if (nEndPos - nSttPos < 2)
1202 // string must be at least 2-character long.
1203 return false;
1204
1205 CharClass& rCC = GetCharClass( eLang );
1206
1207 // Check the first 2 letters.
1208 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) )
1209 return false;
1210
1211 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) )
1212 return false;
1213
1214 OUStringBuffer aConverted;
1215 aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) );
1216 aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) );
1217
1218 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list
1219 if (FindInWordStartExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos)))
1220 return false;
1221
1222 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i )
1223 {
1224 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) )
1225 // A lowercase letter disqualifies the whole text.
1226 return false;
1227
1228 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) )
1229 // Another uppercase letter. Convert it.
1230 aConverted.append( rCC.lowercase(OUString(rTxt[i])) );
1231 else
1232 // This is not an alphabetic letter. Leave it as-is.
1233 aConverted.append( rTxt[i] );
1234 }
1235
1236 // Replace the word.
1237 rDoc.Delete(nSttPos, nEndPos);
1238 rDoc.Insert(nSttPos, aConverted.makeStringAndClear());
1239
1240 return true;
1241 }
1242
1243
GetQuote(sal_Unicode cInsChar,bool bSttQuote,LanguageType eLang) const1244 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote,
1245 LanguageType eLang ) const
1246 {
1247 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar
1248 ? GetStartDoubleQuote()
1249 : GetStartSingleQuote() )
1250 : ( '\"' == cInsChar
1251 ? GetEndDoubleQuote()
1252 : GetEndSingleQuote() );
1253 if( !cRet )
1254 {
1255 // then through the Language find the right character
1256 if( LANGUAGE_NONE == eLang )
1257 cRet = cInsChar;
1258 else
1259 {
1260 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1261 OUString sRet( bSttQuote
1262 ? ( '\"' == cInsChar
1263 ? rLcl.getDoubleQuotationMarkStart()
1264 : rLcl.getQuotationMarkStart() )
1265 : ( '\"' == cInsChar
1266 ? rLcl.getDoubleQuotationMarkEnd()
1267 : rLcl.getQuotationMarkEnd() ));
1268 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar;
1269 }
1270 }
1271 return cRet;
1272 }
1273
InsertQuote(SvxAutoCorrDoc & rDoc,sal_Int32 nInsPos,sal_Unicode cInsChar,bool bSttQuote,bool bIns,LanguageType eLang,ACQuotes eType) const1274 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
1275 sal_Unicode cInsChar, bool bSttQuote,
1276 bool bIns, LanguageType eLang, ACQuotes eType ) const
1277 {
1278 sal_Unicode cRet;
1279
1280 if ( eType == ACQuotes::DoubleAngleQuote )
1281 {
1282 bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS;
1283 // pressing " inside a quotation -> use second level angle quotes
1284 bool bLeftQuote = '\"' == cInsChar &&
1285 // start position and Romanian OR
1286 // not start position and Hungarian
1287 bSttQuote == (eLang != LANGUAGE_HUNGARIAN);
1288 cRet = ( '<' == cInsChar || bLeftQuote )
1289 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote )
1290 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote );
1291 }
1292 else if ( eType == ACQuotes::UseApostrophe )
1293 cRet = cApostrophe;
1294 else
1295 cRet = GetQuote( cInsChar, bSttQuote, eLang );
1296
1297 OUString sChg( cInsChar );
1298 if( bIns )
1299 rDoc.Insert( nInsPos, sChg );
1300 else
1301 rDoc.Replace( nInsPos, sChg );
1302
1303 sChg = OUString(cRet);
1304
1305 if( eType == ACQuotes::NonBreakingSpace )
1306 {
1307 if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) ))
1308 {
1309 if( !bSttQuote )
1310 ++nInsPos;
1311 }
1312 }
1313 else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
1314 {
1315 rDoc.Delete( nInsPos-1, nInsPos);
1316 --nInsPos;
1317 }
1318
1319 rDoc.Replace( nInsPos, sChg );
1320
1321 // i' -> I' in English (last step for the Undo)
1322 if( eType == ACQuotes::CapitalizeIAm )
1323 rDoc.Replace( nInsPos-1, u"I"_ustr );
1324 }
1325
GetQuote(SvxAutoCorrDoc const & rDoc,sal_Int32 nInsPos,sal_Unicode cInsChar,bool bSttQuote)1326 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos,
1327 sal_Unicode cInsChar, bool bSttQuote )
1328 {
1329 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1330 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
1331
1332 OUString sRet(cRet);
1333
1334 if( '\"' == cInsChar )
1335 {
1336 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS)
1337 {
1338 if( bSttQuote )
1339 sRet += " ";
1340 else
1341 sRet = " " + sRet;
1342 }
1343 }
1344 return sRet;
1345 }
1346
1347 // search preceding opening quote in the paragraph before the insert position
lcl_HasPrecedingChar(std::u16string_view rTxt,sal_Int32 nPos,const sal_Unicode sPrecedingChar,const sal_Unicode sStopChar,const sal_Unicode * aStopChars)1348 static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos,
1349 const sal_Unicode sPrecedingChar, const sal_Unicode sStopChar, const sal_Unicode* aStopChars )
1350 {
1351 sal_Unicode cTmpChar;
1352
1353 do {
1354 cTmpChar = rTxt[ --nPos ];
1355 if ( cTmpChar == sPrecedingChar )
1356 return true;
1357
1358 if ( cTmpChar == sStopChar )
1359 return false;
1360
1361 for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
1362 if ( cTmpChar == *pCh )
1363 return false;
1364
1365 } while ( nPos > 0 );
1366
1367 return false;
1368 }
1369
1370 // WARNING: rText may become invalid, see comment below
DoAutoCorrect(SvxAutoCorrDoc & rDoc,const OUString & rTxt,sal_Int32 nInsPos,sal_Unicode cChar,bool bInsert,bool & io_bNbspRunNext,vcl::Window const * pFrameWin)1371 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
1372 sal_Int32 nInsPos, sal_Unicode cChar,
1373 bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin )
1374 {
1375 bool bIsNextRun = io_bNbspRunNext;
1376 io_bNbspRunNext = false; // if it was set, then it has to be turned off
1377
1378 do{ // only for middle check loop !!
1379 if( cChar )
1380 {
1381 // Prevent double space
1382 if( nInsPos && ' ' == cChar &&
1383 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
1384 ' ' == rTxt[ nInsPos - 1 ])
1385 {
1386 break;
1387 }
1388
1389 bool bSingle = '\'' == cChar;
1390 bool bIsReplaceQuote =
1391 (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
1392 (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
1393 if( bIsReplaceQuote )
1394 {
1395 bool bSttQuote = !nInsPos;
1396 ACQuotes eType = ACQuotes::NONE;
1397 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1398 if (!bSttQuote)
1399 {
1400 sal_Unicode cPrev = rTxt[ nInsPos-1 ];
1401 bSttQuote = NonFieldWordDelim(cPrev) ||
1402 lcl_IsInArr( u"([{", cPrev ) ||
1403 ( cEmDash == cPrev ) ||
1404 ( cEnDash == cPrev );
1405 // tdf#38394 use opening quotation mark << in French l'<<word>>
1406 if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
1407 primary(eLang) == primary(LANGUAGE_FRENCH) &&
1408 ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
1409 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
1410 u"cdjlnmtsCDJLNMTS"_ustr.indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
1411 ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
1412 // abbreviated form of que
1413 ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
1414 ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
1415 {
1416 bSttQuote = true;
1417 }
1418 // tdf#108423 for capitalization of English i'm
1419 else if ( bSingle && ( cPrev == 'i' ) &&
1420 primary(eLang) == primary(LANGUAGE_ENGLISH) &&
1421 ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
1422 {
1423 eType = ACQuotes::CapitalizeIAm;
1424 }
1425 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
1426 else if ( !bSingle && nInsPos &&
1427 ( ( eLang == LANGUAGE_HUNGARIAN &&
1428 lcl_HasPrecedingChar( rTxt, nInsPos,
1429 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
1430 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEnd[1],
1431 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 2 ) ) ||
1432 ( eLang.anyOf(
1433 LANGUAGE_ROMANIAN,
1434 LANGUAGE_ROMANIAN_MOLDOVA ) &&
1435 lcl_HasPrecedingChar( rTxt, nInsPos,
1436 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
1437 bSttQuote ? aStopDoubleAngleQuoteStart[1] : aStopDoubleAngleQuoteEndRo[1],
1438 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 2 ) ) ) )
1439 {
1440 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1441 // only if the opening double quotation mark is the default one
1442 if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
1443 eType = ACQuotes::DoubleAngleQuote;
1444 }
1445 else if ( bSingle && nInsPos && !bSttQuote &&
1446 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
1447 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
1448 // tdf#123786 the same for Russian and Ukrainian
1449 ( eLang.anyOf (
1450 LANGUAGE_CZECH,
1451 LANGUAGE_GERMAN,
1452 LANGUAGE_GERMAN_SWISS,
1453 LANGUAGE_GERMAN_AUSTRIAN,
1454 LANGUAGE_GERMAN_LUXEMBOURG,
1455 LANGUAGE_GERMAN_LIECHTENSTEIN,
1456 LANGUAGE_ICELANDIC,
1457 LANGUAGE_SLOVAK,
1458 LANGUAGE_SLOVENIAN ) ) )
1459 {
1460 sal_Unicode sStartChar = GetStartSingleQuote();
1461 sal_Unicode sEndChar = GetEndSingleQuote();
1462 if ( !sStartChar || !sEndChar ) {
1463 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1464 if ( !sStartChar ) sStartChar = rLcl.getQuotationMarkStart()[0];
1465 if ( !sEndChar ) sEndChar = rLcl.getQuotationMarkStart()[0];
1466 }
1467 if ( !lcl_HasPrecedingChar( rTxt, nInsPos, sStartChar, sEndChar, aStopSingleQuoteEnd + 1 ) )
1468 {
1469 CharClass& rCC = GetCharClass( eLang );
1470 if ( rCC.isLetter(rTxt, nInsPos-1) )
1471 {
1472 eType = ACQuotes::UseApostrophe;
1473 }
1474 }
1475 }
1476 else if ( bSingle && nInsPos && !bSttQuote &&
1477 ( eLang.anyOf (
1478 LANGUAGE_RUSSIAN,
1479 LANGUAGE_UKRAINIAN ) &&
1480 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa[1], aStopSingleQuoteEndRuUa + 2 ) ) )
1481 {
1482 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
1483 CharClass& rCC = GetCharClass( eLang );
1484 if ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) &&
1485 // use apostrophe only after letters, not after digits or punctuation
1486 rCC.isLetter(rTxt, nInsPos-1) )
1487 {
1488 eType = ACQuotes::UseApostrophe;
1489 }
1490 }
1491 }
1492
1493 if ( eType == ACQuotes::NONE && !bSingle &&
1494 ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
1495 eType = ACQuotes::NonBreakingSpace;
1496
1497 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
1498 break;
1499 }
1500 // tdf#133524 change "<<" and ">>" to double angle quotation marks
1501 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
1502 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
1503 ('<' == cChar || '>' == cChar) &&
1504 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
1505 {
1506 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
1507 if ( eLang.anyOf(
1508 LANGUAGE_CATALAN, // primary level
1509 LANGUAGE_CATALAN_VALENCIAN, // primary level
1510 LANGUAGE_FINNISH, // alternative primary level
1511 LANGUAGE_FRENCH_SWISS, // second level
1512 LANGUAGE_GALICIAN, // primary level
1513 LANGUAGE_HUNGARIAN, // second level
1514 LANGUAGE_POLISH, // second level
1515 LANGUAGE_PORTUGUESE, // primary level
1516 LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
1517 LANGUAGE_ROMANIAN, // second level
1518 LANGUAGE_ROMANIAN_MOLDOVA, // second level
1519 LANGUAGE_SWEDISH, // alternative primary level
1520 LANGUAGE_SWEDISH_FINLAND, // alternative primary level
1521 LANGUAGE_UKRAINIAN, // primary level
1522 LANGUAGE_USER_ARAGONESE, // primary level
1523 LANGUAGE_USER_ASTURIAN ) || // primary level
1524 primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level
1525 primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level
1526 {
1527 InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
1528 break;
1529 }
1530 }
1531
1532 if( bInsert )
1533 rDoc.Insert( nInsPos, OUString(cChar) );
1534 else
1535 rDoc.Replace( nInsPos, OUString(cChar) );
1536
1537 // Hardspaces autocorrection
1538 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
1539 {
1540 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1541 // and its length may change (even become shorter) if FnAddNonBrkSpace succeeds!
1542 sal_Int32 nUpdatedPos = -1;
1543 if (NeedsHardspaceAutocorr(cChar))
1544 nUpdatedPos = FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext );
1545 if (nUpdatedPos >= 0)
1546 {
1547 nInsPos = nUpdatedPos;
1548 }
1549 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
1550 {
1551 // Remove the NBSP if it wasn't an autocorrection
1552 if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
1553 cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
1554 {
1555 // Look for the last HARD_SPACE
1556 sal_Int32 nPos = nInsPos - 1;
1557 bool bContinue = true;
1558 while ( bContinue )
1559 {
1560 const sal_Unicode cTmpChar = rTxt[ nPos ];
1561 if ( cTmpChar == cNonBreakingSpace )
1562 {
1563 rDoc.Delete( nPos, nPos + 1 );
1564 bContinue = false;
1565 }
1566 else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
1567 bContinue = false;
1568 nPos--;
1569 }
1570 }
1571 }
1572 }
1573 }
1574
1575 if( !nInsPos )
1576 break;
1577
1578 sal_Int32 nPos = nInsPos - 1;
1579
1580 if( IsWordDelim( rTxt[ nPos ]))
1581 break;
1582
1583 // Set bold or underline automatically?
1584 if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
1585 {
1586 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
1587 {
1588 FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
1589 }
1590 break;
1591 }
1592
1593 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1594 ;
1595
1596 // Found a Paragraph-start or a Blank, search for the word shortcut in
1597 // auto.
1598 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1599 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1600 --nCapLttrPos; // begin of paragraph and no blank
1601
1602 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1603 CharClass& rCC = GetCharClass( eLang );
1604
1605 // no symbol characters
1606 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
1607 break;
1608
1609 if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
1610 // tdf#134940 fix regression of arrow "-->" resulted by premature
1611 // replacement of "--" since '>' was added to IsAutoCorrectChar()
1612 '>' != cChar )
1613 {
1614 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1615 // and becomes INVALID if ChgAutoCorrWord returns true!
1616 // => use aPara/pPara to create a valid copy of the string!
1617 OUString aPara;
1618 OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;
1619
1620 bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
1621 *this, pPara );
1622 if( !bChgWord )
1623 {
1624 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
1625 while( nCapLttrPos1 < nInsPos &&
1626 lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
1627 )
1628 ++nCapLttrPos1;
1629 while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
1630 lcl_IsInArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
1631 )
1632 --nInsPos1;
1633
1634 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
1635 nCapLttrPos1 < nInsPos1 &&
1636 rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
1637 {
1638 bChgWord = true;
1639 nCapLttrPos = nCapLttrPos1;
1640 }
1641 }
1642
1643 if( bChgWord )
1644 {
1645 if( !aPara.isEmpty() )
1646 {
1647 sal_Int32 nEnd = nCapLttrPos;
1648 while( nEnd < aPara.getLength() &&
1649 !IsWordDelim( aPara[ nEnd ]))
1650 ++nEnd;
1651
1652 // Capital letter at beginning of paragraph?
1653 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1654 {
1655 FnCapitalStartSentence( rDoc, aPara, false,
1656 nCapLttrPos, nEnd, eLang );
1657 }
1658
1659 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1660 {
1661 FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
1662 }
1663 }
1664 break;
1665 }
1666 }
1667
1668 if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
1669 {
1670 // WARNING ATTENTION: rTxt is an alias of the text node's OUString
1671 // and becomes INVALID if TransliterateRTLWord returns true!
1672 if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
1673 break;
1674 }
1675
1676 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
1677 (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3
1678 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
1679 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1680 ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
1681 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1682 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
1683 ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
1684 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
1685 FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
1686 ;
1687 else
1688 {
1689 bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
1690 bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );
1691
1692 if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
1693 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
1694 {
1695 // Correct accidental use of cAPS LOCK key (do this only when
1696 // the caps or shift lock key is pressed). Turn off the caps
1697 // lock afterwards.
1698 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
1699 }
1700
1701 // Capital letter at beginning of paragraph ?
1702 if( !bUnsupported &&
1703 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
1704 {
1705 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
1706 }
1707
1708 // Two capital letters at beginning of word ??
1709 if( !bUnsupported &&
1710 IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
1711 {
1712 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1713 }
1714
1715 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
1716 {
1717 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
1718 }
1719 }
1720
1721 } while( false );
1722 }
1723
GetLanguageList_(LanguageType eLang)1724 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
1725 LanguageType eLang )
1726 {
1727 LanguageTag aLanguageTag( eLang);
1728 if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
1729 (void)CreateLanguageFile(aLanguageTag);
1730 const auto iter = m_aLangTable.find(aLanguageTag);
1731 assert(iter != m_aLangTable.end());
1732 return iter->second;
1733 }
1734
SaveCplSttExceptList(LanguageType eLang)1735 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
1736 {
1737 auto const iter = m_aLangTable.find(LanguageTag(eLang));
1738 if (iter != m_aLangTable.end())
1739 iter->second.SaveCplSttExceptList();
1740 else
1741 {
1742 SAL_WARN("editeng", "Save an empty list? ");
1743 }
1744 }
1745
SaveWordStartExceptList(LanguageType eLang)1746 void SvxAutoCorrect::SaveWordStartExceptList(LanguageType eLang)
1747 {
1748 auto const iter = m_aLangTable.find(LanguageTag(eLang));
1749 if (iter != m_aLangTable.end())
1750 iter->second.SaveWordStartExceptList();
1751 else
1752 {
1753 SAL_WARN("editeng", "Save an empty list? ");
1754 }
1755 }
1756
1757 // Adds a single word. The list will immediately be written to the file!
AddCplSttException(const OUString & rNew,LanguageType eLang)1758 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew,
1759 LanguageType eLang )
1760 {
1761 SvxAutoCorrectLanguageLists* pLists = nullptr;
1762 // either the right language is present or it will be this in the general list
1763 auto iter = m_aLangTable.find(LanguageTag(eLang));
1764 if (iter != m_aLangTable.end())
1765 pLists = &iter->second;
1766 else
1767 {
1768 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1769 iter = m_aLangTable.find(aLangTagUndetermined);
1770 if (iter != m_aLangTable.end())
1771 pLists = &iter->second;
1772 else if(CreateLanguageFile(aLangTagUndetermined))
1773 {
1774 iter = m_aLangTable.find(aLangTagUndetermined);
1775 assert(iter != m_aLangTable.end());
1776 pLists = &iter->second;
1777 }
1778 }
1779 OSL_ENSURE(pLists, "No auto correction data");
1780 return pLists && pLists->AddToCplSttExceptList(rNew);
1781 }
1782
1783 // Adds a single word. The list will immediately be written to the file!
AddWordStartException(const OUString & rNew,LanguageType eLang)1784 bool SvxAutoCorrect::AddWordStartException( const OUString& rNew,
1785 LanguageType eLang )
1786 {
1787 SvxAutoCorrectLanguageLists* pLists = nullptr;
1788 //either the right language is present or it is set in the general list
1789 auto iter = m_aLangTable.find(LanguageTag(eLang));
1790 if (iter != m_aLangTable.end())
1791 pLists = &iter->second;
1792 else
1793 {
1794 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED);
1795 iter = m_aLangTable.find(aLangTagUndetermined);
1796 if (iter != m_aLangTable.end())
1797 pLists = &iter->second;
1798 else if(CreateLanguageFile(aLangTagUndetermined))
1799 {
1800 iter = m_aLangTable.find(aLangTagUndetermined);
1801 assert(iter != m_aLangTable.end());
1802 pLists = &iter->second;
1803 }
1804 }
1805 OSL_ENSURE(pLists, "No auto correction file!");
1806 return pLists && pLists->AddToWordStartExceptList(rNew);
1807 }
1808
GetPrevAutoCorrWord(SvxAutoCorrDoc const & rDoc,const OUString & rTxt,sal_Int32 nPos)1809 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt,
1810 sal_Int32 nPos)
1811 {
1812 OUString sRet;
1813 if( !nPos )
1814 return sRet;
1815
1816 sal_Int32 nEnd = nPos;
1817
1818 // it must be followed by a blank or tab!
1819 if( ( nPos < rTxt.getLength() &&
1820 !IsWordDelim( rTxt[ nPos ])) ||
1821 IsWordDelim( rTxt[ --nPos ]))
1822 return sRet;
1823
1824 while( nPos && !IsWordDelim( rTxt[ --nPos ]))
1825 ;
1826
1827 // Found a Paragraph-start or a Blank, search for the word shortcut in
1828 // auto.
1829 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character
1830 if( !nPos && !IsWordDelim( rTxt[ 0 ]))
1831 --nCapLttrPos; // Beginning of paragraph and no Blank!
1832
1833 while( lcl_IsInArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) )
1834 if( ++nCapLttrPos >= nEnd )
1835 return sRet;
1836
1837 if( 3 > nEnd - nCapLttrPos )
1838 return sRet;
1839
1840 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
1841
1842 CharClass& rCC = GetCharClass(eLang);
1843
1844 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd ))
1845 return sRet;
1846
1847 sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos );
1848 return sRet;
1849 }
1850
1851 // static
GetChunkForAutoText(std::u16string_view rTxt,const sal_Int32 nPos)1852 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(std::u16string_view rTxt,
1853 const sal_Int32 nPos)
1854 {
1855 constexpr sal_Int32 nMinLen = 3;
1856 constexpr sal_Int32 nMaxLen = 9;
1857 std::vector<OUString> aRes;
1858 if (nPos >= nMinLen)
1859 {
1860 sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0);
1861 // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation)
1862 if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1]))
1863 {
1864 while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin]))
1865 ++nBegin;
1866 }
1867 if (nBegin + nMinLen <= nPos)
1868 {
1869 OUString sRes( rTxt.substr(nBegin, nPos - nBegin) );
1870 aRes.push_back(sRes);
1871 bool bLastStartedWithDelim = IsWordDelim(sRes[0]);
1872 for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i)
1873 {
1874 bool bAdd = bLastStartedWithDelim;
1875 bLastStartedWithDelim = IsWordDelim(sRes[i]);
1876 bAdd = bAdd || bLastStartedWithDelim;
1877 if (bAdd)
1878 aRes.push_back(sRes.copy(i));
1879 }
1880 }
1881 }
1882 return aRes;
1883 }
1884
CreateLanguageFile(const LanguageTag & rLanguageTag,bool bNewFile)1885 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile )
1886 {
1887 OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists ");
1888
1889 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true ));
1890 OUString sShareDirFile( sUserDirFile );
1891
1892 SvxAutoCorrectLanguageLists* pLists = nullptr;
1893
1894 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY );
1895
1896 auto nFndPos = aLastFileTable.find(rLanguageTag);
1897 if(nFndPos != aLastFileTable.end() &&
1898 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) &&
1899 nAktTime - nLastCheckTime < nMinTime)
1900 {
1901 // no need to test the file, because the last check is not older then
1902 // 2 minutes.
1903 if( bNewFile )
1904 {
1905 sShareDirFile = sUserDirFile;
1906 auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1907 std::forward_as_tuple(rLanguageTag),
1908 std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1909 pLists = &itBool.first->second;
1910 aLastFileTable.erase(nFndPos);
1911 }
1912 }
1913 else if(
1914 ( FStatHelper::IsDocument( sUserDirFile ) ||
1915 FStatHelper::IsDocument( sShareDirFile =
1916 GetAutoCorrFileName( rLanguageTag ) ) ||
1917 FStatHelper::IsDocument( sShareDirFile =
1918 GetAutoCorrFileName( rLanguageTag, false, false, true) )
1919 ) ||
1920 ( sShareDirFile = sUserDirFile, bNewFile )
1921 )
1922 {
1923 auto itBool = m_aLangTable.emplace(std::piecewise_construct,
1924 std::forward_as_tuple(rLanguageTag),
1925 std::forward_as_tuple(*this, sShareDirFile, sUserDirFile));
1926 pLists = &itBool.first->second;
1927 if (nFndPos != aLastFileTable.end())
1928 aLastFileTable.erase(nFndPos);
1929 }
1930 else if( !bNewFile )
1931 {
1932 aLastFileTable[rLanguageTag] = nAktTime.GetTime();
1933 }
1934 return pLists != nullptr;
1935 }
1936
PutText(const OUString & rShort,const OUString & rLong,LanguageType eLang)1937 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong,
1938 LanguageType eLang )
1939 {
1940 LanguageTag aLanguageTag( eLang);
1941 if (auto const iter = m_aLangTable.find(aLanguageTag); iter != m_aLangTable.end())
1942 return iter->second.PutText(rShort, rLong);
1943 if (CreateLanguageFile(aLanguageTag))
1944 {
1945 auto const iter = m_aLangTable.find(aLanguageTag);
1946 assert (iter != m_aLangTable.end());
1947 return iter->second.PutText(rShort, rLong);
1948 }
1949 return false;
1950 }
1951
MakeCombinedChanges(std::vector<SvxAutocorrWord> & aNewEntries,std::vector<SvxAutocorrWord> & aDeleteEntries,LanguageType eLang)1952 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries,
1953 std::vector<SvxAutocorrWord>& aDeleteEntries,
1954 LanguageType eLang )
1955 {
1956 LanguageTag aLanguageTag( eLang);
1957 auto iter = m_aLangTable.find(aLanguageTag);
1958 if (iter != m_aLangTable.end())
1959 {
1960 iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1961 }
1962 else if(CreateLanguageFile( aLanguageTag ))
1963 {
1964 iter = m_aLangTable.find(aLanguageTag);
1965 assert(iter != m_aLangTable.end());
1966 iter->second.MakeCombinedChanges( aNewEntries, aDeleteEntries );
1967 }
1968 }
1969
1970 // - return the replacement text (only for SWG-Format, all other
1971 // can be taken from the word list!)
GetLongText(const OUString &,OUString &)1972 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& )
1973 {
1974 return false;
1975 }
1976
refreshBlockList(const uno::Reference<embed::XStorage> &)1977 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& )
1978 {
1979 }
1980
1981 // Text with attribution (only the SWG - SWG format!)
PutText(const css::uno::Reference<css::embed::XStorage> &,const OUString &,const OUString &,SfxObjectShell &,OUString &)1982 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&,
1983 const OUString&, const OUString&, SfxObjectShell&, OUString& )
1984 {
1985 return false;
1986 }
1987
EncryptBlockName_Imp(std::u16string_view rName)1988 OUString EncryptBlockName_Imp(std::u16string_view rName)
1989 {
1990 OUStringBuffer aName;
1991 aName.append('#').append(rName);
1992 for (size_t nLen = rName.size(), nPos = 1; nPos < nLen; ++nPos)
1993 {
1994 if (lcl_IsInArr( u"!/:.\\", aName[nPos]))
1995 aName[nPos] &= 0x0f;
1996 }
1997 return aName.makeStringAndClear();
1998 }
1999
2000 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */
GeneratePackageName(std::u16string_view rShort,OUString & rPackageName)2001 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName )
2002 {
2003 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7));
2004 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US));
2005
2006 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos)
2007 {
2008 switch (aBuf[nPos])
2009 {
2010 case '!':
2011 case '/':
2012 case ':':
2013 case '.':
2014 case '\\':
2015 // tdf#156769 - escape the question mark in the storage name
2016 case '?':
2017 aBuf[nPos] = '_';
2018 break;
2019 default:
2020 break;
2021 }
2022 }
2023
2024 rPackageName = aBuf.makeStringAndClear();
2025 }
2026
2027 static std::optional<SvxAutocorrWordList::WordSearchStatus>
lcl_SearchWordsInList(SvxAutoCorrectLanguageLists * pList,std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos)2028 lcl_SearchWordsInList( SvxAutoCorrectLanguageLists* pList,
2029 std::u16string_view rTxt,
2030 sal_Int32& rStt, sal_Int32 nEndPos )
2031 {
2032 const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
2033 return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
2034 }
2035
2036 // the search for the words in the substitution table
2037 std::optional<SvxAutocorrWordList::WordSearchStatus>
SearchWordsInList(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos,SvxAutoCorrDoc &,LanguageTag & rLang)2038 SvxAutoCorrect::SearchWordsInList(
2039 std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
2040 SvxAutoCorrDoc&, LanguageTag& rLang )
2041 {
2042 LanguageTag aLanguageTag( rLang);
2043 if( aLanguageTag.isSystemLocale() )
2044 aLanguageTag.reset( MsLangId::getConfiguredSystemLanguage());
2045
2046 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
2047 * list instead? */
2048
2049 // First search for eLang, then US-English -> English
2050 // and last in LANGUAGE_UNDETERMINED
2051 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2052 {
2053 //the language is available - so bring it on
2054 const auto iter = m_aLangTable.find(aLanguageTag);
2055 assert(iter != m_aLangTable.end());
2056 SvxAutoCorrectLanguageLists & rList = iter->second;
2057 auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2058 if( pRet )
2059 {
2060 rLang = aLanguageTag;
2061 return pRet;
2062 }
2063 }
2064
2065 // If it still could not be found here, then keep on searching
2066 LanguageType eLang = aLanguageTag.getLanguageType();
2067 // the primary language for example EN
2068 aLanguageTag.reset(aLanguageTag.getLanguage());
2069 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2070 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2071 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2072 CreateLanguageFile(aLanguageTag, false)))
2073 {
2074 //the language is available - so bring it on
2075 SvxAutoCorrectLanguageLists& rList = m_aLangTable.find(aLanguageTag)->second;
2076 auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2077 if( pRet )
2078 {
2079 rLang = aLanguageTag;
2080 return pRet;
2081 }
2082 }
2083
2084 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2085 CreateLanguageFile(aLanguageTag, false))
2086 {
2087 //the language is available - so bring it on
2088 const auto iter = m_aLangTable.find(aLanguageTag);
2089 assert(iter != m_aLangTable.end());
2090 SvxAutoCorrectLanguageLists& rList = iter->second;
2091 auto pRet = lcl_SearchWordsInList( &rList, rTxt, rStt, nEndPos );
2092 if( pRet )
2093 {
2094 rLang = aLanguageTag;
2095 return pRet;
2096 }
2097 }
2098 return std::nullopt;
2099 }
2100
SearchWordsNext(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos,SvxAutocorrWordList::WordSearchStatus & rStatus)2101 bool SvxAutoCorrect::SearchWordsNext(
2102 std::u16string_view rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
2103 SvxAutocorrWordList::WordSearchStatus& rStatus )
2104 {
2105 const SvxAutocorrWordList* pWordList = rStatus.GetAutocorrWordList();
2106 return pWordList->SearchWordsNext( rTxt, rStt, nEndPos, rStatus );
2107 }
2108
FindInWordStartExceptList(LanguageType eLang,const OUString & sWord)2109 bool SvxAutoCorrect::FindInWordStartExceptList( LanguageType eLang,
2110 const OUString& sWord )
2111 {
2112 LanguageTag aLanguageTag( eLang);
2113
2114 /* TODO-BCP47: again horrible ugliness */
2115
2116 // First search for eLang, then primary language of eLang
2117 // and last in LANGUAGE_UNDETERMINED
2118
2119 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2120 {
2121 //the language is available - so bring it on
2122 const auto iter = m_aLangTable.find(aLanguageTag);
2123 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2124 auto& rList = iter->second;
2125 if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2126 return true;
2127 }
2128
2129 // If it still could not be found here, then keep on searching
2130 // the primary language for example EN
2131 aLanguageTag.reset(aLanguageTag.getLanguage());
2132 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2133 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2134 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2135 CreateLanguageFile(aLanguageTag, false)))
2136 {
2137 //the language is available - so bring it on
2138 const auto iter = m_aLangTable.find(aLanguageTag);
2139 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2140 auto& rList = iter->second;
2141 if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2142 return true;
2143 }
2144
2145 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2146 CreateLanguageFile(aLanguageTag, false))
2147 {
2148 //the language is available - so bring it on
2149 const auto iter = m_aLangTable.find(aLanguageTag);
2150 assert(iter != m_aLangTable.end());
2151 auto& rList = iter->second;
2152 if(rList.GetWordStartExceptList()->find(sWord) != rList.GetWordStartExceptList()->end() )
2153 return true;
2154 }
2155 return false;
2156 }
2157
lcl_FindAbbreviation(const SvStringsISortDtor * pList,const OUString & sWord)2158 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
2159 {
2160 SvStringsISortDtor::const_iterator it = pList->find(u"~"_ustr);
2161 SvStringsISortDtor::size_type nPos = it - pList->begin();
2162 if( nPos < pList->size() )
2163 {
2164 OUString sLowerWord(sWord.toAsciiLowerCase());
2165 OUString sAbr;
2166 for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
2167 {
2168 sAbr = (*pList)[ n ];
2169 if (sAbr[0] != '~')
2170 break;
2171 // ~ and ~. are not allowed!
2172 if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
2173 {
2174 OUString sLowerAbk(sAbr.toAsciiLowerCase());
2175 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
2176 {
2177 if( !--i ) // agrees
2178 return true;
2179
2180 if( sLowerAbk[i] != sLowerWord[--ii])
2181 break;
2182 }
2183 }
2184 }
2185 }
2186 OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
2187 "Wrongly sorted exception list?" );
2188 return false;
2189 }
2190
FindInCplSttExceptList(LanguageType eLang,const OUString & sWord,bool bAbbreviation)2191 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
2192 const OUString& sWord, bool bAbbreviation)
2193 {
2194 LanguageTag aLanguageTag( eLang);
2195
2196 /* TODO-BCP47: did I mention terrible horrible ugliness? */
2197
2198 // First search for eLang, then primary language of eLang
2199 // and last in LANGUAGE_UNDETERMINED
2200
2201 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
2202 {
2203 //the language is available - so bring it on
2204 const auto iter = m_aLangTable.find(aLanguageTag);
2205 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2206 const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2207 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2208 return true;
2209 }
2210
2211 // If it still could not be found here, then keep on searching
2212 // the primary language for example EN
2213 aLanguageTag.reset(aLanguageTag.getLanguage());
2214 LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
2215 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
2216 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
2217 CreateLanguageFile(aLanguageTag, false)))
2218 {
2219 //the language is available - so bring it on
2220 const auto iter = m_aLangTable.find(aLanguageTag);
2221 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2222 const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2223 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2224 return true;
2225 }
2226
2227 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
2228 CreateLanguageFile(aLanguageTag, false))
2229 {
2230 //the language is available - so bring it on
2231 const auto iter = m_aLangTable.find(aLanguageTag);
2232 assert(iter != m_aLangTable.end() && "CreateLanguageFile can't fail");
2233 const SvStringsISortDtor* pList = iter->second.GetCplSttExceptList();
2234 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
2235 return true;
2236 }
2237 return false;
2238 }
2239
GetAutoCorrFileName(const LanguageTag & rLanguageTag,bool bNewFile,bool bTst,bool bUnlocalized) const2240 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag,
2241 bool bNewFile, bool bTst, bool bUnlocalized ) const
2242 {
2243 OUString sRet, sExt( rLanguageTag.getBcp47() );
2244 if (bUnlocalized)
2245 {
2246 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example
2247 std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false);
2248 if (!vecFallBackStrings.empty())
2249 sExt = vecFallBackStrings[0];
2250 }
2251
2252 sExt = "_" + sExt + ".dat";
2253 if( bNewFile )
2254 sRet = sUserAutoCorrFile + sExt;
2255 else if( !bTst )
2256 sRet = sShareAutoCorrFile + sExt;
2257 else
2258 {
2259 // test first in the user directory - if not exist, then
2260 sRet = sUserAutoCorrFile + sExt;
2261 if( !FStatHelper::IsDocument( sRet ))
2262 sRet = sShareAutoCorrFile + sExt;
2263 }
2264 return sRet;
2265 }
2266
SvxAutoCorrectLanguageLists(SvxAutoCorrect & rParent,OUString aShareAutoCorrectFile,OUString aUserAutoCorrectFile)2267 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists(
2268 SvxAutoCorrect& rParent,
2269 OUString aShareAutoCorrectFile,
2270 OUString aUserAutoCorrectFile)
2271 : sShareAutoCorrFile(std::move( aShareAutoCorrectFile )),
2272 sUserAutoCorrFile(std::move( aUserAutoCorrectFile )),
2273 aModifiedDate( Date::EMPTY ),
2274 aModifiedTime( tools::Time::EMPTY ),
2275 aLastCheckTime( tools::Time::EMPTY ),
2276 rAutoCorrect(rParent),
2277 nFlags(ACFlags::NONE)
2278 {
2279 }
2280
~SvxAutoCorrectLanguageLists()2281 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists()
2282 {
2283 }
2284
IsFileChanged_Imp()2285 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp()
2286 {
2287 // Access the file system only every 2 minutes to check the date stamp
2288 bool bRet = false;
2289
2290 tools::Time nMinTime( 0, 2 );
2291 tools::Time nAktTime( tools::Time::SYSTEM );
2292 if( aLastCheckTime <= nAktTime) // overflow?
2293 return false;
2294 nAktTime -= aLastCheckTime;
2295 if( nAktTime > nMinTime ) // min time past
2296 {
2297 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY );
2298 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2299 &aTstDate, &aTstTime ) &&
2300 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime ))
2301 {
2302 bRet = true;
2303 // then remove all the lists fast!
2304 if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst )
2305 {
2306 pCplStt_ExcptLst.reset();
2307 }
2308 if( (ACFlags::WordStartLstLoad & nFlags) && pWordStart_ExcptLst )
2309 {
2310 pWordStart_ExcptLst.reset();
2311 }
2312 if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List )
2313 {
2314 pAutocorr_List.reset();
2315 }
2316 nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WordStartLstLoad | ACFlags::ChgWordLstLoad );
2317 }
2318 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2319 }
2320 return bRet;
2321 }
2322
LoadXMLExceptList_Imp(std::unique_ptr<SvStringsISortDtor> & rpLst,const OUString & sStrmName,rtl::Reference<SotStorage> & rStg)2323 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp(
2324 std::unique_ptr<SvStringsISortDtor>& rpLst,
2325 const OUString& sStrmName,
2326 rtl::Reference<SotStorage>& rStg)
2327 {
2328 if( rpLst )
2329 rpLst->clear();
2330 else
2331 rpLst.reset( new SvStringsISortDtor );
2332
2333 {
2334 if( rStg.is() && rStg->IsStream( sStrmName ) )
2335 {
2336 rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2337 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) );
2338 if( ERRCODE_NONE != xStrm->GetError())
2339 {
2340 xStrm.clear();
2341 rStg.clear();
2342 RemoveStream_Imp( sStrmName );
2343 }
2344 else
2345 {
2346 uno::Reference< uno::XComponentContext > xContext =
2347 comphelper::getProcessComponentContext();
2348
2349 xml::sax::InputSource aParserInput;
2350 aParserInput.sSystemId = sStrmName;
2351
2352 xStrm->Seek( 0 );
2353 xStrm->SetBufferSize( 8 * 1024 );
2354 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm );
2355
2356 // get filter
2357 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );
2358
2359 // connect parser and filter
2360 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
2361 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2362 xParser->setFastDocumentHandler( xFilter );
2363 xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
2364 xParser->setTokenHandler( xTokenHandler );
2365
2366 // parse
2367 try
2368 {
2369 xParser->parseStream( aParserInput );
2370 }
2371 catch( const xml::sax::SAXParseException& )
2372 {
2373 // re throw ?
2374 }
2375 catch( const xml::sax::SAXException& )
2376 {
2377 // re throw ?
2378 }
2379 catch( const io::IOException& )
2380 {
2381 // re throw ?
2382 }
2383 }
2384 }
2385
2386 // Set time stamp
2387 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2388 &aModifiedDate, &aModifiedTime );
2389 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2390 }
2391
2392 }
2393
SaveExceptList_Imp(const SvStringsISortDtor & rLst,const OUString & sStrmName,rtl::Reference<SotStorage> const & rStg,bool bConvert)2394 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
2395 const SvStringsISortDtor& rLst,
2396 const OUString& sStrmName,
2397 rtl::Reference<SotStorage> const &rStg,
2398 bool bConvert )
2399 {
2400 if( !rStg.is() )
2401 return;
2402
2403 if( rLst.empty() )
2404 {
2405 rStg->Remove( sStrmName );
2406 rStg->Commit();
2407 }
2408 else
2409 {
2410 rtl::Reference<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
2411 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2412 if( xStrm.is() )
2413 {
2414 xStrm->SetSize( 0 );
2415 xStrm->SetBufferSize( 8192 );
2416 xStrm->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );
2417
2418
2419 uno::Reference< uno::XComponentContext > xContext =
2420 comphelper::getProcessComponentContext();
2421
2422 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2423 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
2424 xWriter->setOutputStream(xOut);
2425
2426 uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
2427 rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );
2428
2429 xExp->exportDoc( XML_BLOCK_LIST );
2430
2431 xStrm->Commit();
2432 if( xStrm->GetError() == ERRCODE_NONE )
2433 {
2434 xStrm.clear();
2435 if (!bConvert)
2436 {
2437 rStg->Commit();
2438 if( ERRCODE_NONE != rStg->GetError() )
2439 {
2440 rStg->Remove( sStrmName );
2441 rStg->Commit();
2442 }
2443 }
2444 }
2445 }
2446 }
2447 }
2448
LoadAutocorrWordList()2449 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
2450 {
2451 if( pAutocorr_List )
2452 pAutocorr_List->DeleteAndDestroyAll();
2453 else
2454 pAutocorr_List.reset( new SvxAutocorrWordList() );
2455
2456 try
2457 {
2458 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
2459 uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
2460 uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();
2461
2462 xml::sax::InputSource aParserInput;
2463 aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
2464 aParserInput.aInputStream = xStrm->getInputStream();
2465
2466 // get parser
2467 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
2468 SAL_INFO("editeng", "AutoCorrect Import" );
2469 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
2470 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
2471
2472 // connect parser and filter
2473 xParser->setFastDocumentHandler( xFilter );
2474 xParser->registerNamespace( u"http://openoffice.org/2001/block-list"_ustr, SvXMLAutoCorrectToken::NAMESPACE );
2475 xParser->setTokenHandler(xTokenHandler);
2476
2477 // parse
2478 xParser->parseStream( aParserInput );
2479 }
2480 catch ( const uno::Exception& )
2481 {
2482 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
2483 }
2484
2485 // Set time stamp
2486 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
2487 &aModifiedDate, &aModifiedTime );
2488 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2489
2490 return pAutocorr_List.get();
2491 }
2492
GetAutocorrWordList()2493 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
2494 {
2495 if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
2496 {
2497 LoadAutocorrWordList();
2498 if( !pAutocorr_List )
2499 {
2500 OSL_ENSURE( false, "No valid list" );
2501 pAutocorr_List.reset( new SvxAutocorrWordList() );
2502 }
2503 nFlags |= ACFlags::ChgWordLstLoad;
2504 }
2505 return pAutocorr_List.get();
2506 }
2507
GetCplSttExceptList()2508 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
2509 {
2510 if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
2511 {
2512 LoadCplSttExceptList();
2513 if( !pCplStt_ExcptLst )
2514 {
2515 OSL_ENSURE( false, "No valid list" );
2516 pCplStt_ExcptLst.reset( new SvStringsISortDtor );
2517 }
2518 nFlags |= ACFlags::CplSttLstLoad;
2519 }
2520 return pCplStt_ExcptLst.get();
2521 }
2522
AddToCplSttExceptList(const OUString & rNew)2523 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
2524 {
2525 bool bRet = false;
2526 if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
2527 {
2528 MakeUserStorage_Impl();
2529 rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2530
2531 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2532
2533 xStg = nullptr;
2534 // Set time stamp
2535 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2536 &aModifiedDate, &aModifiedTime );
2537 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2538 bRet = true;
2539 }
2540 return bRet;
2541 }
2542
AddToWordStartExceptList(const OUString & rNew)2543 bool SvxAutoCorrectLanguageLists::AddToWordStartExceptList(const OUString& rNew)
2544 {
2545 bool bRet = false;
2546 if( !rNew.isEmpty() && GetWordStartExceptList()->insert( rNew ).second )
2547 {
2548 MakeUserStorage_Impl();
2549 rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2550
2551 SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2552
2553 xStg = nullptr;
2554 // Set time stamp
2555 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2556 &aModifiedDate, &aModifiedTime );
2557 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2558 bRet = true;
2559 }
2560 return bRet;
2561 }
2562
LoadCplSttExceptList()2563 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList()
2564 {
2565 try
2566 {
2567 rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2568 if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2569 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2570 }
2571 catch (const css::ucb::ContentCreationException&)
2572 {
2573 }
2574 return pCplStt_ExcptLst.get();
2575 }
2576
SaveCplSttExceptList()2577 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList()
2578 {
2579 MakeUserStorage_Impl();
2580 rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2581
2582 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );
2583
2584 xStg = nullptr;
2585
2586 // Set time stamp
2587 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2588 &aModifiedDate, &aModifiedTime );
2589 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2590 }
2591
LoadWordStartExceptList()2592 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWordStartExceptList()
2593 {
2594 try
2595 {
2596 rtl::Reference<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE );
2597 if( xStg.is() && xStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2598 LoadXMLExceptList_Imp( pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2599 }
2600 catch (const css::ucb::ContentCreationException &)
2601 {
2602 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWordStartExceptList");
2603 }
2604 return pWordStart_ExcptLst.get();
2605 }
2606
SaveWordStartExceptList()2607 void SvxAutoCorrectLanguageLists::SaveWordStartExceptList()
2608 {
2609 MakeUserStorage_Impl();
2610 rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2611
2612 SaveExceptList_Imp( *pWordStart_ExcptLst, pXMLImplWordStart_ExcptLstStr, xStg );
2613
2614 xStg = nullptr;
2615 // Set time stamp
2616 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
2617 &aModifiedDate, &aModifiedTime );
2618 aLastCheckTime = tools::Time( tools::Time::SYSTEM );
2619 }
2620
GetWordStartExceptList()2621 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWordStartExceptList()
2622 {
2623 if( !( ACFlags::WordStartLstLoad & nFlags ) || IsFileChanged_Imp() )
2624 {
2625 LoadWordStartExceptList();
2626 if( !pWordStart_ExcptLst )
2627 {
2628 OSL_ENSURE( false, "No valid list" );
2629 pWordStart_ExcptLst.reset( new SvStringsISortDtor );
2630 }
2631 nFlags |= ACFlags::WordStartLstLoad;
2632 }
2633 return pWordStart_ExcptLst.get();
2634 }
2635
RemoveStream_Imp(const OUString & rName)2636 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName )
2637 {
2638 if( sShareAutoCorrFile != sUserAutoCorrFile )
2639 {
2640 rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2641 if( xStg.is() && ERRCODE_NONE == xStg->GetError() &&
2642 xStg->IsStream( rName ) )
2643 {
2644 xStg->Remove( rName );
2645 xStg->Commit();
2646
2647 xStg = nullptr;
2648 }
2649 }
2650 }
2651
MakeUserStorage_Impl()2652 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl()
2653 {
2654 // The conversion needs to happen if the file is already in the user
2655 // directory and is in the old format. Additionally it needs to
2656 // happen when the file is being copied from share to user.
2657
2658 bool bError = false, bConvert = false, bCopy = false;
2659 INetURLObject aDest;
2660 INetURLObject aSource;
2661
2662 if (sUserAutoCorrFile != sShareAutoCorrFile )
2663 {
2664 aSource = INetURLObject ( sShareAutoCorrFile );
2665 aDest = INetURLObject ( sUserAutoCorrFile );
2666 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) )
2667 {
2668 aDest.SetExtension ( u"bak" );
2669 bConvert = true;
2670 }
2671 bCopy = true;
2672 }
2673 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) )
2674 {
2675 aSource = INetURLObject ( sUserAutoCorrFile );
2676 aDest = INetURLObject ( sUserAutoCorrFile );
2677 aDest.SetExtension ( u"bak" );
2678 bCopy = bConvert = true;
2679 }
2680 if (bCopy)
2681 {
2682 try
2683 {
2684 OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ));
2685 sal_Int32 nSlashPos = sMain.lastIndexOf('/');
2686 sMain = sMain.copy(0, nSlashPos);
2687 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2688 TransferInfo aInfo;
2689 aInfo.NameClash = NameClash::OVERWRITE;
2690 aInfo.NewTitle = aDest.GetLastName();
2691 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri );
2692 aInfo.MoveData = false;
2693 aNewContent.executeCommand( u"transfer"_ustr, Any(aInfo));
2694 }
2695 catch (...)
2696 {
2697 bError = true;
2698 }
2699 }
2700 if (bConvert && !bError)
2701 {
2702 rtl::Reference<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ );
2703 rtl::Reference<SotStorage> xDstStg = new SotStorage(sUserAutoCorrFile, StreamMode::WRITE);
2704
2705 if( xSrcStg.is() && xDstStg.is() )
2706 {
2707 std::unique_ptr<SvStringsISortDtor> pTmpWordList;
2708
2709 if (xSrcStg->IsContained( pXMLImplWordStart_ExcptLstStr ) )
2710 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWordStart_ExcptLstStr, xSrcStg );
2711
2712 if (pTmpWordList)
2713 {
2714 SaveExceptList_Imp( *pTmpWordList, pXMLImplWordStart_ExcptLstStr, xDstStg, true );
2715 pTmpWordList.reset();
2716 }
2717
2718
2719 if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) )
2720 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg );
2721
2722 if (pTmpWordList)
2723 {
2724 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true );
2725 pTmpWordList->clear();
2726 }
2727
2728 GetAutocorrWordList();
2729 MakeBlocklist_Imp( *xDstStg );
2730 sShareAutoCorrFile = sUserAutoCorrFile;
2731 xDstStg = nullptr;
2732 try
2733 {
2734 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() );
2735 aContent.executeCommand ( u"delete"_ustr, Any ( true ) );
2736 }
2737 catch (...)
2738 {
2739 }
2740 }
2741 }
2742 else if( bCopy && !bError )
2743 sShareAutoCorrFile = sUserAutoCorrFile;
2744 }
2745
MakeBlocklist_Imp(SotStorage & rStg)2746 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg )
2747 {
2748 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty();
2749 if( !bRemove )
2750 {
2751 rtl::Reference<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr,
2752 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
2753 if( refList.is() )
2754 {
2755 refList->SetSize( 0 );
2756 refList->SetBufferSize( 8192 );
2757 refList->SetProperty( u"MediaType"_ustr, Any(u"text/xml"_ustr) );
2758
2759 uno::Reference< uno::XComponentContext > xContext =
2760 comphelper::getProcessComponentContext();
2761
2762 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext);
2763 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList );
2764 xWriter->setOutputStream(xOut);
2765
2766 rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) );
2767
2768 xExp->exportDoc( XML_BLOCK_LIST );
2769
2770 refList->Commit();
2771 bRet = ERRCODE_NONE == refList->GetError();
2772 if( bRet )
2773 {
2774 refList.clear();
2775 rStg.Commit();
2776 if( ERRCODE_NONE != rStg.GetError() )
2777 {
2778 bRemove = true;
2779 bRet = false;
2780 }
2781 }
2782 }
2783 else
2784 bRet = false;
2785 }
2786
2787 if( bRemove )
2788 {
2789 rStg.Remove( pXMLImplAutocorr_ListStr );
2790 rStg.Commit();
2791 }
2792
2793 return bRet;
2794 }
2795
MakeCombinedChanges(std::vector<SvxAutocorrWord> & aNewEntries,std::vector<SvxAutocorrWord> & aDeleteEntries)2796 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries )
2797 {
2798 // First get the current list!
2799 GetAutocorrWordList();
2800
2801 MakeUserStorage_Impl();
2802 rtl::Reference<SotStorage> xStorage = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2803
2804 bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError();
2805
2806 if( bRet )
2807 {
2808 for (SvxAutocorrWord & aWordToDelete : aDeleteEntries)
2809 {
2810 std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete );
2811 if( xFoundEntry )
2812 {
2813 if( !xFoundEntry->IsTextOnly() )
2814 {
2815 OUString aName( aWordToDelete.GetShort() );
2816 if (xStorage->IsOLEStorage())
2817 aName = EncryptBlockName_Imp(aName);
2818 else
2819 GeneratePackageName ( aWordToDelete.GetShort(), aName );
2820
2821 if( xStorage->IsContained( aName ) )
2822 {
2823 xStorage->Remove( aName );
2824 bRet = xStorage->Commit();
2825 }
2826 }
2827 }
2828 }
2829
2830 for (const SvxAutocorrWord & aNewEntrie : aNewEntries)
2831 {
2832 SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true );
2833 std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd );
2834 if( xRemoved )
2835 {
2836 if( !xRemoved->IsTextOnly() )
2837 {
2838 // Still have to remove the Storage
2839 OUString sStorageName( aWordToAdd.GetShort() );
2840 if (xStorage->IsOLEStorage())
2841 sStorageName = EncryptBlockName_Imp(sStorageName);
2842 else
2843 GeneratePackageName ( aWordToAdd.GetShort(), sStorageName);
2844
2845 if( xStorage->IsContained( sStorageName ) )
2846 xStorage->Remove( sStorageName );
2847 }
2848 }
2849 bRet = pAutocorr_List->Insert( std::move(aWordToAdd) );
2850
2851 if ( !bRet )
2852 {
2853 break;
2854 }
2855 }
2856
2857 if ( bRet )
2858 {
2859 bRet = MakeBlocklist_Imp( *xStorage );
2860 }
2861 }
2862 return bRet;
2863 }
2864
PutText(const OUString & rShort,const OUString & rLong)2865 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong )
2866 {
2867 // First get the current list!
2868 GetAutocorrWordList();
2869
2870 MakeUserStorage_Impl();
2871 rtl::Reference<SotStorage> xStg = new SotStorage(sUserAutoCorrFile, StreamMode::READWRITE);
2872
2873 bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError();
2874
2875 // Update the word list
2876 if( bRet )
2877 {
2878 SvxAutocorrWord aNew(rShort, rLong, true );
2879 std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew );
2880 if( xRemove )
2881 {
2882 if( !xRemove->IsTextOnly() )
2883 {
2884 // Still have to remove the Storage
2885 OUString sStgNm( rShort );
2886 if (xStg->IsOLEStorage())
2887 sStgNm = EncryptBlockName_Imp(sStgNm);
2888 else
2889 GeneratePackageName ( rShort, sStgNm);
2890
2891 if( xStg->IsContained( sStgNm ) )
2892 xStg->Remove( sStgNm );
2893 }
2894 }
2895
2896 if( pAutocorr_List->Insert( std::move(aNew) ) )
2897 {
2898 bRet = MakeBlocklist_Imp( *xStg );
2899 xStg = nullptr;
2900 }
2901 else
2902 {
2903 bRet = false;
2904 }
2905 }
2906 return bRet;
2907 }
2908
PutText(const OUString & rShort,SfxObjectShell & rShell)2909 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort,
2910 SfxObjectShell& rShell )
2911 {
2912 // First get the current list!
2913 GetAutocorrWordList();
2914
2915 MakeUserStorage_Impl();
2916
2917 try
2918 {
2919 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE );
2920 OUString sLong;
2921 bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong );
2922 xStg = nullptr;
2923
2924 // Update the word list
2925 if( bRet )
2926 {
2927 if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) )
2928 {
2929 rtl::Reference<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );
2930 MakeBlocklist_Imp( *xStor );
2931 }
2932 }
2933 }
2934 catch ( const uno::Exception& )
2935 {
2936 }
2937 }
2938
2939 // Keep the list sorted ...
2940 struct SvxAutocorrWordList::CompareSvxAutocorrWordList
2941 {
operator ()SvxAutocorrWordList::CompareSvxAutocorrWordList2942 bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
2943 {
2944 CollatorWrapper& rCmp = ::GetCollatorWrapper();
2945 return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
2946 }
2947 };
2948
2949 namespace {
2950
2951 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;
2952
2953 }
2954
2955 struct SvxAutocorrWordList::Impl
2956 {
2957
2958 // only one of these contains the data
2959 // maSortedVector is manually sorted so we can optimise data movement
2960 mutable AutocorrWordSetType maSortedVector;
2961 mutable AutocorrWordHashType maHash; // key is 'Short'
2962
DeleteAndDestroyAllSvxAutocorrWordList::Impl2963 void DeleteAndDestroyAll()
2964 {
2965 maHash.clear();
2966 maSortedVector.clear();
2967 }
2968 };
2969
SvxAutocorrWordList()2970 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}
2971
~SvxAutocorrWordList()2972 SvxAutocorrWordList::~SvxAutocorrWordList()
2973 {
2974 }
2975
DeleteAndDestroyAll()2976 void SvxAutocorrWordList::DeleteAndDestroyAll()
2977 {
2978 mpImpl->DeleteAndDestroyAll();
2979 }
2980
2981 struct SvxAutocorrWordList::Iterator::Impl {
2982 typedef SvxAutocorrWordList::AutocorrWordSetType::const_iterator VecIterType;
2983 typedef AutocorrWordHashType::const_iterator HashIterType;
2984
2985 HashIterType mHashIter, mHashEnd;
2986 VecIterType mSortedVectorIter, mSortedVectorEnd;
2987
ImplSvxAutocorrWordList::Iterator::Impl2988 Impl(const HashIterType& hashIter, const HashIterType& hashEnd,
2989 const VecIterType& vecIter, const VecIterType& vecEnd)
2990 : mHashIter(hashIter), mHashEnd(hashEnd),
2991 mSortedVectorIter(vecIter), mSortedVectorEnd(vecEnd) {}
2992
StepSvxAutocorrWordList::Iterator::Impl2993 bool Step() {
2994 // Iterate hash table, followed by sorted vector
2995 if (mHashIter != mHashEnd) {
2996 return ++mHashIter != mHashEnd
2997 || mSortedVectorIter != mSortedVectorEnd;
2998 }
2999 return ++mSortedVectorIter != mSortedVectorEnd;
3000 }
3001
operator *SvxAutocorrWordList::Iterator::Impl3002 const SvxAutocorrWord& operator*() {
3003 return (mHashIter == mHashEnd) ? *mSortedVectorIter : mHashIter->second;
3004 }
operator ->SvxAutocorrWordList::Iterator::Impl3005 const SvxAutocorrWord* operator->() {
3006 return (mHashIter == mHashEnd) ? &*mSortedVectorIter : &mHashIter->second;
3007 }
3008 };
3009
Iterator(std::unique_ptr<SvxAutocorrWordList::Iterator::Impl> pImpl)3010 SvxAutocorrWordList::Iterator::Iterator(
3011 std::unique_ptr<SvxAutocorrWordList::Iterator::Impl> pImpl
3012 ) : mpImpl(std::move(pImpl))
3013 {
3014 }
3015
// Copy: the new iterator gets its own copy of the cursor state of 'it'
SvxAutocorrWordList::Iterator::Iterator(const SvxAutocorrWordList::Iterator& it)
    : mpImpl(std::make_unique<Impl>(*it.mpImpl))
{
}
3021
~Iterator()3022 SvxAutocorrWordList::Iterator::~Iterator()
3023 {
3024 }
3025
Step()3026 bool SvxAutocorrWordList::Iterator::Step()
3027 {
3028 return mpImpl->Step();
3029 }
3030
operator *() const3031 const SvxAutocorrWord& SvxAutocorrWordList::Iterator::operator*() const
3032 {
3033 return **mpImpl;
3034 }
3035
operator ->() const3036 const SvxAutocorrWord* SvxAutocorrWordList::Iterator::operator->() const
3037 {
3038 return mpImpl->operator->();
3039 }
3040
ContainsPattern(const OUString & aShort) const3041 bool SvxAutocorrWordList::ContainsPattern(const OUString& aShort) const
3042 {
3043 // check hash table first
3044 if (mpImpl->maHash.contains(aShort)) {
3045 return true;
3046 }
3047
3048 // then do binary search on sorted vector
3049 CollatorWrapper& rCmp = ::GetCollatorWrapper();
3050 auto it = std::lower_bound(mpImpl->maSortedVector.begin(),
3051 mpImpl->maSortedVector.end(),
3052 aShort,
3053 [&](const SvxAutocorrWord& elm,
3054 const OUString& val) {
3055 return rCmp.compareString(elm.GetShort(),
3056 val) < 0;
3057 } );
3058 if (it != mpImpl->maSortedVector.end()
3059 && rCmp.compareString(aShort, it->GetShort()) == 0)
3060 {
3061 return true;
3062 }
3063
3064 return false;
3065 }
3066
3067 // returns true if inserted
Insert(SvxAutocorrWord aWord) const3068 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
3069 {
3070 if ( mpImpl->maSortedVector.empty() ) // use the hash
3071 {
3072 OUString aShort = aWord.GetShort();
3073 auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
3074 if (inserted)
3075 return &(it->second);
3076 return nullptr;
3077 }
3078 else
3079 {
3080 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
3081 CollatorWrapper& rCmp = ::GetCollatorWrapper();
3082 if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
3083 {
3084 it = mpImpl->maSortedVector.insert(it, std::move(aWord));
3085 return &*it;
3086 }
3087 return nullptr;
3088 }
3089 }
3090
LoadEntry(const OUString & sWrong,const OUString & sRight,bool bOnlyTxt)3091 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
3092 {
3093 (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
3094 }
3095
empty() const3096 bool SvxAutocorrWordList::empty() const
3097 {
3098 return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
3099 }
3100
FindAndRemove(const SvxAutocorrWord * pWord)3101 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
3102 {
3103
3104 if ( mpImpl->maSortedVector.empty() ) // use the hash
3105 {
3106 AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
3107 if( it != mpImpl->maHash.end() )
3108 {
3109 SvxAutocorrWord pMatch = std::move(it->second);
3110 mpImpl->maHash.erase (it);
3111 return pMatch;
3112 }
3113 }
3114 else
3115 {
3116 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
3117 if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
3118 {
3119 SvxAutocorrWord pMatch = std::move(*it);
3120 mpImpl->maSortedVector.erase (it);
3121 return pMatch;
3122 }
3123 }
3124 return std::optional<SvxAutocorrWord>();
3125 }
3126
3127 // return the sorted contents - defer sorting until we have to.
getSortedContent() const3128 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
3129 {
3130 // convert from hash to set permanently
3131 if ( mpImpl->maSortedVector.empty() )
3132 {
3133 std::vector<SvxAutocorrWord> tmp;
3134 tmp.reserve(mpImpl->maHash.size());
3135 for (auto & rPair : mpImpl->maHash)
3136 tmp.emplace_back(std::move(rPair.second));
3137 mpImpl->maHash.clear();
3138 // sort twice - this gets the list into mostly-sorted order, which
3139 // reduces the number of times we need to invoke the expensive ICU collate fn.
3140 std::sort(tmp.begin(), tmp.end(),
3141 [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
3142 {
3143 return lhs.GetShort() < rhs.GetShort();
3144 });
3145 // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
3146 // stable_sort is twice as fast as sort in this situation because it does
3147 // fewer comparison operations.
3148 std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
3149 mpImpl->maSortedVector = std::move(tmp);
3150 }
3151 return mpImpl->maSortedVector;
3152 }
3153
/** Test whether the list entry pFnd applies to the text that ends at nEndPos.
 *
 * The short form of the entry is either a plain word or a pattern with a
 * leading and/or trailing ".*" wildcard (".*word", "word.*", ".*word.*").
 *
 * @param pFnd    entry to test
 * @param rTxt    text to match against
 * @param rStt    in: start position of the candidate word in rTxt;
 *                out: start of the matched text. Note that it may also be
 *                modified on some paths that end up returning std::nullopt.
 * @param nEndPos position in rTxt just behind the candidate text (must be >= 0)
 * @return *pFnd for a plain match, a newly built entry (matched text and its
 *         replacement) for a wildcard match, or std::nullopt if the entry
 *         does not apply.
 */
std::optional<SvxAutocorrWord>
SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
                                 std::u16string_view rTxt,
                                 sal_Int32 &rStt,
                                 sal_Int32 nEndPos) const
{
    const OUString& rChk = pFnd->GetShort();

    sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
    sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
    assert(nEndPos >= 0);
    // scan position, only used by the right-wildcard branch further down
    size_t nSttWdPos = nEndPos;

    // direct replacement of keywords surrounded by colons (for example, ":name:")
    bool bColonNameColon = static_cast<sal_Int32>(rTxt.size()) > nEndPos &&
        rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
    // not enough text in front of nEndPos to hold the pattern without its wildcards
    if ( nEndPos + (bColonNameColon ? 1 : 0) < rChk.getLength() - left_wildcard - right_wildcard )
    {
        return std::nullopt;
    }

    bool bWasWordDelim = false;
    // position at which the literal part of the pattern would have to start
    sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
    if (bColonNameColon)
        nCalcStt++;
    // patterns without trailing wildcard: the literal part has to start at the
    // beginning of the text, at rStt or behind a word delimiter - unless a
    // leading wildcard or the ":name:" form lifts that restriction
    if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
        ( nCalcStt < rStt &&
            IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
    {
        TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
        OUString sWord( rTxt.substr(nCalcStt, rChk.getLength() - left_wildcard) );
        if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
        {
            rStt = nCalcStt;
            if (!left_wildcard)
            {
                // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14
                if (static_cast<sal_Int32>(rTxt.size()) > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1)
                {
                    return std::nullopt;
                }
                // plain match: the list entry itself is the result
                return *pFnd;
            }
            // get the first word delimiter position before the matching ".*word" pattern
            while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ])))
                ;
            if (bWasWordDelim) rStt++;

            // don't let wildcard pattern override non-wildcard one
            OUString aShort(rTxt.substr(rStt, nEndPos - rStt));
            if (ContainsPattern(aShort)) {
                return std::nullopt;
            }

            // text covered by the leading wildcard is kept in front of the replacement
            OUString left_pattern( rTxt.substr(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard) );
            // avoid double spaces before simple "word" replacement
            left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().subView(1) : pFnd->GetLong();
            return SvxAutocorrWord(aShort, left_pattern);
        }
    } else
    // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support
    if ( right_wildcard )
    {

        // literal part of the pattern, without the wildcards
        OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) );
        // Get the last word delimiter position
        bool not_suffix;

        while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ])))
            ;
        // search the first occurrence (with a left word delimitation, if needed)
        // rStt - 1 so that the first find() below starts at rStt (for rStt == 0
        // the unsigned value wraps around and nFndPos + 1 is 0 again)
        size_t nFndPos = rStt - 1;
        do {
            nFndPos = rTxt.find( sTmp, nFndPos + 1);
            if (nFndPos == std::u16string_view::npos)
                break;
            // the hit ends at or before the last word delimiter found above
            not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength()));
        } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix );

        if ( nFndPos != std::u16string_view::npos )
        {
            sal_Int32 extra_repl = static_cast<sal_Int32>(nFndPos) + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:"

            if ( left_wildcard )
            {
                // get the first word delimiter position before the matching ".*word.*" pattern
                while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ])))
                    ;
                if (bWasWordDelim) nFndPos++;
            }
            // nothing left between the hit and nEndPos
            if (nEndPos + extra_repl <= static_cast<sal_Int32>(nFndPos))
            {
                return std::nullopt;
            }
            // return matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations"
            OUString aShort( rTxt.substr(nFndPos, nEndPos - nFndPos + extra_repl) );
            // don't let wildcard pattern override non-wildcard one
            if (ContainsPattern(aShort)) {
                return std::nullopt;
            }

            OUString aLong;
            rStt = nFndPos;
            if ( !left_wildcard )
            {
                // "word.*": replacement followed by the text covered by the wildcard
                sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength();
                aLong = pFnd->GetLong() + (siz > 0 ? rTxt.substr(nFndPos + sTmp.getLength(), siz) : u"");
            } else {
                // ".*word.*": replace the occurrences of the literal part one after
                // another, stopping at the first word delimiter in between
                OUStringBuffer buf;
                do {
                    nSttWdPos = rTxt.find( sTmp, nFndPos);
                    if (nSttWdPos != std::u16string_view::npos)
                    {
                        sal_Int32 nTmp(nFndPos);
                        while (nTmp < static_cast<sal_Int32>(nSttWdPos) && !IsWordDelim(rTxt[nTmp]))
                        {
                            nTmp++;
                        }
                        if (nTmp < static_cast<sal_Int32>(nSttWdPos)) {
                            break; // word delimiter found
                        }
                        buf.append(rTxt.substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong());
                        nFndPos = nSttWdPos + sTmp.getLength();
                    }
                } while (nSttWdPos != std::u16string_view::npos);
                // append the remaining text up to nEndPos
                if (static_cast<sal_Int32>(nEndPos - nFndPos) > extra_repl) {
                    buf.append(rTxt.substr(nFndPos, nEndPos - nFndPos));
                }
                aLong = buf.makeStringAndClear();
            }
            // only accept the match at the end of the text or in front of a word delimiter
            if ( (static_cast<sal_Int32>(rTxt.size()) > nEndPos && IsWordDelim(rTxt[nEndPos])) || static_cast<sal_Int32>(rTxt.size()) == nEndPos )
            {
                return SvxAutocorrWord(aShort, aLong);
            }
        }
    }
    return std::nullopt;
}
3292
3293 std::optional<SvxAutocorrWordList::WordSearchStatus>
SearchWordsInList(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos) const3294 SvxAutocorrWordList::SearchWordsInList(std::u16string_view rTxt,
3295 sal_Int32& rStt,
3296 sal_Int32 nEndPos) const
3297 {
3298 for (auto it = mpImpl->maHash.begin(); it != mpImpl->maHash.end(); ++it)
3299 {
3300 if(auto pTmp = WordMatches(&it->second, rTxt, rStt, nEndPos))
3301 {
3302 return WordSearchStatus(
3303 *pTmp, this,
3304 Iterator(std::make_unique<Iterator::Impl>(
3305 it, mpImpl->maHash.end(),
3306 mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end()
3307 ))
3308 );
3309 }
3310 }
3311
3312 for (auto it = mpImpl->maSortedVector.begin();
3313 it != mpImpl->maSortedVector.end(); ++it)
3314 {
3315 if(auto pTmp = WordMatches(&*it, rTxt, rStt, nEndPos))
3316 {
3317 return WordSearchStatus(
3318 *pTmp, this,
3319 Iterator(std::make_unique<Iterator::Impl>(
3320 mpImpl->maHash.end(), mpImpl->maHash.end(),
3321 it, mpImpl->maSortedVector.end()
3322 ))
3323 );
3324 }
3325 }
3326
3327 return std::nullopt;
3328 }
3329
3330 bool
SearchWordsNext(std::u16string_view rTxt,sal_Int32 & rStt,sal_Int32 nEndPos,SvxAutocorrWordList::WordSearchStatus & rStatus) const3331 SvxAutocorrWordList::SearchWordsNext(std::u16string_view rTxt,
3332 sal_Int32& rStt,
3333 sal_Int32 nEndPos,
3334 SvxAutocorrWordList::WordSearchStatus& rStatus) const
3335 {
3336 while(rStatus.StepIter())
3337 {
3338 if(auto pTmp = WordMatches(rStatus.GetWordAtIter(),
3339 rTxt, rStt, nEndPos))
3340 {
3341 rStatus.mFnd = *pTmp;
3342 return true;
3343 }
3344 }
3345
3346 return false;
3347 }
3348
3349 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
3350