1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <memory> 21 #include <string_view> 22 #include <sal/config.h> 23 24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp> 25 #include <com/sun/star/embed/XStorage.hpp> 26 #include <com/sun/star/io/IOException.hpp> 27 #include <com/sun/star/io/XStream.hpp> 28 #include <tools/urlobj.hxx> 29 #include <i18nlangtag/mslangid.hxx> 30 #include <i18nutil/transliteration.hxx> 31 #include <sal/log.hxx> 32 #include <osl/diagnose.h> 33 #include <vcl/svapp.hxx> 34 #include <vcl/settings.hxx> 35 #include <svl/fstathelper.hxx> 36 #include <svl/urihelper.hxx> 37 #include <unotools/charclass.hxx> 38 #include <com/sun/star/i18n/UnicodeType.hpp> 39 #include <unotools/collatorwrapper.hxx> 40 #include <com/sun/star/i18n/UnicodeScript.hpp> 41 #include <com/sun/star/i18n/OrdinalSuffix.hpp> 42 #include <unotools/localedatawrapper.hxx> 43 #include <unotools/transliterationwrapper.hxx> 44 #include <comphelper/processfactory.hxx> 45 #include <comphelper/storagehelper.hxx> 46 #include <comphelper/string.hxx> 47 #include <editeng/editids.hrc> 48 #include <sot/storage.hxx> 49 #include 
<editeng/udlnitem.hxx> 50 #include <editeng/wghtitem.hxx> 51 #include <editeng/postitem.hxx> 52 #include <editeng/crossedoutitem.hxx> 53 #include <editeng/escapementitem.hxx> 54 #include <editeng/svxacorr.hxx> 55 #include <editeng/unolingu.hxx> 56 #include <vcl/window.hxx> 57 #include <com/sun/star/xml/sax/InputSource.hpp> 58 #include <com/sun/star/xml/sax/FastParser.hpp> 59 #include <com/sun/star/xml/sax/Writer.hpp> 60 #include <com/sun/star/xml/sax/SAXParseException.hpp> 61 #include <unotools/streamwrap.hxx> 62 #include "SvXMLAutoCorrectImport.hxx" 63 #include "SvXMLAutoCorrectExport.hxx" 64 #include "SvXMLAutoCorrectTokenHandler.hxx" 65 #include <ucbhelper/content.hxx> 66 #include <com/sun/star/ucb/ContentCreationException.hpp> 67 #include <com/sun/star/ucb/XCommandEnvironment.hpp> 68 #include <com/sun/star/ucb/TransferInfo.hpp> 69 #include <com/sun/star/ucb/NameClash.hpp> 70 #include <tools/diagnose_ex.h> 71 #include <xmloff/xmltoken.hxx> 72 #include <unordered_map> 73 #include <rtl/character.hxx> 74 75 using namespace ::com::sun::star::ucb; 76 using namespace ::com::sun::star::uno; 77 using namespace ::com::sun::star::xml::sax; 78 using namespace ::com::sun::star; 79 using namespace ::xmloff::token; 80 using namespace ::utl; 81 82 namespace { 83 84 enum class Flags { 85 NONE = 0x00, 86 FullStop = 0x01, 87 ExclamationMark = 0x02, 88 QuestionMark = 0x04, 89 }; 90 91 } 92 93 namespace o3tl { 94 template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {}; 95 } 96 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space 97 98 constexpr OUStringLiteral pXMLImplWrdStt_ExcptLstStr = u"WordExceptList.xml"; 99 constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"; 100 constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml"; 101 102 const char 103 /* also at these beginnings - Brackets and all kinds of begin characters */ 104 sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94", 105 /* 
also at these ends - Brackets and all kinds of begin characters */ 106 sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94"; 107 108 static OUString EncryptBlockName_Imp(const OUString& rName); 109 110 static bool NonFieldWordDelim( const sal_Unicode c ) 111 { 112 return ' ' == c || '\t' == c || 0x0a == c || 113 cNonBreakingSpace == c || 0x2011 == c; 114 } 115 116 static bool IsWordDelim( const sal_Unicode c ) 117 { 118 return c == 0x1 || NonFieldWordDelim(c); 119 } 120 121 122 static bool IsLowerLetter( sal_Int32 nCharType ) 123 { 124 return CharClass::isLetterType( nCharType ) && 125 ( css::i18n::KCharacterType::LOWER & nCharType); 126 } 127 128 static bool IsUpperLetter( sal_Int32 nCharType ) 129 { 130 return CharClass::isLetterType( nCharType ) && 131 ( css::i18n::KCharacterType::UPPER & nCharType); 132 } 133 134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt, 135 sal_Int32 nStt, sal_Int32 nEnd ) 136 { 137 for( ; nStt < nEnd; ++nStt ) 138 { 139 css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt ); 140 switch( nScript ) 141 { 142 case css::i18n::UnicodeScript_kCJKRadicalsSupplement: 143 case css::i18n::UnicodeScript_kHangulJamo: 144 case css::i18n::UnicodeScript_kCJKSymbolPunctuation: 145 case css::i18n::UnicodeScript_kHiragana: 146 case css::i18n::UnicodeScript_kKatakana: 147 case css::i18n::UnicodeScript_kHangulCompatibilityJamo: 148 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth: 149 case css::i18n::UnicodeScript_kCJKCompatibility: 150 case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA: 151 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph: 152 case css::i18n::UnicodeScript_kHangulSyllable: 153 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph: 154 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm: 155 return true; 156 default: ; //do nothing 157 } 158 } 159 return false; 160 } 161 162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt, 163 sal_Int32 
nStt, sal_Int32 nEnd ) 164 { 165 for( ; nStt < nEnd; ++nStt ) 166 { 167 if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt )) 168 return true; 169 } 170 return false; 171 } 172 173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c ) 174 { 175 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks 176 if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) ) 177 return true; 178 179 bool bRet = false; 180 for( ; *pArr; ++pArr ) 181 if( *pArr == c ) 182 { 183 bRet = true; 184 break; 185 } 186 return bRet; 187 } 188 189 SvxAutoCorrDoc::~SvxAutoCorrDoc() 190 { 191 } 192 193 // Called by the functions: 194 // - FnCapitalStartWord 195 // - FnCapitalStartSentence 196 // after the exchange of characters. Then the words, if necessary, can be inserted 197 // into the exception list. 198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&, 199 sal_Unicode ) 200 { 201 } 202 203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const 204 { 205 return LANGUAGE_SYSTEM; 206 } 207 208 static const LanguageTag& GetAppLang() 209 { 210 return Application::GetSettings().GetLanguageTag(); 211 } 212 213 /// Never use an unresolved LANGUAGE_SYSTEM. 
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos ) 215 { 216 LanguageType eLang = rDoc.GetLanguage( nPos ); 217 if (eLang == LANGUAGE_SYSTEM) 218 eLang = GetAppLang().getLanguageType(); // the current work locale 219 return eLang; 220 } 221 222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang ) 223 { 224 static std::unique_ptr<LocaleDataWrapper> xLclDtWrp; 225 LanguageTag aLcl( nLang ); 226 if (!xLclDtWrp || xLclDtWrp->getLoadedLanguageTag() != aLcl) 227 xLclDtWrp.reset(new LocaleDataWrapper(aLcl)); 228 return *xLclDtWrp; 229 } 230 static TransliterationWrapper& GetIgnoreTranslWrapper() 231 { 232 static int bIsInit = 0; 233 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(), 234 TransliterationFlags::IGNORE_KANA | 235 TransliterationFlags::IGNORE_WIDTH ); 236 if( !bIsInit ) 237 { 238 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() ); 239 bIsInit = 1; 240 } 241 return aWrp; 242 } 243 static CollatorWrapper& GetCollatorWrapper() 244 { 245 static CollatorWrapper aCollWrp = [&]() 246 { 247 CollatorWrapper tmp( ::comphelper::getProcessComponentContext() ); 248 tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 ); 249 return tmp; 250 }(); 251 return aCollWrp; 252 } 253 254 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar ) 255 { 256 return cChar == '\0' || cChar == '\t' || cChar == 0x0a || 257 cChar == ' ' || cChar == '\'' || cChar == '\"' || 258 cChar == '*' || cChar == '_' || cChar == '%' || 259 cChar == '.' || cChar == ',' || cChar == ';' || 260 cChar == ':' || cChar == '?' || cChar == '!' 
|| 261 cChar == '<' || cChar == '>' || 262 cChar == '/' || cChar == '-'; 263 } 264 265 namespace 266 { 267 bool IsCompoundWordDelimChar(sal_Unicode cChar) 268 { 269 return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar); 270 } 271 } 272 273 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar ) 274 { 275 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' || 276 cChar == '/' /*case for the urls exception*/; 277 } 278 279 ACFlags SvxAutoCorrect::GetDefaultFlags() 280 { 281 ACFlags nRet = ACFlags::Autocorrect 282 | ACFlags::CapitalStartSentence 283 | ACFlags::CapitalStartWord 284 | ACFlags::ChgOrdinalNumber 285 | ACFlags::ChgToEnEmDash 286 | ACFlags::AddNonBrkSpace 287 | ACFlags::TransliterateRTL 288 | ACFlags::ChgAngleQuotes 289 | ACFlags::ChgWeightUnderl 290 | ACFlags::SetINetAttr 291 | ACFlags::ChgQuotes 292 | ACFlags::SaveWordCplSttLst 293 | ACFlags::SaveWordWrdSttLst 294 | ACFlags::CorrectCapsLock; 295 LanguageType eLang = GetAppLang().getLanguageType(); 296 if( eLang.anyOf( 297 LANGUAGE_ENGLISH, 298 LANGUAGE_ENGLISH_US, 299 LANGUAGE_ENGLISH_UK, 300 LANGUAGE_ENGLISH_AUS, 301 LANGUAGE_ENGLISH_CAN, 302 LANGUAGE_ENGLISH_NZ, 303 LANGUAGE_ENGLISH_EIRE, 304 LANGUAGE_ENGLISH_SAFRICA, 305 LANGUAGE_ENGLISH_JAMAICA, 306 LANGUAGE_ENGLISH_CARIBBEAN)) 307 nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes); 308 return nRet; 309 } 310 311 constexpr sal_Unicode cEmDash = 0x2014; 312 constexpr sal_Unicode cEnDash = 0x2013; 313 constexpr sal_Unicode cApostrophe = 0x2019; 314 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB; 315 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB; 316 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039; 317 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A; 318 // stop characters for searching preceding quotes 319 // (the first character is also the opening quote we are looking for) 320 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,, 
321 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >> 322 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C 323 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 }; 324 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 }; 325 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 }; 326 327 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile, 328 const OUString& rUserAutocorrFile ) 329 : sShareAutoCorrFile( rShareAutocorrFile ) 330 , sUserAutoCorrFile( rUserAutocorrFile ) 331 , eCharClassLang( LANGUAGE_DONTKNOW ) 332 , nFlags(SvxAutoCorrect::GetDefaultFlags()) 333 , cStartDQuote( 0 ) 334 , cEndDQuote( 0 ) 335 , cStartSQuote( 0 ) 336 , cEndSQuote( 0 ) 337 { 338 } 339 340 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy ) 341 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile ) 342 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile ) 343 , aSwFlags( rCpy.aSwFlags ) 344 , eCharClassLang(rCpy.eCharClassLang) 345 , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad)) 346 , cStartDQuote( rCpy.cStartDQuote ) 347 , cEndDQuote( rCpy.cEndDQuote ) 348 , cStartSQuote( rCpy.cStartSQuote ) 349 , cEndSQuote( rCpy.cEndSQuote ) 350 { 351 } 352 353 354 SvxAutoCorrect::~SvxAutoCorrect() 355 { 356 } 357 358 void SvxAutoCorrect::GetCharClass_( LanguageType eLang ) 359 { 360 pCharClass.reset( new CharClass( LanguageTag( eLang)) ); 361 eCharClassLang = eLang; 362 } 363 364 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn ) 365 { 366 ACFlags nOld = nFlags; 367 nFlags = bOn ? 
nFlags | nFlag 368 : nFlags & ~nFlag; 369 370 if( !bOn ) 371 { 372 if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) ) 373 nFlags &= ~ACFlags::CplSttLstLoad; 374 if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) ) 375 nFlags &= ~ACFlags::WrdSttLstLoad; 376 if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) ) 377 nFlags &= ~ACFlags::ChgWordLstLoad; 378 } 379 } 380 381 382 // Correct TWo INitial CApitals 383 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 384 sal_Int32 nSttPos, sal_Int32 nEndPos, 385 LanguageType eLang ) 386 { 387 CharClass& rCC = GetCharClass( eLang ); 388 389 // Delete all non alphanumeric. Test the characters at the beginning/end of 390 // the word ( recognizes: "(min.", "/min.", and so on.) 391 for( ; nSttPos < nEndPos; ++nSttPos ) 392 if( rCC.isLetterNumeric( rTxt, nSttPos )) 393 break; 394 for( ; nSttPos < nEndPos; --nEndPos ) 395 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 )) 396 break; 397 398 // Is the word a compounded word separated by delimiters? 399 // If so, keep track of all delimiters so each constituent 400 // word can be checked for two initial capital letters. 401 std::deque<sal_Int32> aDelimiters; 402 403 // Always check for two capitals at the beginning 404 // of the entire word, so start at nSttPos. 405 aDelimiters.push_back(nSttPos); 406 407 // Find all compound word delimiters 408 for (sal_Int32 n = nSttPos; n < nEndPos; ++n) 409 { 410 if (IsCompoundWordDelimChar(rTxt[ n ])) 411 { 412 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter 413 } 414 } 415 416 // Decide where to put the terminating delimiter. 417 // If the last AutoCorrect char was a newline, then the AutoCorrect 418 // char will not be included in rTxt. 419 // If the last AutoCorrect char was not a newline, then the AutoCorrect 420 // character will be the last character in rTxt. 
421 if (!IsCompoundWordDelimChar(rTxt[nEndPos-1])) 422 aDelimiters.push_back(nEndPos); 423 424 // Iterate through the word and all words that compose it. 425 // Two capital letters at the beginning of word? 426 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI) 427 { 428 nSttPos = aDelimiters[nI]; 429 nEndPos = aDelimiters[nI + 1]; 430 431 if( nSttPos+2 < nEndPos && 432 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) && 433 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) && 434 // Is the third character a lower case 435 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) && 436 // Do not replace special attributes 437 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ]) 438 { 439 // test if the word is in an exception list 440 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 )); 441 if( !FindInWrdSttExceptList(eLang, sWord) ) 442 { 443 // Check that word isn't correctly spelt before correcting: 444 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller = 445 LinguMgr::GetSpellChecker(); 446 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) ) 447 { 448 Sequence< css::beans::PropertyValue > aEmptySeq; 449 if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq)) 450 { 451 return; 452 } 453 } 454 sal_Unicode cSave = rTxt[ nSttPos ]; 455 OUString sChar = rCC.lowercase( OUString(cSave) ); 456 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar )) 457 { 458 if( ACFlags::SaveWordWrdSttLst & nFlags ) 459 rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave ); 460 } 461 } 462 } 463 } 464 } 465 466 // Format ordinal numbers suffixes (1st -> 1^st) 467 bool SvxAutoCorrect::FnChgOrdinalNumber( 468 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 469 sal_Int32 nSttPos, sal_Int32 nEndPos, 470 LanguageType eLang) 471 { 472 // 1st, 2nd, 3rd, 4 - 0th 473 // 201th or 201st 474 // 12th or 12nd 475 bool bChg = false; 476 477 // In some languages ordinal suffixes should never be 478 // changed to 
superscript. Let's break for those languages. 479 if (!eLang.anyOf( 480 LANGUAGE_SWEDISH, 481 LANGUAGE_SWEDISH_FINLAND)) 482 { 483 CharClass& rCC = GetCharClass(eLang); 484 485 for (; nSttPos < nEndPos; ++nSttPos) 486 if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos])) 487 break; 488 for (; nSttPos < nEndPos; --nEndPos) 489 if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1])) 490 break; 491 492 493 // Get the last number in the string to check 494 sal_Int32 nNumEnd = nEndPos; 495 bool bFoundEnd = false; 496 bool isValidNumber = true; 497 sal_Int32 i = nEndPos; 498 while (i > nSttPos) 499 { 500 i--; 501 bool isDigit = rCC.isDigit(rTxt, i); 502 if (bFoundEnd) 503 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i)); 504 505 if (isDigit && !bFoundEnd) 506 { 507 bFoundEnd = true; 508 nNumEnd = i; 509 } 510 } 511 512 if (bFoundEnd && isValidNumber) { 513 sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32(); 514 515 // Check if the characters after that number correspond to the ordinal suffix 516 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix 517 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext()); 518 519 const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale()); 520 for (OUString const & sSuffix : aSuffixes) 521 { 522 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1); 523 524 if (sSuffix == sEnd) 525 { 526 // Check if the ordinal suffix has to be set as super script 527 if (rCC.isLetter(sSuffix)) 528 { 529 // Do the change 530 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER, 531 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT); 532 rDoc.SetAttr(nNumEnd + 1, nEndPos, 533 SID_ATTR_CHAR_ESCAPEMENT, 534 aSvxEscapementItem); 535 bChg = true; 536 } 537 } 538 } 539 } 540 } 541 return bChg; 542 } 543 544 // Replace dashes 545 bool SvxAutoCorrect::FnChgToEnEmDash( 546 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 547 sal_Int32 nSttPos, sal_Int32 nEndPos, 548 
LanguageType eLang ) 549 { 550 bool bRet = false; 551 CharClass& rCC = GetCharClass( eLang ); 552 if (eLang == LANGUAGE_SYSTEM) 553 eLang = GetAppLang().getLanguageType(); 554 bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN); 555 556 // replace " - " or " --" with "enDash" 557 if( 1 < nSttPos && 1 <= nEndPos - nSttPos ) 558 { 559 sal_Unicode cCh = rTxt[ nSttPos ]; 560 if( '-' == cCh ) 561 { 562 if( 1 < nEndPos - nSttPos && 563 ' ' == rTxt[ nSttPos-1 ] && 564 '-' == rTxt[ nSttPos+1 ]) 565 { 566 sal_Int32 n; 567 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr( 568 sImplSttSkipChars,(cCh = rTxt[ n ])); 569 ++n ) 570 ; 571 572 // found: " --[<AnySttChars>][A-z0-9] 573 if( rCC.isLetterNumeric( OUString(cCh) ) ) 574 { 575 for( n = nSttPos-1; n && lcl_IsInAsciiArr( 576 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 577 ; 578 579 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9] 580 if( rCC.isLetterNumeric( OUString(cCh) )) 581 { 582 rDoc.Delete( nSttPos, nSttPos + 2 ); 583 rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) ); 584 bRet = true; 585 } 586 } 587 } 588 } 589 else if( 3 < nSttPos && 590 ' ' == rTxt[ nSttPos-1 ] && 591 '-' == rTxt[ nSttPos-2 ]) 592 { 593 sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2; 594 if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) ) 595 { 596 --nTmpPos; 597 ++nLen; 598 cCh = rTxt[ nTmpPos-1 ]; 599 } 600 if( ' ' == cCh ) 601 { 602 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr( 603 sImplSttSkipChars,(cCh = rTxt[ n ])); 604 ++n ) 605 ; 606 607 // found: " - [<AnySttChars>][A-z0-9] 608 if( rCC.isLetterNumeric( OUString(cCh) ) ) 609 { 610 cCh = ' '; 611 for( n = nTmpPos-1; n && lcl_IsInAsciiArr( 612 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 613 ; 614 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9] 615 if( rCC.isLetterNumeric( OUString(cCh) )) 616 { 617 rDoc.Delete( nTmpPos, nTmpPos + nLen ); 618 rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? 
OUString(cEmDash) : OUString(cEnDash) ); 619 bRet = true; 620 } 621 } 622 } 623 } 624 } 625 626 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash" 627 // [0-9]--[0-9] double dash always replaced with "enDash" 628 // Finnish and Hungarian use enDash instead of emDash. 629 bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH); 630 if( 4 <= nEndPos - nSttPos ) 631 { 632 OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) ); 633 sal_Int32 nFndPos = sTmp.indexOf("--"); 634 if( nFndPos != -1 && nFndPos && 635 nFndPos + 2 < sTmp.getLength() && 636 ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) || 637 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) && 638 ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) || 639 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] ))) 640 { 641 nSttPos = nSttPos + nFndPos; 642 rDoc.Delete( nSttPos, nSttPos + 2 ); 643 rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) && 644 rCC.isDigit( sTmp, nFndPos + 2 )) ? 
OUString(cEnDash) : OUString(cEmDash)) ); 645 bRet = true; 646 } 647 } 648 return bRet; 649 } 650 651 // Add non-breaking space before specific punctuation marks in French text 652 bool SvxAutoCorrect::FnAddNonBrkSpace( 653 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 654 sal_Int32 nEndPos, 655 LanguageType eLang, bool& io_bNbspRunNext ) 656 { 657 bool bRet = false; 658 659 CharClass& rCC = GetCharClass( eLang ); 660 661 if ( rCC.getLanguageTag().getLanguage() == "fr" ) 662 { 663 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA"); 664 OUString allChars = ":;?!%"; 665 OUString chars( allChars ); 666 if ( bFrCA ) 667 chars = ":"; 668 669 sal_Unicode cChar = rTxt[ nEndPos ]; 670 bool bHasSpace = chars.indexOf( cChar ) != -1; 671 bool bIsSpecial = allChars.indexOf( cChar ) != -1; 672 if ( bIsSpecial ) 673 { 674 // Get the last word delimiter position 675 sal_Int32 nSttWdPos = nEndPos; 676 bool bWasWordDelim = false; 677 while( nSttWdPos ) 678 { 679 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]); 680 if (bWasWordDelim) 681 break; 682 } 683 684 //See if the text is the start of a protocol string, e.g. have text of 685 //"http" see if it is the start of "http:" and if so leave it alone 686 sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 
1 : 0); 687 sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1; 688 if (nIndex + nProtocolLen <= rTxt.getLength()) 689 { 690 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 691 return false; 692 } 693 694 // Check the presence of "://" in the word 695 sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 ); 696 if ( nStrPos == -1 && nEndPos > 0 ) 697 { 698 // Check the previous char 699 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 700 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' ) 701 { 702 // Remove any previous normal space 703 sal_Int32 nPos = nEndPos - 1; 704 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace ) 705 { 706 if ( nPos == 0 ) break; 707 nPos--; 708 cPrevChar = rTxt[ nPos ]; 709 } 710 711 nPos++; 712 if ( nEndPos - nPos > 0 ) 713 rDoc.Delete( nPos, nEndPos ); 714 715 // Add the non-breaking space at the end pos 716 if ( bHasSpace ) 717 rDoc.Insert( nPos, OUString(cNonBreakingSpace) ); 718 io_bNbspRunNext = true; 719 bRet = true; 720 } 721 else if ( chars.indexOf( cPrevChar ) != -1 ) 722 io_bNbspRunNext = true; 723 } 724 } 725 else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) ) 726 { 727 // Remove the hardspace right before to avoid formatting URLs 728 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 729 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ]; 730 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace ) 731 { 732 rDoc.Delete( nEndPos - 2, nEndPos - 1 ); 733 bRet = true; 734 } 735 } 736 } 737 738 return bRet; 739 } 740 741 // URL recognition 742 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 743 sal_Int32 nSttPos, sal_Int32 nEndPos, 744 LanguageType eLang ) 745 { 746 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos, 747 GetCharClass( eLang ) )); 748 bool bRet = !sURL.isEmpty(); 749 if( bRet ) // so, set attribute: 750 rDoc.SetINetAttr( nSttPos, nEndPos, sURL ); 751 return bRet; 752 } 
753 754 // Automatic *bold*, /italic/, -strikeout- and _underline_ 755 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 756 sal_Int32 nEndPos ) 757 { 758 // Condition: 759 // at the beginning: _, *, / or ~ after Space with the following !Space 760 // at the end: _, *, / or ~ before Space (word delimiter?) 761 762 sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout 763 if( ++nEndPos != rTxt.getLength() && 764 !IsWordDelim( rTxt[ nEndPos ] ) ) 765 return false; 766 767 --nEndPos; 768 769 bool bAlphaNum = false; 770 sal_Int32 nPos = nEndPos; 771 sal_Int32 nFndPos = -1; 772 CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM ); 773 774 while( nPos ) 775 { 776 switch( sal_Unicode c = rTxt[ --nPos ] ) 777 { 778 case '_': 779 case '-': 780 case '/': 781 case '*': 782 if( c == cInsChar ) 783 { 784 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos || 785 IsWordDelim( rTxt[ nPos-1 ])) && 786 !IsWordDelim( rTxt[ nPos+1 ])) 787 nFndPos = nPos; 788 else 789 // Condition is not satisfied, so cancel 790 nFndPos = -1; 791 nPos = 0; 792 } 793 break; 794 default: 795 if( !bAlphaNum ) 796 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos ); 797 } 798 } 799 800 if( -1 != nFndPos ) 801 { 802 // first delete the Character at the end - this allows insertion 803 // of an empty hint in SetAttr which would be removed by Delete 804 // (fdo#62536, AUTOFMT in Writer) 805 rDoc.Delete( nEndPos, nEndPos + 1 ); 806 rDoc.Delete( nFndPos, nFndPos + 1 ); 807 // Span the Attribute over the area 808 // the end. 
809 if( '*' == cInsChar ) // Bold 810 { 811 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT ); 812 rDoc.SetAttr( nFndPos, nEndPos - 1, 813 SID_ATTR_CHAR_WEIGHT, 814 aSvxWeightItem); 815 } 816 else if( '/' == cInsChar ) // Italic 817 { 818 SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE ); 819 rDoc.SetAttr( nFndPos, nEndPos - 1, 820 SID_ATTR_CHAR_POSTURE, 821 aSvxPostureItem); 822 } 823 else if( '-' == cInsChar ) // Strikeout 824 { 825 SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT ); 826 rDoc.SetAttr( nFndPos, nEndPos - 1, 827 SID_ATTR_CHAR_STRIKEOUT, 828 aSvxCrossedOutItem); 829 } 830 else // Underline 831 { 832 SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE ); 833 rDoc.SetAttr( nFndPos, nEndPos - 1, 834 SID_ATTR_CHAR_UNDERLINE, 835 aSvxUnderlineItem); 836 } 837 } 838 839 return -1 != nFndPos; 840 } 841 842 // Capitalize first letter of every sentence 843 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc, 844 const OUString& rTxt, bool bNormalPos, 845 sal_Int32 nSttPos, sal_Int32 nEndPos, 846 LanguageType eLang ) 847 { 848 849 if( rTxt.isEmpty() || nEndPos <= nSttPos ) 850 return; 851 852 CharClass& rCC = GetCharClass( eLang ); 853 OUString aText( rTxt ); 854 const sal_Unicode *pStart = aText.getStr(), 855 *pStr = pStart + nEndPos, 856 *pWordStt = nullptr, 857 *pDelim = nullptr; 858 859 bool bAtStart = false; 860 do { 861 --pStr; 862 if (rCC.isLetter(aText, pStr - pStart)) 863 { 864 if( !pWordStt ) 865 pDelim = pStr+1; 866 pWordStt = pStr; 867 } 868 else if (pWordStt && !rCC.isDigit(aText, pStr - pStart)) 869 { 870 if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words 871 pWordStt - 1 == pStr && 872 // Installation at beginning of paragraph. 
Replaced < by <= (#i38971#) 873 (pStart + 1) <= pStr && 874 rCC.isLetter(aText, pStr-1 - pStart)) 875 pWordStt = --pStr; 876 else 877 break; 878 } 879 bAtStart = (pStart == pStr); 880 } while( !bAtStart ); 881 882 if (!pWordStt) 883 return; // no character to be replaced 884 885 886 if (rCC.isDigit(aText, pStr - pStart)) 887 return; // already ok 888 889 if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart))) 890 return; // already ok 891 892 //See if the text is the start of a protocol string, e.g. have text of 893 //"http" see if it is the start of "http:" and if so leave it alone 894 sal_Int32 nIndex = pWordStt - pStart; 895 sal_Int32 nProtocolLen = pDelim - pWordStt + 1; 896 if (nIndex + nProtocolLen <= rTxt.getLength()) 897 { 898 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 899 return; // already ok 900 } 901 902 if (0x1 == *pWordStt || 0x2 == *pWordStt) 903 return; // already ok 904 905 // Only capitalize, if string before specified characters is long enough 906 if( *pDelim && 2 >= pDelim - pWordStt && 907 lcl_IsInAsciiArr( ".-)>", *pDelim ) ) 908 return; 909 910 // tdf#59666 don't capitalize single Greek letters (except in Greek texts) 911 if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK ) 912 return; 913 914 if( !bAtStart ) // Still no beginning of a paragraph? 
915 { 916 if (NonFieldWordDelim(*pStr)) 917 { 918 for (;;) 919 { 920 bAtStart = (pStart == pStr--); 921 if (bAtStart || !NonFieldWordDelim(*pStr)) 922 break; 923 } 924 } 925 // Asian full stop, full width full stop, full width exclamation mark 926 // and full width question marks are treated as word delimiters 927 else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr && 928 0xFF1F != *pStr ) 929 return; // no valid separator -> no replacement 930 } 931 932 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 933 if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt))) 934 return; 935 936 if( bAtStart ) // at the beginning of a paragraph? 937 { 938 // Check out the previous paragraph, if it exists. 939 // If so, then check to paragraph separator at the end. 940 OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos); 941 if (!pPrevPara) 942 { 943 // valid separator -> replace 944 OUString sChar( *pWordStt ); 945 sChar = rCC.titlecase(sChar); //see fdo#56740 946 if (sChar != OUStringChar(*pWordStt)) 947 rDoc.ReplaceRange( pWordStt - pStart, 1, sChar ); 948 return; 949 } 950 951 aText = *pPrevPara; 952 bAtStart = false; 953 pStart = aText.getStr(); 954 pStr = pStart + aText.getLength(); 955 956 do { // overwrite all blanks 957 --pStr; 958 if (!NonFieldWordDelim(*pStr)) 959 break; 960 bAtStart = (pStart == pStr); 961 } while( !bAtStart ); 962 963 if( bAtStart ) 964 return; // no valid separator -> no replacement 965 } 966 967 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator. 968 // all three can happen, but not more than once! 969 const sal_Unicode* pExceptStt = nullptr; 970 bool bContinue = true; 971 Flags nFlag = Flags::NONE; 972 do 973 { 974 switch (*pStr) 975 { 976 // Western and Asian full stop 977 case '.': 978 case 0x3002: 979 case 0xFF0E: 980 { 981 if (pStr >= pStart + 2 && *(pStr - 2) == '.') 982 { 983 //e.g. text "f.o.o. 
word": Now currently considering 984 //capitalizing word but second last character of 985 //previous word is a . So probably last word is an 986 //anagram that ends in . and not truly the end of a 987 //previous sentence, so don't autocapitalize this word 988 return; 989 } 990 if (nFlag & Flags::FullStop) 991 return; // no valid separator -> no replacement 992 nFlag |= Flags::FullStop; 993 pExceptStt = pStr; 994 } 995 break; 996 case '!': 997 case 0xFF01: 998 { 999 if (nFlag & Flags::ExclamationMark) 1000 return; // no valid separator -> no replacement 1001 nFlag |= Flags::ExclamationMark; 1002 } 1003 break; 1004 case '?': 1005 case 0xFF1F: 1006 { 1007 if (nFlag & Flags::QuestionMark) 1008 return; // no valid separator -> no replacement 1009 nFlag |= Flags::QuestionMark; 1010 } 1011 break; 1012 default: 1013 if (nFlag == Flags::NONE) 1014 return; // no valid separator -> no replacement 1015 else 1016 bContinue = false; 1017 break; 1018 } 1019 1020 if (bContinue && pStr-- == pStart) 1021 { 1022 return; // no valid separator -> no replacement 1023 } 1024 } while (bContinue); 1025 if (Flags::FullStop != nFlag) 1026 pExceptStt = nullptr; 1027 1028 // Only capitalize, if string is long enough 1029 if( 2 > ( pStr - pStart ) ) 1030 return; 1031 1032 if (!rCC.isLetterNumeric(aText, pStr-- - pStart)) 1033 { 1034 bool bValid = false, bAlphaFnd = false; 1035 const sal_Unicode* pTmpStr = pStr; 1036 while( !bValid ) 1037 { 1038 if( rCC.isDigit( aText, pTmpStr - pStart ) ) 1039 { 1040 bValid = true; 1041 pStr = pTmpStr - 1; 1042 } 1043 else if( rCC.isLetter( aText, pTmpStr - pStart ) ) 1044 { 1045 if( bAlphaFnd ) 1046 { 1047 bValid = true; 1048 pStr = pTmpStr; 1049 } 1050 else 1051 bAlphaFnd = true; 1052 } 1053 else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr)) 1054 break; 1055 1056 if( pTmpStr == pStart ) 1057 break; 1058 1059 --pTmpStr; 1060 } 1061 1062 if( !bValid ) 1063 return; // no valid separator -> no replacement 1064 } 1065 1066 bool bNumericOnly = '0' <= *(pStr+1) && 
*(pStr+1) <= '9'; 1067 1068 // Search for the beginning of the word 1069 while (!NonFieldWordDelim(*pStr)) 1070 { 1071 if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) ) 1072 bNumericOnly = false; 1073 1074 if( pStart == pStr ) 1075 break; 1076 1077 --pStr; 1078 } 1079 1080 if( bNumericOnly ) // consists of only numbers, then not 1081 return; 1082 1083 if (NonFieldWordDelim(*pStr)) 1084 ++pStr; 1085 1086 OUString sWord; 1087 1088 // check on the basis of the exception list 1089 if( pExceptStt ) 1090 { 1091 sWord = OUString(pStr, pExceptStt - pStr + 1); 1092 if( FindInCplSttExceptList(eLang, sWord) ) 1093 return; 1094 1095 // Delete all non alphanumeric. Test the characters at the 1096 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.) 1097 OUString sTmp( sWord ); 1098 while( !sTmp.isEmpty() && 1099 !rCC.isLetterNumeric( sTmp, 0 ) ) 1100 sTmp = sTmp.copy(1); 1101 1102 // Remove all non alphanumeric characters towards the end up until 1103 // the last one. 1104 sal_Int32 nLen = sTmp.getLength(); 1105 while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) ) 1106 --nLen; 1107 if( nLen + 1 < sTmp.getLength() ) 1108 sTmp = sTmp.copy( 0, nLen + 1 ); 1109 1110 if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() && 1111 FindInCplSttExceptList(eLang, sTmp)) 1112 return; 1113 1114 if(FindInCplSttExceptList(eLang, sWord, true)) 1115 return; 1116 } 1117 1118 // Ok, then replace 1119 sal_Unicode cSave = *pWordStt; 1120 nSttPos = pWordStt - rTxt.getStr(); 1121 OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740 1122 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ); 1123 1124 // Perhaps someone wants to have the word 1125 if( bRet && ACFlags::SaveWordCplSttLst & nFlags ) 1126 rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave ); 1127 } 1128 1129 // Correct accidental use of cAPS LOCK key 1130 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1131 
sal_Int32 nSttPos, sal_Int32 nEndPos, 1132 LanguageType eLang ) 1133 { 1134 if (nEndPos - nSttPos < 2) 1135 // string must be at least 2-character long. 1136 return false; 1137 1138 CharClass& rCC = GetCharClass( eLang ); 1139 1140 // Check the first 2 letters. 1141 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) ) 1142 return false; 1143 1144 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) ) 1145 return false; 1146 1147 OUStringBuffer aConverted; 1148 aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) ); 1149 aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) ); 1150 1151 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 1152 if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos))) 1153 return false; 1154 1155 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i ) 1156 { 1157 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) ) 1158 // A lowercase letter disqualifies the whole text. 1159 return false; 1160 1161 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) ) 1162 // Another uppercase letter. Convert it. 1163 aConverted.append( rCC.lowercase(OUString(rTxt[i])) ); 1164 else 1165 // This is not an alphabetic letter. Leave it as-is. 1166 aConverted.append( rTxt[i] ); 1167 } 1168 1169 // Replace the word. 1170 rDoc.Delete(nSttPos, nEndPos); 1171 rDoc.Insert(nSttPos, aConverted.makeStringAndClear()); 1172 1173 return true; 1174 } 1175 1176 1177 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote, 1178 LanguageType eLang ) const 1179 { 1180 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar 1181 ? GetStartDoubleQuote() 1182 : GetStartSingleQuote() ) 1183 : ( '\"' == cInsChar 1184 ? 
GetEndDoubleQuote() 1185 : GetEndSingleQuote() ); 1186 if( !cRet ) 1187 { 1188 // then through the Language find the right character 1189 if( LANGUAGE_NONE == eLang ) 1190 cRet = cInsChar; 1191 else 1192 { 1193 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1194 OUString sRet( bSttQuote 1195 ? ( '\"' == cInsChar 1196 ? rLcl.getDoubleQuotationMarkStart() 1197 : rLcl.getQuotationMarkStart() ) 1198 : ( '\"' == cInsChar 1199 ? rLcl.getDoubleQuotationMarkEnd() 1200 : rLcl.getQuotationMarkEnd() )); 1201 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar; 1202 } 1203 } 1204 return cRet; 1205 } 1206 1207 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos, 1208 sal_Unicode cInsChar, bool bSttQuote, 1209 bool bIns, LanguageType eLang, ACQuotes eType ) const 1210 { 1211 sal_Unicode cRet; 1212 1213 if ( eType == ACQuotes::DoubleAngleQuote ) 1214 { 1215 bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS; 1216 // pressing " inside a quotation -> use second level angle quotes 1217 bool bLeftQuote = '\"' == cInsChar && 1218 // start position and Romanian OR 1219 // not start position and Hungarian 1220 bSttQuote == (eLang != LANGUAGE_HUNGARIAN); 1221 cRet = ( '<' == cInsChar || bLeftQuote ) 1222 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote ) 1223 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote ); 1224 } 1225 else if ( eType == ACQuotes::UseApostrophe ) 1226 cRet = cApostrophe; 1227 else 1228 cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1229 1230 OUString sChg( cInsChar ); 1231 if( bIns ) 1232 rDoc.Insert( nInsPos, sChg ); 1233 else 1234 rDoc.Replace( nInsPos, sChg ); 1235 1236 sChg = OUString(cRet); 1237 1238 if( eType == ACQuotes::NonBreakingSpace ) 1239 { 1240 if( rDoc.Insert( bSttQuote ? 
nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) )) 1241 { 1242 if( !bSttQuote ) 1243 ++nInsPos; 1244 } 1245 } 1246 else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' ) 1247 { 1248 rDoc.Delete( nInsPos-1, nInsPos); 1249 --nInsPos; 1250 } 1251 1252 rDoc.Replace( nInsPos, sChg ); 1253 1254 // i' -> I' in English (last step for the Undo) 1255 if( eType == ACQuotes::CapitalizeIAm ) 1256 rDoc.Replace( nInsPos-1, "I" ); 1257 } 1258 1259 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos, 1260 sal_Unicode cInsChar, bool bSttQuote ) 1261 { 1262 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1263 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1264 1265 OUString sRet(cRet); 1266 1267 if( '\"' == cInsChar ) 1268 { 1269 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS) 1270 { 1271 if( bSttQuote ) 1272 sRet += " "; 1273 else 1274 sRet = " " + sRet; 1275 } 1276 } 1277 return sRet; 1278 } 1279 1280 // search preceding opening quote in the paragraph before the insert position 1281 static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos, 1282 const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars ) 1283 { 1284 sal_Unicode cTmpChar; 1285 1286 do { 1287 cTmpChar = rTxt[ --nPos ]; 1288 if ( cTmpChar == sPrecedingChar ) 1289 return true; 1290 1291 for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh ) 1292 if ( cTmpChar == *pCh ) 1293 return false; 1294 1295 } while ( nPos > 0 ); 1296 1297 return false; 1298 } 1299 1300 // WARNING: rText may become invalid, see comment below 1301 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1302 sal_Int32 nInsPos, sal_Unicode cChar, 1303 bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin ) 1304 { 1305 bool bIsNextRun = io_bNbspRunNext; 1306 io_bNbspRunNext = false; // if it was set, then it has to be turned off 1307 1308 do{ // only for middle check loop !! 
if( cChar )
        {
            // A character was typed: handle the corrections that depend on
            // that character (double space, quotes, angle quotes, hard
            // spaces) before looking at the word in front of it.

            // Prevent double space
            if( nInsPos && ' ' == cChar &&
                IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) &&
                ' ' == rTxt[ nInsPos - 1 ])
            {
                // the second blank is dropped: nothing is inserted
                break;
            }

            bool bSingle = '\'' == cChar;
            bool bIsReplaceQuote =
                        (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) ||
                        (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle );
            if( bIsReplaceQuote )
            {
                // at the very beginning of the text it is always an opening quote
                bool bSttQuote = !nInsPos;
                ACQuotes eType = ACQuotes::NONE;
                const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
                if (!bSttQuote)
                {
                    // otherwise the preceding character decides: opening quote
                    // after a word delimiter, an opening bracket or a dash
                    sal_Unicode cPrev = rTxt[ nInsPos-1 ];
                    bSttQuote = NonFieldWordDelim(cPrev) ||
                        lcl_IsInAsciiArr( "([{", cPrev ) ||
                        ( cEmDash == cPrev ) ||
                        ( cEnDash == cPrev );
                    // tdf#38394 use opening quotation mark << in French l'<<word>>
                    if ( !bSingle && !bSttQuote && cPrev == cApostrophe &&
                        primary(eLang) == primary(LANGUAGE_FRENCH) &&
                        ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) &&
                           // abbreviated form of ce, de, je, la, le, ne, me, te, se or si
                           OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) ||
                          ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) &&
                           // abbreviated form of que
                           ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) &&
                           ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) )
                    {
                        bSttQuote = true;
                    }
                    // tdf#108423 for capitalization of English i'm
                    else if ( bSingle && ( cPrev == 'i' ) &&
                        primary(eLang) == primary(LANGUAGE_ENGLISH) &&
                        ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) )
                    {
                        eType = ACQuotes::CapitalizeIAm;
                    }
                    // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations
                    else if ( !bSingle && nInsPos &&
                        ( ( eLang == LANGUAGE_HUNGARIAN &&
                            lcl_HasPrecedingChar( rTxt, nInsPos,
                                bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
                                bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) ||
                          ( eLang.anyOf(
                                LANGUAGE_ROMANIAN,
                                LANGUAGE_ROMANIAN_MOLDOVA ) &&
                            lcl_HasPrecedingChar( rTxt, nInsPos,
                                bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0],
                                bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) )
                    {
                        LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                        // only if the opening double quotation mark is the default one
                        if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) )
                            eType = ACQuotes::DoubleAngleQuote;
                    }
                    else if ( bSingle && nInsPos && !bSttQuote &&
                        // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic,
                        // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018.
                        // tdf#123786 the same for Russian and Ukrainian
                        ( ( eLang.anyOf (
                                LANGUAGE_CZECH,
                                LANGUAGE_GERMAN,
                                LANGUAGE_GERMAN_SWISS,
                                LANGUAGE_GERMAN_AUSTRIAN,
                                LANGUAGE_GERMAN_LUXEMBOURG,
                                LANGUAGE_GERMAN_LIECHTENSTEIN,
                                LANGUAGE_ICELANDIC,
                                LANGUAGE_SLOVAK,
                                LANGUAGE_SLOVENIAN ) &&
                            !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0],  aStopSingleQuoteEnd + 1 ) ) ||
                          ( eLang.anyOf (
                                LANGUAGE_RUSSIAN,
                                LANGUAGE_UKRAINIAN ) &&
                            !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0],  aStopSingleQuoteEndRuUa + 1 ) ) ) )
                    {
                        LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang );
                        CharClass& rCC = GetCharClass( eLang );
                        if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) ||
                             rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) &&
                             // use apostrophe only after letters, not after digits or punctuation
                             rCC.isLetter(rTxt, nInsPos-1) )
                        {
                            eType = ACQuotes::UseApostrophe;
                        }
                    }
                }

                // French (except Swiss French) double quotes get a
                // non-breaking space unless a special case was chosen above
                if ( eType == ACQuotes::NONE && !bSingle &&
                    ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) )
                    eType = ACQuotes::NonBreakingSpace;

                InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
                break;
            }
            // tdf#133524 change "<<" and ">>" to double angle quotation marks
            else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) &&
                IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) &&
                ('<' == cChar || '>' == cChar) &&
                nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
            {
                const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
                if ( eLang.anyOf(
                        LANGUAGE_CATALAN,              // primary level
                        LANGUAGE_CATALAN_VALENCIAN,    // primary level
                        LANGUAGE_FINNISH,              // alternative primary level
                        LANGUAGE_FRENCH_SWISS,         // second level
                        LANGUAGE_GALICIAN,             // primary level
                        LANGUAGE_HUNGARIAN,            // second level
                        LANGUAGE_POLISH,               // second level
                        LANGUAGE_PORTUGUESE,           // primary level
                        LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
                        LANGUAGE_ROMANIAN,             // second level
                        LANGUAGE_ROMANIAN_MOLDOVA,     // second level
                        LANGUAGE_SWEDISH,              // alternative primary level
                        LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
                        LANGUAGE_UKRAINIAN,            // primary level
                        LANGUAGE_USER_ARAGONESE,       // primary level
                        LANGUAGE_USER_ASTURIAN ) ||    // primary level
                    primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
                    primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
                {
                    InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
                    break;
                }
            }

            // No quote handling applied: put the typed character into the
            // document as it is.
            if( bInsert )
                rDoc.Insert( nInsPos, OUString(cChar) );
            else
                rDoc.Replace( nInsPos, OUString(cChar) );

            // Hardspaces autocorrection
            if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) )
            {
                if ( NeedsHardspaceAutocorr( cChar ) &&
                    FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) )
                {
                    // intentionally empty: the call in the condition did the work
                    ;
                }
                else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) )
                {
                    // Remove the NBSP if it wasn't an autocorrection
                    if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) &&
                            cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace )
                    {
                        // Look for the last HARD_SPACE
                        // Walk backwards from the character in front of the
                        // insert position. Stop after deleting the first
                        // non-breaking space found, at a character for which
                        // NeedsHardspaceAutocorr() is false, or once position
                        // 0 has been examined.
                        sal_Int32 nPos = nInsPos - 1;
                        bool bContinue = true;
                        while ( bContinue )
                        {
                            const sal_Unicode cTmpChar = rTxt[ nPos ];
                            if ( cTmpChar == cNonBreakingSpace )
                            {
                                rDoc.Delete( nPos, nPos + 1 );
                                bContinue = false;
                            }
                            else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 )
                                bContinue = false;
                            nPos--;
                        }
                    }
                }
            }
        }

        // From here on the word in front of the insert position is examined;
        // there is none at the start of the text.
        if( !nInsPos )
            break;

        sal_Int32 nPos = nInsPos - 1;

        // a word delimiter directly in front of the insert position: no word
        if( IsWordDelim( rTxt[ nPos ]))
            break;

        // Set bold or underline automatically?
        if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength()))
        {
            if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) )
            {
                FnChgWeightUnderl( rDoc, rTxt, nPos+1 );
            }
            // for these characters nothing else is tried, whether or not the
            // flag is set
            break;
        }

        // move nPos back to the nearest word delimiter or to position 0
        while( nPos && !IsWordDelim( rTxt[ --nPos ]))
            ;

        // Found a Paragraph-start or a Blank, search for the word shortcut in
        // auto.
        sal_Int32 nCapLttrPos = nPos+1;        // on the 1st Character
        if( !nPos && !IsWordDelim( rTxt[ 0 ]))
            --nCapLttrPos;          // begin of paragraph and no blank

        const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos );
        CharClass& rCC = GetCharClass( eLang );

        // no symbol characters
        if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos ))
            break;

        if( IsAutoCorrFlag( ACFlags::Autocorrect ) &&
            // tdf#134940 fix regression of arrow "-->" resulted by premature
            // replacement of "--" since '>' was added to IsAutoCorrectChar()
            '>' != cChar )
        {
            // WARNING ATTENTION: rTxt is an alias of the text node's OUString
            // and becomes INVALID if ChgAutoCorrWord returns true!
            // => use aPara/pPara to create a valid copy of the string!
            OUString aPara;
            OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr;

            bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos,
                                                    *this, pPara );
            if( !bChgWord )
            {
                // Second attempt: narrow the range by skipping the characters
                // listed in sImplSttSkipChars at its start and those listed
                // in sImplEndSkipChars at its end, and retry if that changed
                // the range and left it non-empty.
                sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos;
                while( nCapLttrPos1 < nInsPos &&
                        lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] )
                        )
                        ++nCapLttrPos1;
                while( nCapLttrPos1 < nInsPos1 && nInsPos1 &&
                        lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] )
                        )
                        --nInsPos1;

                if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) &&
                    nCapLttrPos1 < nInsPos1 &&
                    rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara ))
                {
                    bChgWord = true;
                    nCapLttrPos = nCapLttrPos1;
                }
            }

            if( bChgWord )
            {
                // Only aPara is used from here on (see the warning above); it
                // is empty when no copy was requested.
                if( !aPara.isEmpty() )
                {
                    // end of the word that now starts at nCapLttrPos
                    sal_Int32 nEnd = nCapLttrPos;
                    while( nEnd < aPara.getLength() &&
                            !IsWordDelim( aPara[ nEnd ]))
                        ++nEnd;

                    // Capital letter at beginning of paragraph?
                    if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
                    {
                        FnCapitalStartSentence( rDoc, aPara, false,
                                                nCapLttrPos, nEnd, eLang );
                    }

                    if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
                    {
                        FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang );
                    }
                }
                break;
            }
        }

        if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN )
        {
            // WARNING ATTENTION: rTxt is an alias of the text node's OUString
            // and becomes INVALID if TransliterateRTLWord returns true!
            if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) )
                break;
        }

        // Ordinal numbers and URL recognition come first; only if neither of
        // them applied are the capitalisation and dash corrections tried.
        if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) &&
                (nInsPos >= 2 ) &&       // fdo#69762 avoid autocorrect for 2e-3
                ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) &&
                FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
            ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
                FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
            ;
        else
        {
            // bLockKeyOn can only be true with a non-null pFrameWin, which
            // makes the dereference in the caps lock branch below safe
            bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK);
            bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos );

            if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) &&
                 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) )
            {
                // Correct accidental use of cAPS LOCK key (do this only when
                // the caps or shift lock key is pressed).  Turn off the caps
                // lock afterwards.
                pFrameWin->SimulateKeyPress( KEY_CAPSLOCK );
            }

            // Capital letter at beginning of paragraph ?
            if( !bUnsupported &&
                IsAutoCorrFlag( ACFlags::CapitalStartSentence ) )
            {
                FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang );
            }

            // Two capital letters at beginning of word ??
            if( !bUnsupported &&
                IsAutoCorrFlag( ACFlags::CapitalStartWord ) )
            {
                FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
            }

            if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) )
            {
                FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang );
            }
        }

    // the loop body runs exactly once; "do ... while(false)" only exists so
    // that "break" can be used as a common early exit
    } while( false );
}

// Returns the autocorrection lists for eLang. If m_aLangTable has no entry
// for the language yet, CreateLanguageFile() is called first.
// NOTE(review): the result of the second find() is dereferenced unchecked, so
// this relies on CreateLanguageFile() (called with its default bNewFile
// argument, which is declared outside this file section) always registering
// an entry - confirm.
SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_(
                                                        LanguageType eLang )
{
    LanguageTag aLanguageTag( eLang);
    if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end())
        (void)CreateLanguageFile(aLanguageTag);
    return *(m_aLangTable.find(aLanguageTag)->second);
}

// Saves the sentence-start ("Cpl Stt") exception list of eLang. Only a
// warning is logged if no lists are loaded for that language.
void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang )
{
    auto const iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end() && iter->second)
        iter->second->SaveCplSttExceptList();
    else
    {
        SAL_WARN("editeng", "Save an empty list? ");
    }
}

// Saves the word-start ("Wrd Stt") exception list of eLang. Only a warning is
// logged if no lists are loaded for that language.
void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang)
{
    auto const iter = m_aLangTable.find(LanguageTag(eLang));
    if (iter != m_aLangTable.end() && iter->second)
        iter->second->SaveWrdSttExceptList();
    else
    {
        SAL_WARN("editeng", "Save an empty list? ");
    }
}

// Adds a single word. The list will immediately be written to the file!
1661 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew, 1662 LanguageType eLang ) 1663 { 1664 SvxAutoCorrectLanguageLists* pLists = nullptr; 1665 // either the right language is present or it will be this in the general list 1666 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1667 if (iter != m_aLangTable.end()) 1668 pLists = iter->second.get(); 1669 else 1670 { 1671 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1672 iter = m_aLangTable.find(aLangTagUndetermined); 1673 if (iter != m_aLangTable.end()) 1674 pLists = iter->second.get(); 1675 else if(CreateLanguageFile(aLangTagUndetermined)) 1676 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1677 } 1678 OSL_ENSURE(pLists, "No auto correction data"); 1679 return pLists && pLists->AddToCplSttExceptList(rNew); 1680 } 1681 1682 // Adds a single word. The list will immediately be written to the file! 1683 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew, 1684 LanguageType eLang ) 1685 { 1686 SvxAutoCorrectLanguageLists* pLists = nullptr; 1687 //either the right language is present or it is set in the general list 1688 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1689 if (iter != m_aLangTable.end()) 1690 pLists = iter->second.get(); 1691 else 1692 { 1693 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1694 iter = m_aLangTable.find(aLangTagUndetermined); 1695 if (iter != m_aLangTable.end()) 1696 pLists = iter->second.get(); 1697 else if(CreateLanguageFile(aLangTagUndetermined)) 1698 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1699 } 1700 OSL_ENSURE(pLists, "No auto correction file!"); 1701 return pLists && pLists->AddToWrdSttExceptList(rNew); 1702 } 1703 1704 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt, 1705 sal_Int32 nPos) 1706 { 1707 OUString sRet; 1708 if( !nPos ) 1709 return sRet; 1710 1711 sal_Int32 nEnd = nPos; 1712 1713 // it must be followed by a blank or tab! 
1714 if( ( nPos < rTxt.getLength() && 1715 !IsWordDelim( rTxt[ nPos ])) || 1716 IsWordDelim( rTxt[ --nPos ])) 1717 return sRet; 1718 1719 while( nPos && !IsWordDelim( rTxt[ --nPos ])) 1720 ; 1721 1722 // Found a Paragraph-start or a Blank, search for the word shortcut in 1723 // auto. 1724 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character 1725 if( !nPos && !IsWordDelim( rTxt[ 0 ])) 1726 --nCapLttrPos; // Beginning of paragraph and no Blank! 1727 1728 while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) ) 1729 if( ++nCapLttrPos >= nEnd ) 1730 return sRet; 1731 1732 if( 3 > nEnd - nCapLttrPos ) 1733 return sRet; 1734 1735 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); 1736 1737 CharClass& rCC = GetCharClass(eLang); 1738 1739 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd )) 1740 return sRet; 1741 1742 sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos ); 1743 return sRet; 1744 } 1745 1746 // static 1747 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt, 1748 const sal_Int32 nPos) 1749 { 1750 constexpr sal_Int32 nMinLen = 3; 1751 constexpr sal_Int32 nMaxLen = 9; 1752 std::vector<OUString> aRes; 1753 if (nPos >= nMinLen) 1754 { 1755 sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0); 1756 // TODO: better detect word boundaries (not only whitespaces, but also e.g. 
punctuation) 1757 if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1])) 1758 { 1759 while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin])) 1760 ++nBegin; 1761 } 1762 if (nBegin + nMinLen <= nPos) 1763 { 1764 OUString sRes = rTxt.copy(nBegin, nPos - nBegin); 1765 aRes.push_back(sRes); 1766 bool bLastStartedWithDelim = IsWordDelim(sRes[0]); 1767 for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i) 1768 { 1769 bool bAdd = bLastStartedWithDelim; 1770 bLastStartedWithDelim = IsWordDelim(sRes[i]); 1771 bAdd = bAdd || bLastStartedWithDelim; 1772 if (bAdd) 1773 aRes.push_back(sRes.copy(i)); 1774 } 1775 } 1776 } 1777 return aRes; 1778 } 1779 1780 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile ) 1781 { 1782 OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists "); 1783 1784 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true )); 1785 OUString sShareDirFile( sUserDirFile ); 1786 1787 SvxAutoCorrectLanguageLists* pLists = nullptr; 1788 1789 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY ); 1790 1791 auto nFndPos = aLastFileTable.find(rLanguageTag); 1792 if(nFndPos != aLastFileTable.end() && 1793 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) && 1794 nAktTime - nLastCheckTime < nMinTime) 1795 { 1796 // no need to test the file, because the last check is not older then 1797 // 2 minutes. 
1798 if( bNewFile ) 1799 { 1800 sShareDirFile = sUserDirFile; 1801 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1802 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1803 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1804 aLastFileTable.erase(nFndPos); 1805 } 1806 } 1807 else if( 1808 ( FStatHelper::IsDocument( sUserDirFile ) || 1809 FStatHelper::IsDocument( sShareDirFile = 1810 GetAutoCorrFileName( rLanguageTag ) ) || 1811 FStatHelper::IsDocument( sShareDirFile = 1812 GetAutoCorrFileName( rLanguageTag, false, false, true) ) 1813 ) || 1814 ( sShareDirFile = sUserDirFile, bNewFile ) 1815 ) 1816 { 1817 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1818 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1819 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1820 if (nFndPos != aLastFileTable.end()) 1821 aLastFileTable.erase(nFndPos); 1822 } 1823 else if( !bNewFile ) 1824 { 1825 aLastFileTable[rLanguageTag] = nAktTime.GetTime(); 1826 } 1827 return pLists != nullptr; 1828 } 1829 1830 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong, 1831 LanguageType eLang ) 1832 { 1833 LanguageTag aLanguageTag( eLang); 1834 auto const iter = m_aLangTable.find(aLanguageTag); 1835 if (iter != m_aLangTable.end()) 1836 return iter->second->PutText(rShort, rLong); 1837 if(CreateLanguageFile(aLanguageTag)) 1838 return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong); 1839 return false; 1840 } 1841 1842 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, 1843 std::vector<SvxAutocorrWord>& aDeleteEntries, 1844 LanguageType eLang ) 1845 { 1846 LanguageTag aLanguageTag( eLang); 1847 auto const iter = m_aLangTable.find(aLanguageTag); 1848 if (iter != m_aLangTable.end()) 1849 { 1850 
iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1851 } 1852 else if(CreateLanguageFile( aLanguageTag )) 1853 { 1854 m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1855 } 1856 } 1857 1858 // - return the replacement text (only for SWG-Format, all other 1859 // can be taken from the word list!) 1860 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& ) 1861 { 1862 return false; 1863 } 1864 1865 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& ) 1866 { 1867 } 1868 1869 // Text with attribution (only the SWG - SWG format!) 1870 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&, 1871 const OUString&, const OUString&, SfxObjectShell&, OUString& ) 1872 { 1873 return false; 1874 } 1875 1876 OUString EncryptBlockName_Imp(const OUString& rName) 1877 { 1878 OUStringBuffer aName; 1879 aName.append('#').append(rName); 1880 for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos) 1881 { 1882 if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos])) 1883 aName[nPos] &= 0x0f; 1884 } 1885 return aName.makeStringAndClear(); 1886 } 1887 1888 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */ 1889 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName ) 1890 { 1891 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7)); 1892 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US)); 1893 1894 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos) 1895 { 1896 switch (aBuf[nPos]) 1897 { 1898 case '!': 1899 case '/': 1900 case ':': 1901 case '.': 1902 case '\\': 1903 aBuf[nPos] = '_'; 1904 break; 1905 default: 1906 break; 1907 } 1908 } 1909 1910 rPackageName = aBuf.makeStringAndClear(); 1911 } 1912 1913 static const SvxAutocorrWord* lcl_SearchWordsInList( 1914 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt, 1915 sal_Int32& rStt, sal_Int32 nEndPos) 1916 { 1917 const 
SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList(); 1918 return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos ); 1919 } 1920 1921 // the search for the words in the substitution table 1922 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( 1923 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos, 1924 SvxAutoCorrDoc&, LanguageTag& rLang ) 1925 { 1926 const SvxAutocorrWord* pRet = nullptr; 1927 LanguageTag aLanguageTag( rLang); 1928 if( aLanguageTag.isSystemLocale() ) 1929 aLanguageTag.reset( MsLangId::getSystemLanguage()); 1930 1931 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback 1932 * list instead? */ 1933 1934 // First search for eLang, then US-English -> English 1935 // and last in LANGUAGE_UNDETERMINED 1936 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) 1937 { 1938 //the language is available - so bring it on 1939 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1940 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1941 if( pRet ) 1942 { 1943 rLang = aLanguageTag; 1944 return pRet; 1945 } 1946 else 1947 return nullptr; 1948 } 1949 1950 // If it still could not be found here, then keep on searching 1951 LanguageType eLang = aLanguageTag.getLanguageType(); 1952 // the primary language for example EN 1953 aLanguageTag.reset(aLanguageTag.getLanguage()); 1954 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 1955 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 1956 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 1957 CreateLanguageFile(aLanguageTag, false))) 1958 { 1959 //the language is available - so bring it on 1960 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1961 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1962 if( pRet ) 1963 { 1964 rLang = aLanguageTag; 1965 return 
pRet; 1966 } 1967 } 1968 1969 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || 1970 CreateLanguageFile(aLanguageTag, false)) 1971 { 1972 //the language is available - so bring it on 1973 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1974 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1975 if( pRet ) 1976 { 1977 rLang = aLanguageTag; 1978 return pRet; 1979 } 1980 } 1981 return nullptr; 1982 } 1983 1984 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang, 1985 const OUString& sWord ) 1986 { 1987 LanguageTag aLanguageTag( eLang); 1988 1989 /* TODO-BCP47: again horrible ugliness */ 1990 1991 // First search for eLang, then primary language of eLang 1992 // and last in LANGUAGE_UNDETERMINED 1993 1994 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) 1995 { 1996 //the language is available - so bring it on 1997 auto const& pList = m_aLangTable.find(aLanguageTag)->second; 1998 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() ) 1999 return true; 2000 } 2001 2002 // If it still could not be found here, then keep on searching 2003 // the primary language for example EN 2004 aLanguageTag.reset(aLanguageTag.getLanguage()); 2005 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 2006 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 2007 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 2008 CreateLanguageFile(aLanguageTag, false))) 2009 { 2010 //the language is available - so bring it on 2011 auto const& pList = m_aLangTable.find(aLanguageTag)->second; 2012 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() ) 2013 return true; 2014 } 2015 2016 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || 2017 CreateLanguageFile(aLanguageTag, false)) 2018 { 2019 //the language is 
available - so bring it on 2020 auto const& pList = m_aLangTable.find(aLanguageTag)->second; 2021 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() ) 2022 return true; 2023 } 2024 return false; 2025 } 2026 2027 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord) 2028 { 2029 SvStringsISortDtor::const_iterator it = pList->find( "~" ); 2030 SvStringsISortDtor::size_type nPos = it - pList->begin(); 2031 if( nPos < pList->size() ) 2032 { 2033 OUString sLowerWord(sWord.toAsciiLowerCase()); 2034 OUString sAbr; 2035 for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n ) 2036 { 2037 sAbr = (*pList)[ n ]; 2038 if (sAbr[0] != '~') 2039 break; 2040 // ~ and ~. are not allowed! 2041 if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() ) 2042 { 2043 OUString sLowerAbk(sAbr.toAsciiLowerCase()); 2044 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;) 2045 { 2046 if( !--i ) // agrees 2047 return true; 2048 2049 if( sLowerAbk[i] != sLowerWord[--ii]) 2050 break; 2051 } 2052 } 2053 } 2054 } 2055 OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ), 2056 "Wrongly sorted exception list?" ); 2057 return false; 2058 } 2059 2060 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang, 2061 const OUString& sWord, bool bAbbreviation) 2062 { 2063 LanguageTag aLanguageTag( eLang); 2064 2065 /* TODO-BCP47: did I mention terrible horrible ugliness? */ 2066 2067 // First search for eLang, then primary language of eLang 2068 // and last in LANGUAGE_UNDETERMINED 2069 2070 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) 2071 { 2072 //the language is available - so bring it on 2073 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2074 if(bAbbreviation ? 
lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2075 return true; 2076 } 2077 2078 // If it still could not be found here, then keep on searching 2079 // the primary language for example EN 2080 aLanguageTag.reset(aLanguageTag.getLanguage()); 2081 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 2082 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 2083 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 2084 CreateLanguageFile(aLanguageTag, false))) 2085 { 2086 //the language is available - so bring it on 2087 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2088 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2089 return true; 2090 } 2091 2092 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || 2093 CreateLanguageFile(aLanguageTag, false)) 2094 { 2095 //the language is available - so bring it on 2096 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2097 if(bAbbreviation ? 
lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2098 return true; 2099 } 2100 return false; 2101 } 2102 2103 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag, 2104 bool bNewFile, bool bTst, bool bUnlocalized ) const 2105 { 2106 OUString sRet, sExt( rLanguageTag.getBcp47() ); 2107 if (bUnlocalized) 2108 { 2109 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example 2110 std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false); 2111 if (!vecFallBackStrings.empty()) 2112 sExt = vecFallBackStrings[0]; 2113 } 2114 2115 sExt = "_" + sExt + ".dat"; 2116 if( bNewFile ) 2117 sRet = sUserAutoCorrFile + sExt; 2118 else if( !bTst ) 2119 sRet = sShareAutoCorrFile + sExt; 2120 else 2121 { 2122 // test first in the user directory - if not exist, then 2123 sRet = sUserAutoCorrFile + sExt; 2124 if( !FStatHelper::IsDocument( sRet )) 2125 sRet = sShareAutoCorrFile + sExt; 2126 } 2127 return sRet; 2128 } 2129 2130 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists( 2131 SvxAutoCorrect& rParent, 2132 const OUString& rShareAutoCorrectFile, 2133 const OUString& rUserAutoCorrectFile) 2134 : sShareAutoCorrFile( rShareAutoCorrectFile ), 2135 sUserAutoCorrFile( rUserAutoCorrectFile ), 2136 aModifiedDate( Date::EMPTY ), 2137 aModifiedTime( tools::Time::EMPTY ), 2138 aLastCheckTime( tools::Time::EMPTY ), 2139 rAutoCorrect(rParent), 2140 nFlags(ACFlags::NONE) 2141 { 2142 } 2143 2144 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists() 2145 { 2146 } 2147 2148 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp() 2149 { 2150 // Access the file system only every 2 minutes to check the date stamp 2151 bool bRet = false; 2152 2153 tools::Time nMinTime( 0, 2 ); 2154 tools::Time nAktTime( tools::Time::SYSTEM ); 2155 if( aLastCheckTime <= nAktTime) // overflow? 
2156 return false; 2157 nAktTime -= aLastCheckTime; 2158 if( nAktTime > nMinTime ) // min time past 2159 { 2160 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY ); 2161 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2162 &aTstDate, &aTstTime ) && 2163 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime )) 2164 { 2165 bRet = true; 2166 // then remove all the lists fast! 2167 if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst ) 2168 { 2169 pCplStt_ExcptLst.reset(); 2170 } 2171 if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst ) 2172 { 2173 pWrdStt_ExcptLst.reset(); 2174 } 2175 if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List ) 2176 { 2177 pAutocorr_List.reset(); 2178 } 2179 nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad ); 2180 } 2181 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2182 } 2183 return bRet; 2184 } 2185 2186 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp( 2187 std::unique_ptr<SvStringsISortDtor>& rpLst, 2188 const OUString& sStrmName, 2189 tools::SvRef<SotStorage>& rStg) 2190 { 2191 if( rpLst ) 2192 rpLst->clear(); 2193 else 2194 rpLst.reset( new SvStringsISortDtor ); 2195 2196 { 2197 if( rStg.is() && rStg->IsStream( sStrmName ) ) 2198 { 2199 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, 2200 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) ); 2201 if( ERRCODE_NONE != xStrm->GetError()) 2202 { 2203 xStrm.clear(); 2204 rStg.clear(); 2205 RemoveStream_Imp( sStrmName ); 2206 } 2207 else 2208 { 2209 uno::Reference< uno::XComponentContext > xContext = 2210 comphelper::getProcessComponentContext(); 2211 2212 xml::sax::InputSource aParserInput; 2213 aParserInput.sSystemId = sStrmName; 2214 2215 xStrm->Seek( 0 ); 2216 xStrm->SetBufferSize( 8 * 1024 ); 2217 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm ); 2218 2219 // get filter 2220 uno::Reference< 
xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst ); 2221 2222 // connect parser and filter 2223 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext ); 2224 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler; 2225 xParser->setFastDocumentHandler( xFilter ); 2226 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE ); 2227 xParser->setTokenHandler( xTokenHandler ); 2228 2229 // parse 2230 try 2231 { 2232 xParser->parseStream( aParserInput ); 2233 } 2234 catch( const xml::sax::SAXParseException& ) 2235 { 2236 // re throw ? 2237 } 2238 catch( const xml::sax::SAXException& ) 2239 { 2240 // re throw ? 2241 } 2242 catch( const io::IOException& ) 2243 { 2244 // re throw ? 2245 } 2246 } 2247 } 2248 2249 // Set time stamp 2250 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2251 &aModifiedDate, &aModifiedTime ); 2252 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2253 } 2254 2255 } 2256 2257 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp( 2258 const SvStringsISortDtor& rLst, 2259 const OUString& sStrmName, 2260 tools::SvRef<SotStorage> const &rStg, 2261 bool bConvert ) 2262 { 2263 if( !rStg.is() ) 2264 return; 2265 2266 if( rLst.empty() ) 2267 { 2268 rStg->Remove( sStrmName ); 2269 rStg->Commit(); 2270 } 2271 else 2272 { 2273 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, 2274 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); 2275 if( xStrm.is() ) 2276 { 2277 xStrm->SetSize( 0 ); 2278 xStrm->SetBufferSize( 8192 ); 2279 xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); 2280 2281 2282 uno::Reference< uno::XComponentContext > xContext = 2283 comphelper::getProcessComponentContext(); 2284 2285 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); 2286 uno::Reference < io::XOutputStream> xOut = new 
utl::OOutputStreamWrapper( *xStrm ); 2287 xWriter->setOutputStream(xOut); 2288 2289 uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW); 2290 rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) ); 2291 2292 xExp->exportDoc( XML_BLOCK_LIST ); 2293 2294 xStrm->Commit(); 2295 if( xStrm->GetError() == ERRCODE_NONE ) 2296 { 2297 xStrm.clear(); 2298 if (!bConvert) 2299 { 2300 rStg->Commit(); 2301 if( ERRCODE_NONE != rStg->GetError() ) 2302 { 2303 rStg->Remove( sStrmName ); 2304 rStg->Commit(); 2305 } 2306 } 2307 } 2308 } 2309 } 2310 } 2311 2312 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList() 2313 { 2314 if( pAutocorr_List ) 2315 pAutocorr_List->DeleteAndDestroyAll(); 2316 else 2317 pAutocorr_List.reset( new SvxAutocorrWordList() ); 2318 2319 try 2320 { 2321 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ ); 2322 uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ ); 2323 uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); 2324 2325 xml::sax::InputSource aParserInput; 2326 aParserInput.sSystemId = pXMLImplAutocorr_ListStr; 2327 aParserInput.aInputStream = xStrm->getInputStream(); 2328 2329 // get parser 2330 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext); 2331 SAL_INFO("editeng", "AutoCorrect Import" ); 2332 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg ); 2333 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler; 2334 2335 // connect parser and filter 2336 xParser->setFastDocumentHandler( xFilter ); 2337 xParser->registerNamespace( "http://openoffice.org/2001/block-list", 
SvXMLAutoCorrectToken::NAMESPACE ); 2338 xParser->setTokenHandler(xTokenHandler); 2339 2340 // parse 2341 xParser->parseStream( aParserInput ); 2342 } 2343 catch ( const uno::Exception& ) 2344 { 2345 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile); 2346 } 2347 2348 // Set time stamp 2349 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2350 &aModifiedDate, &aModifiedTime ); 2351 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2352 2353 return pAutocorr_List.get(); 2354 } 2355 2356 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList() 2357 { 2358 if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() ) 2359 { 2360 LoadAutocorrWordList(); 2361 if( !pAutocorr_List ) 2362 { 2363 OSL_ENSURE( false, "No valid list" ); 2364 pAutocorr_List.reset( new SvxAutocorrWordList() ); 2365 } 2366 nFlags |= ACFlags::ChgWordLstLoad; 2367 } 2368 return pAutocorr_List.get(); 2369 } 2370 2371 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList() 2372 { 2373 if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() ) 2374 { 2375 LoadCplSttExceptList(); 2376 if( !pCplStt_ExcptLst ) 2377 { 2378 OSL_ENSURE( false, "No valid list" ); 2379 pCplStt_ExcptLst.reset( new SvStringsISortDtor ); 2380 } 2381 nFlags |= ACFlags::CplSttLstLoad; 2382 } 2383 return pCplStt_ExcptLst.get(); 2384 } 2385 2386 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew) 2387 { 2388 bool bRet = false; 2389 if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second ) 2390 { 2391 MakeUserStorage_Impl(); 2392 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2393 2394 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2395 2396 xStg = nullptr; 2397 // Set time stamp 2398 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2399 &aModifiedDate, &aModifiedTime ); 2400 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2401 
bRet = true; 2402 } 2403 return bRet; 2404 } 2405 2406 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew) 2407 { 2408 bool bRet = false; 2409 SvStringsISortDtor* pExceptList = LoadWrdSttExceptList(); 2410 if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second ) 2411 { 2412 MakeUserStorage_Impl(); 2413 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2414 2415 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2416 2417 xStg = nullptr; 2418 // Set time stamp 2419 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2420 &aModifiedDate, &aModifiedTime ); 2421 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2422 bRet = true; 2423 } 2424 return bRet; 2425 } 2426 2427 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList() 2428 { 2429 try 2430 { 2431 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); 2432 if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) 2433 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2434 } 2435 catch (const css::ucb::ContentCreationException&) 2436 { 2437 } 2438 return pCplStt_ExcptLst.get(); 2439 } 2440 2441 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList() 2442 { 2443 MakeUserStorage_Impl(); 2444 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2445 2446 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2447 2448 xStg = nullptr; 2449 2450 // Set time stamp 2451 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2452 &aModifiedDate, &aModifiedTime ); 2453 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2454 } 2455 2456 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList() 2457 { 2458 try 2459 { 2460 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | 
StreamMode::SHARE_DENYNONE ); 2461 if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) ) 2462 LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2463 } 2464 catch (const css::ucb::ContentCreationException &) 2465 { 2466 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList"); 2467 } 2468 return pWrdStt_ExcptLst.get(); 2469 } 2470 2471 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList() 2472 { 2473 MakeUserStorage_Impl(); 2474 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2475 2476 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2477 2478 xStg = nullptr; 2479 // Set time stamp 2480 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2481 &aModifiedDate, &aModifiedTime ); 2482 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2483 } 2484 2485 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList() 2486 { 2487 if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() ) 2488 { 2489 LoadWrdSttExceptList(); 2490 if( !pWrdStt_ExcptLst ) 2491 { 2492 OSL_ENSURE( false, "No valid list" ); 2493 pWrdStt_ExcptLst.reset( new SvStringsISortDtor ); 2494 } 2495 nFlags |= ACFlags::WrdSttLstLoad; 2496 } 2497 return pWrdStt_ExcptLst.get(); 2498 } 2499 2500 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName ) 2501 { 2502 if( sShareAutoCorrFile != sUserAutoCorrFile ) 2503 { 2504 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2505 if( xStg.is() && ERRCODE_NONE == xStg->GetError() && 2506 xStg->IsStream( rName ) ) 2507 { 2508 xStg->Remove( rName ); 2509 xStg->Commit(); 2510 2511 xStg = nullptr; 2512 } 2513 } 2514 } 2515 2516 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl() 2517 { 2518 // The conversion needs to happen if the file is already in the user 2519 // directory and is in the old format. 
Additionally it needs to 2520 // happen when the file is being copied from share to user. 2521 2522 bool bError = false, bConvert = false, bCopy = false; 2523 INetURLObject aDest; 2524 INetURLObject aSource; 2525 2526 if (sUserAutoCorrFile != sShareAutoCorrFile ) 2527 { 2528 aSource = INetURLObject ( sShareAutoCorrFile ); 2529 aDest = INetURLObject ( sUserAutoCorrFile ); 2530 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) ) 2531 { 2532 aDest.SetExtension ( u"bak" ); 2533 bConvert = true; 2534 } 2535 bCopy = true; 2536 } 2537 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) ) 2538 { 2539 aSource = INetURLObject ( sUserAutoCorrFile ); 2540 aDest = INetURLObject ( sUserAutoCorrFile ); 2541 aDest.SetExtension ( u"bak" ); 2542 bCopy = bConvert = true; 2543 } 2544 if (bCopy) 2545 { 2546 try 2547 { 2548 OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri )); 2549 sal_Int32 nSlashPos = sMain.lastIndexOf('/'); 2550 sMain = sMain.copy(0, nSlashPos); 2551 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2552 TransferInfo aInfo; 2553 aInfo.NameClash = NameClash::OVERWRITE; 2554 aInfo.NewTitle = aDest.GetLastName(); 2555 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ); 2556 aInfo.MoveData = false; 2557 aNewContent.executeCommand( "transfer", Any(aInfo)); 2558 } 2559 catch (...) 
2560 { 2561 bError = true; 2562 } 2563 } 2564 if (bConvert && !bError) 2565 { 2566 tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ ); 2567 tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE ); 2568 2569 if( xSrcStg.is() && xDstStg.is() ) 2570 { 2571 std::unique_ptr<SvStringsISortDtor> pTmpWordList; 2572 2573 if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) ) 2574 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg ); 2575 2576 if (pTmpWordList) 2577 { 2578 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true ); 2579 pTmpWordList.reset(); 2580 } 2581 2582 2583 if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) 2584 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg ); 2585 2586 if (pTmpWordList) 2587 { 2588 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true ); 2589 pTmpWordList->clear(); 2590 } 2591 2592 GetAutocorrWordList(); 2593 MakeBlocklist_Imp( *xDstStg ); 2594 sShareAutoCorrFile = sUserAutoCorrFile; 2595 xDstStg = nullptr; 2596 try 2597 { 2598 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2599 aContent.executeCommand ( "delete", makeAny ( true ) ); 2600 } 2601 catch (...) 
2602 { 2603 } 2604 } 2605 } 2606 else if( bCopy && !bError ) 2607 sShareAutoCorrFile = sUserAutoCorrFile; 2608 } 2609 2610 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg ) 2611 { 2612 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty(); 2613 if( !bRemove ) 2614 { 2615 tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr, 2616 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); 2617 if( refList.is() ) 2618 { 2619 refList->SetSize( 0 ); 2620 refList->SetBufferSize( 8192 ); 2621 refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); 2622 2623 uno::Reference< uno::XComponentContext > xContext = 2624 comphelper::getProcessComponentContext(); 2625 2626 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); 2627 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList ); 2628 xWriter->setOutputStream(xOut); 2629 2630 rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) ); 2631 2632 xExp->exportDoc( XML_BLOCK_LIST ); 2633 2634 refList->Commit(); 2635 bRet = ERRCODE_NONE == refList->GetError(); 2636 if( bRet ) 2637 { 2638 refList.clear(); 2639 rStg.Commit(); 2640 if( ERRCODE_NONE != rStg.GetError() ) 2641 { 2642 bRemove = true; 2643 bRet = false; 2644 } 2645 } 2646 } 2647 else 2648 bRet = false; 2649 } 2650 2651 if( bRemove ) 2652 { 2653 rStg.Remove( pXMLImplAutocorr_ListStr ); 2654 rStg.Commit(); 2655 } 2656 2657 return bRet; 2658 } 2659 2660 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries ) 2661 { 2662 // First get the current list! 
2663 GetAutocorrWordList(); 2664 2665 MakeUserStorage_Impl(); 2666 tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2667 2668 bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError(); 2669 2670 if( bRet ) 2671 { 2672 for (SvxAutocorrWord & aWordToDelete : aDeleteEntries) 2673 { 2674 std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); 2675 if( xFoundEntry ) 2676 { 2677 if( !xFoundEntry->IsTextOnly() ) 2678 { 2679 OUString aName( aWordToDelete.GetShort() ); 2680 if (xStorage->IsOLEStorage()) 2681 aName = EncryptBlockName_Imp(aName); 2682 else 2683 GeneratePackageName ( aWordToDelete.GetShort(), aName ); 2684 2685 if( xStorage->IsContained( aName ) ) 2686 { 2687 xStorage->Remove( aName ); 2688 bRet = xStorage->Commit(); 2689 } 2690 } 2691 } 2692 } 2693 2694 for (const SvxAutocorrWord & aNewEntrie : aNewEntries) 2695 { 2696 SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ); 2697 std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd ); 2698 if( xRemoved ) 2699 { 2700 if( !xRemoved->IsTextOnly() ) 2701 { 2702 // Still have to remove the Storage 2703 OUString sStorageName( aWordToAdd.GetShort() ); 2704 if (xStorage->IsOLEStorage()) 2705 sStorageName = EncryptBlockName_Imp(sStorageName); 2706 else 2707 GeneratePackageName ( aWordToAdd.GetShort(), sStorageName); 2708 2709 if( xStorage->IsContained( sStorageName ) ) 2710 xStorage->Remove( sStorageName ); 2711 } 2712 } 2713 bRet = pAutocorr_List->Insert( std::move(aWordToAdd) ); 2714 2715 if ( !bRet ) 2716 { 2717 break; 2718 } 2719 } 2720 2721 if ( bRet ) 2722 { 2723 bRet = MakeBlocklist_Imp( *xStorage ); 2724 } 2725 } 2726 return bRet; 2727 } 2728 2729 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong ) 2730 { 2731 // First get the current list! 
2732 GetAutocorrWordList(); 2733 2734 MakeUserStorage_Impl(); 2735 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2736 2737 bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError(); 2738 2739 // Update the word list 2740 if( bRet ) 2741 { 2742 SvxAutocorrWord aNew(rShort, rLong, true ); 2743 std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew ); 2744 if( xRemove ) 2745 { 2746 if( !xRemove->IsTextOnly() ) 2747 { 2748 // Still have to remove the Storage 2749 OUString sStgNm( rShort ); 2750 if (xStg->IsOLEStorage()) 2751 sStgNm = EncryptBlockName_Imp(sStgNm); 2752 else 2753 GeneratePackageName ( rShort, sStgNm); 2754 2755 if( xStg->IsContained( sStgNm ) ) 2756 xStg->Remove( sStgNm ); 2757 } 2758 } 2759 2760 if( pAutocorr_List->Insert( std::move(aNew) ) ) 2761 { 2762 bRet = MakeBlocklist_Imp( *xStg ); 2763 xStg = nullptr; 2764 } 2765 else 2766 { 2767 bRet = false; 2768 } 2769 } 2770 return bRet; 2771 } 2772 2773 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, 2774 SfxObjectShell& rShell ) 2775 { 2776 // First get the current list! 2777 GetAutocorrWordList(); 2778 2779 MakeUserStorage_Impl(); 2780 2781 try 2782 { 2783 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE ); 2784 OUString sLong; 2785 bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong ); 2786 xStg = nullptr; 2787 2788 // Update the word list 2789 if( bRet ) 2790 { 2791 if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) ) 2792 { 2793 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2794 MakeBlocklist_Imp( *xStor ); 2795 } 2796 } 2797 } 2798 catch ( const uno::Exception& ) 2799 { 2800 } 2801 } 2802 2803 // Keep the list sorted ... 
2804 struct SvxAutocorrWordList::CompareSvxAutocorrWordList 2805 { 2806 bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const 2807 { 2808 CollatorWrapper& rCmp = ::GetCollatorWrapper(); 2809 return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0; 2810 } 2811 }; 2812 2813 namespace { 2814 2815 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType; 2816 2817 } 2818 2819 struct SvxAutocorrWordList::Impl 2820 { 2821 2822 // only one of these contains the data 2823 // maSortedVector is manually sorted so we can optimise data movement 2824 mutable AutocorrWordSetType maSortedVector; 2825 mutable AutocorrWordHashType maHash; // key is 'Short' 2826 2827 void DeleteAndDestroyAll() 2828 { 2829 maHash.clear(); 2830 maSortedVector.clear(); 2831 } 2832 }; 2833 2834 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {} 2835 2836 SvxAutocorrWordList::~SvxAutocorrWordList() 2837 { 2838 } 2839 2840 void SvxAutocorrWordList::DeleteAndDestroyAll() 2841 { 2842 mpImpl->DeleteAndDestroyAll(); 2843 } 2844 2845 // returns true if inserted 2846 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const 2847 { 2848 if ( mpImpl->maSortedVector.empty() ) // use the hash 2849 { 2850 OUString aShort = aWord.GetShort(); 2851 auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) ); 2852 if (inserted) 2853 return &(it->second); 2854 return nullptr; 2855 } 2856 else 2857 { 2858 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList()); 2859 CollatorWrapper& rCmp = ::GetCollatorWrapper(); 2860 if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0) 2861 { 2862 it = mpImpl->maSortedVector.insert(it, std::move(aWord)); 2863 return &*it; 2864 } 2865 return nullptr; 2866 } 2867 } 2868 2869 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, 
bool bOnlyTxt) 2870 { 2871 (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt )); 2872 } 2873 2874 bool SvxAutocorrWordList::empty() const 2875 { 2876 return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty(); 2877 } 2878 2879 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord) 2880 { 2881 2882 if ( mpImpl->maSortedVector.empty() ) // use the hash 2883 { 2884 AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() ); 2885 if( it != mpImpl->maHash.end() ) 2886 { 2887 SvxAutocorrWord pMatch = std::move(it->second); 2888 mpImpl->maHash.erase (it); 2889 return pMatch; 2890 } 2891 } 2892 else 2893 { 2894 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList()); 2895 if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it)) 2896 { 2897 SvxAutocorrWord pMatch = std::move(*it); 2898 mpImpl->maSortedVector.erase (it); 2899 return pMatch; 2900 } 2901 } 2902 return std::optional<SvxAutocorrWord>(); 2903 } 2904 2905 // return the sorted contents - defer sorting until we have to. 2906 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const 2907 { 2908 // convert from hash to set permanently 2909 if ( mpImpl->maSortedVector.empty() ) 2910 { 2911 std::vector<SvxAutocorrWord> tmp; 2912 tmp.reserve(mpImpl->maHash.size()); 2913 for (auto & rPair : mpImpl->maHash) 2914 tmp.emplace_back(std::move(rPair.second)); 2915 mpImpl->maHash.clear(); 2916 // sort twice - this gets the list into mostly-sorted order, which 2917 // reduces the number of times we need to invoke the expensive ICU collate fn. 2918 std::sort(tmp.begin(), tmp.end(), 2919 [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) 2920 { 2921 return lhs.GetShort() < rhs.GetShort(); 2922 }); 2923 // This beast has some O(N log(N)) in a terribly slow ICU collate fn. 
2924 // stable_sort is twice as fast as sort in this situation because it does 2925 // fewer comparison operations. 2926 std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList()); 2927 mpImpl->maSortedVector = std::move(tmp); 2928 } 2929 return mpImpl->maSortedVector; 2930 } 2931 2932 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, 2933 const OUString &rTxt, 2934 sal_Int32 &rStt, 2935 sal_Int32 nEndPos) const 2936 { 2937 const OUString& rChk = pFnd->GetShort(); 2938 2939 sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern? 2940 sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern? 2941 sal_Int32 nSttWdPos = nEndPos; 2942 2943 // direct replacement of keywords surrounded by colons (for example, ":name:") 2944 bool bColonNameColon = rTxt.getLength() > nEndPos && 2945 rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":"); 2946 if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard ) 2947 { 2948 2949 bool bWasWordDelim = false; 2950 sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard; 2951 if (bColonNameColon) 2952 nCalcStt++; 2953 if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon || 2954 ( nCalcStt < rStt && 2955 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) ) 2956 { 2957 TransliterationWrapper& rCmp = GetIgnoreTranslWrapper(); 2958 OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard); 2959 if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) )) 2960 { 2961 rStt = nCalcStt; 2962 if (!left_wildcard) 2963 { 2964 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 
1/2/14 2965 if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1) 2966 return nullptr; 2967 return pFnd; 2968 } 2969 // get the first word delimiter position before the matching ".*word" pattern 2970 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ]))) 2971 ; 2972 if (bWasWordDelim) rStt++; 2973 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard); 2974 // avoid double spaces before simple "word" replacement 2975 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong(); 2976 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) ) 2977 return pNew; 2978 } 2979 } else 2980 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support 2981 if ( right_wildcard ) 2982 { 2983 2984 OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) ); 2985 // Get the last word delimiter position 2986 bool not_suffix; 2987 2988 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]))) 2989 ; 2990 // search the first occurrence (with a left word delimitation, if needed) 2991 sal_Int32 nFndPos = -1; 2992 do { 2993 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1); 2994 if (nFndPos == -1) 2995 break; 2996 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength())); 2997 } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix ); 2998 2999 if ( nFndPos != -1 ) 3000 { 3001 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. 
"a:" 3002 3003 if ( left_wildcard ) 3004 { 3005 // get the first word delimiter position before the matching ".*word.*" pattern 3006 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ]))) 3007 ; 3008 if (bWasWordDelim) nFndPos++; 3009 } 3010 if (nEndPos + extra_repl <= nFndPos) 3011 { 3012 return nullptr; 3013 } 3014 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" 3015 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl); 3016 3017 OUString aLong; 3018 rStt = nFndPos; 3019 if ( !left_wildcard ) 3020 { 3021 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength(); 3022 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : ""); 3023 } else { 3024 OUStringBuffer buf; 3025 do { 3026 nSttWdPos = rTxt.indexOf( sTmp, nFndPos); 3027 if (nSttWdPos != -1) 3028 { 3029 sal_Int32 nTmp(nFndPos); 3030 while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp])) 3031 nTmp++; 3032 if (nTmp < nSttWdPos) 3033 break; // word delimiter found 3034 buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong()); 3035 nFndPos = nSttWdPos + sTmp.getLength(); 3036 } 3037 } while (nSttWdPos != -1); 3038 if (nEndPos - nFndPos > extra_repl) 3039 buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos)); 3040 aLong = buf.makeStringAndClear(); 3041 } 3042 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) 3043 { 3044 if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos ) 3045 return pNew; 3046 } 3047 } 3048 } 3049 } 3050 return nullptr; 3051 } 3052 3053 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt, 3054 sal_Int32 nEndPos) const 3055 { 3056 for (auto const& elem : mpImpl->maHash) 3057 { 3058 if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) 3059 return pTmp; 3060 } 3061 3062 for (auto const& elem : mpImpl->maSortedVector) 3063 { 
3064 if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) 3065 return pTmp; 3066 } 3067 return nullptr; 3068 } 3069 3070 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 3071
