1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <memory> 21 #include <string_view> 22 #include <sal/config.h> 23 24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp> 25 #include <com/sun/star/embed/XStorage.hpp> 26 #include <com/sun/star/io/IOException.hpp> 27 #include <com/sun/star/io/XStream.hpp> 28 #include <tools/urlobj.hxx> 29 #include <i18nlangtag/mslangid.hxx> 30 #include <i18nutil/transliteration.hxx> 31 #include <sal/log.hxx> 32 #include <osl/diagnose.h> 33 #include <vcl/svapp.hxx> 34 #include <vcl/settings.hxx> 35 #include <svl/fstathelper.hxx> 36 #include <svl/urihelper.hxx> 37 #include <unotools/charclass.hxx> 38 #include <com/sun/star/i18n/UnicodeType.hpp> 39 #include <unotools/collatorwrapper.hxx> 40 #include <com/sun/star/i18n/UnicodeScript.hpp> 41 #include <com/sun/star/i18n/OrdinalSuffix.hpp> 42 #include <unotools/localedatawrapper.hxx> 43 #include <unotools/transliterationwrapper.hxx> 44 #include <comphelper/processfactory.hxx> 45 #include <comphelper/storagehelper.hxx> 46 #include <comphelper/string.hxx> 47 #include <editeng/editids.hrc> 48 #include <sot/storage.hxx> 49 #include 
<editeng/udlnitem.hxx> 50 #include <editeng/wghtitem.hxx> 51 #include <editeng/postitem.hxx> 52 #include <editeng/crossedoutitem.hxx> 53 #include <editeng/escapementitem.hxx> 54 #include <editeng/svxacorr.hxx> 55 #include <editeng/unolingu.hxx> 56 #include <vcl/window.hxx> 57 #include <com/sun/star/xml/sax/InputSource.hpp> 58 #include <com/sun/star/xml/sax/FastParser.hpp> 59 #include <com/sun/star/xml/sax/Writer.hpp> 60 #include <com/sun/star/xml/sax/SAXParseException.hpp> 61 #include <unotools/streamwrap.hxx> 62 #include "SvXMLAutoCorrectImport.hxx" 63 #include "SvXMLAutoCorrectExport.hxx" 64 #include "SvXMLAutoCorrectTokenHandler.hxx" 65 #include <ucbhelper/content.hxx> 66 #include <com/sun/star/ucb/ContentCreationException.hpp> 67 #include <com/sun/star/ucb/XCommandEnvironment.hpp> 68 #include <com/sun/star/ucb/TransferInfo.hpp> 69 #include <com/sun/star/ucb/NameClash.hpp> 70 #include <tools/diagnose_ex.h> 71 #include <xmloff/xmltoken.hxx> 72 #include <unordered_map> 73 #include <rtl/character.hxx> 74 75 using namespace ::com::sun::star::ucb; 76 using namespace ::com::sun::star::uno; 77 using namespace ::com::sun::star::xml::sax; 78 using namespace ::com::sun::star; 79 using namespace ::xmloff::token; 80 using namespace ::utl; 81 82 namespace { 83 84 enum class Flags { 85 NONE = 0x00, 86 FullStop = 0x01, 87 ExclamationMark = 0x02, 88 QuestionMark = 0x04, 89 }; 90 91 } 92 93 namespace o3tl { 94 template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {}; 95 } 96 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space 97 98 constexpr OUStringLiteral pXMLImplWrdStt_ExcptLstStr = u"WordExceptList.xml"; 99 constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"; 100 constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml"; 101 102 const char 103 /* also at these beginnings - Brackets and all kinds of begin characters */ 104 sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94", 105 /* 
also at these ends - Brackets and all kinds of begin characters */ 106 sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94"; 107 108 static OUString EncryptBlockName_Imp(const OUString& rName); 109 110 static bool NonFieldWordDelim( const sal_Unicode c ) 111 { 112 return ' ' == c || '\t' == c || 0x0a == c || 113 cNonBreakingSpace == c || 0x2011 == c; 114 } 115 116 static bool IsWordDelim( const sal_Unicode c ) 117 { 118 return c == 0x1 || NonFieldWordDelim(c); 119 } 120 121 122 static bool IsLowerLetter( sal_Int32 nCharType ) 123 { 124 return CharClass::isLetterType( nCharType ) && 125 ( css::i18n::KCharacterType::LOWER & nCharType); 126 } 127 128 static bool IsUpperLetter( sal_Int32 nCharType ) 129 { 130 return CharClass::isLetterType( nCharType ) && 131 ( css::i18n::KCharacterType::UPPER & nCharType); 132 } 133 134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt, 135 sal_Int32 nStt, sal_Int32 nEnd ) 136 { 137 for( ; nStt < nEnd; ++nStt ) 138 { 139 css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt ); 140 switch( nScript ) 141 { 142 case css::i18n::UnicodeScript_kCJKRadicalsSupplement: 143 case css::i18n::UnicodeScript_kHangulJamo: 144 case css::i18n::UnicodeScript_kCJKSymbolPunctuation: 145 case css::i18n::UnicodeScript_kHiragana: 146 case css::i18n::UnicodeScript_kKatakana: 147 case css::i18n::UnicodeScript_kHangulCompatibilityJamo: 148 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth: 149 case css::i18n::UnicodeScript_kCJKCompatibility: 150 case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA: 151 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph: 152 case css::i18n::UnicodeScript_kHangulSyllable: 153 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph: 154 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm: 155 return true; 156 default: ; //do nothing 157 } 158 } 159 return false; 160 } 161 162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt, 163 sal_Int32 
nStt, sal_Int32 nEnd ) 164 { 165 for( ; nStt < nEnd; ++nStt ) 166 { 167 if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt )) 168 return true; 169 } 170 return false; 171 } 172 173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c ) 174 { 175 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks 176 if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) ) 177 return true; 178 179 bool bRet = false; 180 for( ; *pArr; ++pArr ) 181 if( *pArr == c ) 182 { 183 bRet = true; 184 break; 185 } 186 return bRet; 187 } 188 189 SvxAutoCorrDoc::~SvxAutoCorrDoc() 190 { 191 } 192 193 // Called by the functions: 194 // - FnCapitalStartWord 195 // - FnCapitalStartSentence 196 // after the exchange of characters. Then the words, if necessary, can be inserted 197 // into the exception list. 198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&, 199 sal_Unicode ) 200 { 201 } 202 203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const 204 { 205 return LANGUAGE_SYSTEM; 206 } 207 208 static const LanguageTag& GetAppLang() 209 { 210 return Application::GetSettings().GetLanguageTag(); 211 } 212 213 /// Never use an unresolved LANGUAGE_SYSTEM. 
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos ) 215 { 216 LanguageType eLang = rDoc.GetLanguage( nPos ); 217 if (eLang == LANGUAGE_SYSTEM) 218 eLang = GetAppLang().getLanguageType(); // the current work locale 219 return eLang; 220 } 221 222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang ) 223 { 224 static LocaleDataWrapper aLclDtWrp( GetAppLang() ); 225 LanguageTag aLcl( nLang ); 226 const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag(); 227 if( aLcl != rLcl ) 228 aLclDtWrp.setLanguageTag( aLcl ); 229 return aLclDtWrp; 230 } 231 static TransliterationWrapper& GetIgnoreTranslWrapper() 232 { 233 static int bIsInit = 0; 234 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(), 235 TransliterationFlags::IGNORE_KANA | 236 TransliterationFlags::IGNORE_WIDTH ); 237 if( !bIsInit ) 238 { 239 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() ); 240 bIsInit = 1; 241 } 242 return aWrp; 243 } 244 static CollatorWrapper& GetCollatorWrapper() 245 { 246 static CollatorWrapper aCollWrp = [&]() 247 { 248 CollatorWrapper tmp( ::comphelper::getProcessComponentContext() ); 249 tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 ); 250 return tmp; 251 }(); 252 return aCollWrp; 253 } 254 255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar ) 256 { 257 return cChar == '\0' || cChar == '\t' || cChar == 0x0a || 258 cChar == ' ' || cChar == '\'' || cChar == '\"' || 259 cChar == '*' || cChar == '_' || cChar == '%' || 260 cChar == '.' || cChar == ',' || cChar == ';' || 261 cChar == ':' || cChar == '?' || cChar == '!' 
|| 262 cChar == '<' || cChar == '>' || 263 cChar == '/' || cChar == '-'; 264 } 265 266 namespace 267 { 268 bool IsCompoundWordDelimChar(sal_Unicode cChar) 269 { 270 return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar); 271 } 272 } 273 274 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar ) 275 { 276 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' || 277 cChar == '/' /*case for the urls exception*/; 278 } 279 280 ACFlags SvxAutoCorrect::GetDefaultFlags() 281 { 282 ACFlags nRet = ACFlags::Autocorrect 283 | ACFlags::CapitalStartSentence 284 | ACFlags::CapitalStartWord 285 | ACFlags::ChgOrdinalNumber 286 | ACFlags::ChgToEnEmDash 287 | ACFlags::AddNonBrkSpace 288 | ACFlags::TransliterateRTL 289 | ACFlags::ChgAngleQuotes 290 | ACFlags::ChgWeightUnderl 291 | ACFlags::SetINetAttr 292 | ACFlags::ChgQuotes 293 | ACFlags::SaveWordCplSttLst 294 | ACFlags::SaveWordWrdSttLst 295 | ACFlags::CorrectCapsLock; 296 LanguageType eLang = GetAppLang().getLanguageType(); 297 if( eLang.anyOf( 298 LANGUAGE_ENGLISH, 299 LANGUAGE_ENGLISH_US, 300 LANGUAGE_ENGLISH_UK, 301 LANGUAGE_ENGLISH_AUS, 302 LANGUAGE_ENGLISH_CAN, 303 LANGUAGE_ENGLISH_NZ, 304 LANGUAGE_ENGLISH_EIRE, 305 LANGUAGE_ENGLISH_SAFRICA, 306 LANGUAGE_ENGLISH_JAMAICA, 307 LANGUAGE_ENGLISH_CARIBBEAN)) 308 nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes); 309 return nRet; 310 } 311 312 constexpr sal_Unicode cEmDash = 0x2014; 313 constexpr sal_Unicode cEnDash = 0x2013; 314 constexpr sal_Unicode cApostrophe = 0x2019; 315 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB; 316 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB; 317 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039; 318 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A; 319 // stop characters for searching preceding quotes 320 // (the first character is also the opening quote we are looking for) 321 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,, 
322 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >> 323 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C 324 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 }; 325 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 }; 326 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 }; 327 328 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile, 329 const OUString& rUserAutocorrFile ) 330 : sShareAutoCorrFile( rShareAutocorrFile ) 331 , sUserAutoCorrFile( rUserAutocorrFile ) 332 , eCharClassLang( LANGUAGE_DONTKNOW ) 333 , nFlags(SvxAutoCorrect::GetDefaultFlags()) 334 , cStartDQuote( 0 ) 335 , cEndDQuote( 0 ) 336 , cStartSQuote( 0 ) 337 , cEndSQuote( 0 ) 338 { 339 } 340 341 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy ) 342 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile ) 343 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile ) 344 , aSwFlags( rCpy.aSwFlags ) 345 , eCharClassLang(rCpy.eCharClassLang) 346 , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad)) 347 , cStartDQuote( rCpy.cStartDQuote ) 348 , cEndDQuote( rCpy.cEndDQuote ) 349 , cStartSQuote( rCpy.cStartSQuote ) 350 , cEndSQuote( rCpy.cEndSQuote ) 351 { 352 } 353 354 355 SvxAutoCorrect::~SvxAutoCorrect() 356 { 357 } 358 359 void SvxAutoCorrect::GetCharClass_( LanguageType eLang ) 360 { 361 pCharClass.reset( new CharClass( LanguageTag( eLang)) ); 362 eCharClassLang = eLang; 363 } 364 365 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn ) 366 { 367 ACFlags nOld = nFlags; 368 nFlags = bOn ? 
nFlags | nFlag 369 : nFlags & ~nFlag; 370 371 if( !bOn ) 372 { 373 if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) ) 374 nFlags &= ~ACFlags::CplSttLstLoad; 375 if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) ) 376 nFlags &= ~ACFlags::WrdSttLstLoad; 377 if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) ) 378 nFlags &= ~ACFlags::ChgWordLstLoad; 379 } 380 } 381 382 383 // Correct TWo INitial CApitals 384 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 385 sal_Int32 nSttPos, sal_Int32 nEndPos, 386 LanguageType eLang ) 387 { 388 CharClass& rCC = GetCharClass( eLang ); 389 390 // Delete all non alphanumeric. Test the characters at the beginning/end of 391 // the word ( recognizes: "(min.", "/min.", and so on.) 392 for( ; nSttPos < nEndPos; ++nSttPos ) 393 if( rCC.isLetterNumeric( rTxt, nSttPos )) 394 break; 395 for( ; nSttPos < nEndPos; --nEndPos ) 396 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 )) 397 break; 398 399 // Is the word a compounded word separated by delimiters? 400 // If so, keep track of all delimiters so each constituent 401 // word can be checked for two initial capital letters. 402 std::deque<sal_Int32> aDelimiters; 403 404 // Always check for two capitals at the beginning 405 // of the entire word, so start at nSttPos. 406 aDelimiters.push_back(nSttPos); 407 408 // Find all compound word delimiters 409 for (sal_Int32 n = nSttPos; n < nEndPos; ++n) 410 { 411 if (IsCompoundWordDelimChar(rTxt[ n ])) 412 { 413 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter 414 } 415 } 416 417 // Decide where to put the terminating delimiter. 418 // If the last AutoCorrect char was a newline, then the AutoCorrect 419 // char will not be included in rTxt. 420 // If the last AutoCorrect char was not a newline, then the AutoCorrect 421 // character will be the last character in rTxt. 
422 if (!IsCompoundWordDelimChar(rTxt[nEndPos-1])) 423 aDelimiters.push_back(nEndPos); 424 425 // Iterate through the word and all words that compose it. 426 // Two capital letters at the beginning of word? 427 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI) 428 { 429 nSttPos = aDelimiters[nI]; 430 nEndPos = aDelimiters[nI + 1]; 431 432 if( nSttPos+2 < nEndPos && 433 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) && 434 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) && 435 // Is the third character a lower case 436 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) && 437 // Do not replace special attributes 438 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ]) 439 { 440 // test if the word is in an exception list 441 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 )); 442 if( !FindInWrdSttExceptList(eLang, sWord) ) 443 { 444 // Check that word isn't correctly spelt before correcting: 445 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller = 446 LinguMgr::GetSpellChecker(); 447 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) ) 448 { 449 Sequence< css::beans::PropertyValue > aEmptySeq; 450 if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq)) 451 { 452 return; 453 } 454 } 455 sal_Unicode cSave = rTxt[ nSttPos ]; 456 OUString sChar = rCC.lowercase( OUString(cSave) ); 457 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar )) 458 { 459 if( ACFlags::SaveWordWrdSttLst & nFlags ) 460 rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave ); 461 } 462 } 463 } 464 } 465 } 466 467 // Format ordinal numbers suffixes (1st -> 1^st) 468 bool SvxAutoCorrect::FnChgOrdinalNumber( 469 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 470 sal_Int32 nSttPos, sal_Int32 nEndPos, 471 LanguageType eLang) 472 { 473 // 1st, 2nd, 3rd, 4 - 0th 474 // 201th or 201st 475 // 12th or 12nd 476 bool bChg = false; 477 478 // In some languages ordinal suffixes should never be 479 // changed to 
superscript. Let's break for those languages. 480 if (!eLang.anyOf( 481 LANGUAGE_SWEDISH, 482 LANGUAGE_SWEDISH_FINLAND)) 483 { 484 CharClass& rCC = GetCharClass(eLang); 485 486 for (; nSttPos < nEndPos; ++nSttPos) 487 if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos])) 488 break; 489 for (; nSttPos < nEndPos; --nEndPos) 490 if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1])) 491 break; 492 493 494 // Get the last number in the string to check 495 sal_Int32 nNumEnd = nEndPos; 496 bool bFoundEnd = false; 497 bool isValidNumber = true; 498 sal_Int32 i = nEndPos; 499 while (i > nSttPos) 500 { 501 i--; 502 bool isDigit = rCC.isDigit(rTxt, i); 503 if (bFoundEnd) 504 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i)); 505 506 if (isDigit && !bFoundEnd) 507 { 508 bFoundEnd = true; 509 nNumEnd = i; 510 } 511 } 512 513 if (bFoundEnd && isValidNumber) { 514 sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32(); 515 516 // Check if the characters after that number correspond to the ordinal suffix 517 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix 518 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext()); 519 520 const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale()); 521 for (OUString const & sSuffix : aSuffixes) 522 { 523 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1); 524 525 if (sSuffix == sEnd) 526 { 527 // Check if the ordinal suffix has to be set as super script 528 if (rCC.isLetter(sSuffix)) 529 { 530 // Do the change 531 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER, 532 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT); 533 rDoc.SetAttr(nNumEnd + 1, nEndPos, 534 SID_ATTR_CHAR_ESCAPEMENT, 535 aSvxEscapementItem); 536 bChg = true; 537 } 538 } 539 } 540 } 541 } 542 return bChg; 543 } 544 545 // Replace dashes 546 bool SvxAutoCorrect::FnChgToEnEmDash( 547 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 548 sal_Int32 nSttPos, sal_Int32 nEndPos, 549 
LanguageType eLang ) 550 { 551 bool bRet = false; 552 CharClass& rCC = GetCharClass( eLang ); 553 if (eLang == LANGUAGE_SYSTEM) 554 eLang = GetAppLang().getLanguageType(); 555 bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN); 556 557 // replace " - " or " --" with "enDash" 558 if( 1 < nSttPos && 1 <= nEndPos - nSttPos ) 559 { 560 sal_Unicode cCh = rTxt[ nSttPos ]; 561 if( '-' == cCh ) 562 { 563 if( 1 < nEndPos - nSttPos && 564 ' ' == rTxt[ nSttPos-1 ] && 565 '-' == rTxt[ nSttPos+1 ]) 566 { 567 sal_Int32 n; 568 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr( 569 sImplSttSkipChars,(cCh = rTxt[ n ])); 570 ++n ) 571 ; 572 573 // found: " --[<AnySttChars>][A-z0-9] 574 if( rCC.isLetterNumeric( OUString(cCh) ) ) 575 { 576 for( n = nSttPos-1; n && lcl_IsInAsciiArr( 577 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 578 ; 579 580 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9] 581 if( rCC.isLetterNumeric( OUString(cCh) )) 582 { 583 rDoc.Delete( nSttPos, nSttPos + 2 ); 584 rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) ); 585 bRet = true; 586 } 587 } 588 } 589 } 590 else if( 3 < nSttPos && 591 ' ' == rTxt[ nSttPos-1 ] && 592 '-' == rTxt[ nSttPos-2 ]) 593 { 594 sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2; 595 if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) ) 596 { 597 --nTmpPos; 598 ++nLen; 599 cCh = rTxt[ nTmpPos-1 ]; 600 } 601 if( ' ' == cCh ) 602 { 603 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr( 604 sImplSttSkipChars,(cCh = rTxt[ n ])); 605 ++n ) 606 ; 607 608 // found: " - [<AnySttChars>][A-z0-9] 609 if( rCC.isLetterNumeric( OUString(cCh) ) ) 610 { 611 cCh = ' '; 612 for( n = nTmpPos-1; n && lcl_IsInAsciiArr( 613 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 614 ; 615 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9] 616 if( rCC.isLetterNumeric( OUString(cCh) )) 617 { 618 rDoc.Delete( nTmpPos, nTmpPos + nLen ); 619 rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? 
OUString(cEmDash) : OUString(cEnDash) ); 620 bRet = true; 621 } 622 } 623 } 624 } 625 } 626 627 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash" 628 // [0-9]--[0-9] double dash always replaced with "enDash" 629 // Finnish and Hungarian use enDash instead of emDash. 630 bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH); 631 if( 4 <= nEndPos - nSttPos ) 632 { 633 OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) ); 634 sal_Int32 nFndPos = sTmp.indexOf("--"); 635 if( nFndPos != -1 && nFndPos && 636 nFndPos + 2 < sTmp.getLength() && 637 ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) || 638 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) && 639 ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) || 640 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] ))) 641 { 642 nSttPos = nSttPos + nFndPos; 643 rDoc.Delete( nSttPos, nSttPos + 2 ); 644 rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) && 645 rCC.isDigit( sTmp, nFndPos + 2 )) ? 
OUString(cEnDash) : OUString(cEmDash)) ); 646 bRet = true; 647 } 648 } 649 return bRet; 650 } 651 652 // Add non-breaking space before specific punctuation marks in French text 653 bool SvxAutoCorrect::FnAddNonBrkSpace( 654 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 655 sal_Int32 nEndPos, 656 LanguageType eLang, bool& io_bNbspRunNext ) 657 { 658 bool bRet = false; 659 660 CharClass& rCC = GetCharClass( eLang ); 661 662 if ( rCC.getLanguageTag().getLanguage() == "fr" ) 663 { 664 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA"); 665 OUString allChars = ":;?!%"; 666 OUString chars( allChars ); 667 if ( bFrCA ) 668 chars = ":"; 669 670 sal_Unicode cChar = rTxt[ nEndPos ]; 671 bool bHasSpace = chars.indexOf( cChar ) != -1; 672 bool bIsSpecial = allChars.indexOf( cChar ) != -1; 673 if ( bIsSpecial ) 674 { 675 // Get the last word delimiter position 676 sal_Int32 nSttWdPos = nEndPos; 677 bool bWasWordDelim = false; 678 while( nSttWdPos ) 679 { 680 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]); 681 if (bWasWordDelim) 682 break; 683 } 684 685 //See if the text is the start of a protocol string, e.g. have text of 686 //"http" see if it is the start of "http:" and if so leave it alone 687 sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 
1 : 0); 688 sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1; 689 if (nIndex + nProtocolLen <= rTxt.getLength()) 690 { 691 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 692 return false; 693 } 694 695 // Check the presence of "://" in the word 696 sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 ); 697 if ( nStrPos == -1 && nEndPos > 0 ) 698 { 699 // Check the previous char 700 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 701 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' ) 702 { 703 // Remove any previous normal space 704 sal_Int32 nPos = nEndPos - 1; 705 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace ) 706 { 707 if ( nPos == 0 ) break; 708 nPos--; 709 cPrevChar = rTxt[ nPos ]; 710 } 711 712 nPos++; 713 if ( nEndPos - nPos > 0 ) 714 rDoc.Delete( nPos, nEndPos ); 715 716 // Add the non-breaking space at the end pos 717 if ( bHasSpace ) 718 rDoc.Insert( nPos, OUString(cNonBreakingSpace) ); 719 io_bNbspRunNext = true; 720 bRet = true; 721 } 722 else if ( chars.indexOf( cPrevChar ) != -1 ) 723 io_bNbspRunNext = true; 724 } 725 } 726 else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) ) 727 { 728 // Remove the hardspace right before to avoid formatting URLs 729 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 730 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ]; 731 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace ) 732 { 733 rDoc.Delete( nEndPos - 2, nEndPos - 1 ); 734 bRet = true; 735 } 736 } 737 } 738 739 return bRet; 740 } 741 742 // URL recognition 743 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 744 sal_Int32 nSttPos, sal_Int32 nEndPos, 745 LanguageType eLang ) 746 { 747 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos, 748 GetCharClass( eLang ) )); 749 bool bRet = !sURL.isEmpty(); 750 if( bRet ) // so, set attribute: 751 rDoc.SetINetAttr( nSttPos, nEndPos, sURL ); 752 return bRet; 753 } 
754 755 // Automatic *bold*, /italic/, -strikeout- and _underline_ 756 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 757 sal_Int32 nEndPos ) 758 { 759 // Condition: 760 // at the beginning: _, *, / or ~ after Space with the following !Space 761 // at the end: _, *, / or ~ before Space (word delimiter?) 762 763 sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout 764 if( ++nEndPos != rTxt.getLength() && 765 !IsWordDelim( rTxt[ nEndPos ] ) ) 766 return false; 767 768 --nEndPos; 769 770 bool bAlphaNum = false; 771 sal_Int32 nPos = nEndPos; 772 sal_Int32 nFndPos = -1; 773 CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM ); 774 775 while( nPos ) 776 { 777 switch( sal_Unicode c = rTxt[ --nPos ] ) 778 { 779 case '_': 780 case '-': 781 case '/': 782 case '*': 783 if( c == cInsChar ) 784 { 785 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos || 786 IsWordDelim( rTxt[ nPos-1 ])) && 787 !IsWordDelim( rTxt[ nPos+1 ])) 788 nFndPos = nPos; 789 else 790 // Condition is not satisfied, so cancel 791 nFndPos = -1; 792 nPos = 0; 793 } 794 break; 795 default: 796 if( !bAlphaNum ) 797 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos ); 798 } 799 } 800 801 if( -1 != nFndPos ) 802 { 803 // first delete the Character at the end - this allows insertion 804 // of an empty hint in SetAttr which would be removed by Delete 805 // (fdo#62536, AUTOFMT in Writer) 806 rDoc.Delete( nEndPos, nEndPos + 1 ); 807 rDoc.Delete( nFndPos, nFndPos + 1 ); 808 // Span the Attribute over the area 809 // the end. 
810 if( '*' == cInsChar ) // Bold 811 { 812 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT ); 813 rDoc.SetAttr( nFndPos, nEndPos - 1, 814 SID_ATTR_CHAR_WEIGHT, 815 aSvxWeightItem); 816 } 817 else if( '/' == cInsChar ) // Italic 818 { 819 SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE ); 820 rDoc.SetAttr( nFndPos, nEndPos - 1, 821 SID_ATTR_CHAR_POSTURE, 822 aSvxPostureItem); 823 } 824 else if( '-' == cInsChar ) // Strikeout 825 { 826 SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT ); 827 rDoc.SetAttr( nFndPos, nEndPos - 1, 828 SID_ATTR_CHAR_STRIKEOUT, 829 aSvxCrossedOutItem); 830 } 831 else // Underline 832 { 833 SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE ); 834 rDoc.SetAttr( nFndPos, nEndPos - 1, 835 SID_ATTR_CHAR_UNDERLINE, 836 aSvxUnderlineItem); 837 } 838 } 839 840 return -1 != nFndPos; 841 } 842 843 // Capitalize first letter of every sentence 844 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc, 845 const OUString& rTxt, bool bNormalPos, 846 sal_Int32 nSttPos, sal_Int32 nEndPos, 847 LanguageType eLang ) 848 { 849 850 if( rTxt.isEmpty() || nEndPos <= nSttPos ) 851 return; 852 853 CharClass& rCC = GetCharClass( eLang ); 854 OUString aText( rTxt ); 855 const sal_Unicode *pStart = aText.getStr(), 856 *pStr = pStart + nEndPos, 857 *pWordStt = nullptr, 858 *pDelim = nullptr; 859 860 bool bAtStart = false; 861 do { 862 --pStr; 863 if (rCC.isLetter(aText, pStr - pStart)) 864 { 865 if( !pWordStt ) 866 pDelim = pStr+1; 867 pWordStt = pStr; 868 } 869 else if (pWordStt && !rCC.isDigit(aText, pStr - pStart)) 870 { 871 if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words 872 pWordStt - 1 == pStr && 873 // Installation at beginning of paragraph. 
Replaced < by <= (#i38971#) 874 (pStart + 1) <= pStr && 875 rCC.isLetter(aText, pStr-1 - pStart)) 876 pWordStt = --pStr; 877 else 878 break; 879 } 880 bAtStart = (pStart == pStr); 881 } while( !bAtStart ); 882 883 if (!pWordStt) 884 return; // no character to be replaced 885 886 887 if (rCC.isDigit(aText, pStr - pStart)) 888 return; // already ok 889 890 if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart))) 891 return; // already ok 892 893 //See if the text is the start of a protocol string, e.g. have text of 894 //"http" see if it is the start of "http:" and if so leave it alone 895 sal_Int32 nIndex = pWordStt - pStart; 896 sal_Int32 nProtocolLen = pDelim - pWordStt + 1; 897 if (nIndex + nProtocolLen <= rTxt.getLength()) 898 { 899 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 900 return; // already ok 901 } 902 903 if (0x1 == *pWordStt || 0x2 == *pWordStt) 904 return; // already ok 905 906 // Only capitalize, if string before specified characters is long enough 907 if( *pDelim && 2 >= pDelim - pWordStt && 908 lcl_IsInAsciiArr( ".-)>", *pDelim ) ) 909 return; 910 911 // tdf#59666 don't capitalize single Greek letters (except in Greek texts) 912 if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK ) 913 return; 914 915 if( !bAtStart ) // Still no beginning of a paragraph? 
916 { 917 if (NonFieldWordDelim(*pStr)) 918 { 919 for (;;) 920 { 921 bAtStart = (pStart == pStr--); 922 if (bAtStart || !NonFieldWordDelim(*pStr)) 923 break; 924 } 925 } 926 // Asian full stop, full width full stop, full width exclamation mark 927 // and full width question marks are treated as word delimiters 928 else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr && 929 0xFF1F != *pStr ) 930 return; // no valid separator -> no replacement 931 } 932 933 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 934 if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt))) 935 return; 936 937 if( bAtStart ) // at the beginning of a paragraph? 938 { 939 // Check out the previous paragraph, if it exists. 940 // If so, then check to paragraph separator at the end. 941 OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos); 942 if (!pPrevPara) 943 { 944 // valid separator -> replace 945 OUString sChar( *pWordStt ); 946 sChar = rCC.titlecase(sChar); //see fdo#56740 947 if (sChar != OUStringChar(*pWordStt)) 948 rDoc.ReplaceRange( pWordStt - pStart, 1, sChar ); 949 return; 950 } 951 952 aText = *pPrevPara; 953 bAtStart = false; 954 pStart = aText.getStr(); 955 pStr = pStart + aText.getLength(); 956 957 do { // overwrite all blanks 958 --pStr; 959 if (!NonFieldWordDelim(*pStr)) 960 break; 961 bAtStart = (pStart == pStr); 962 } while( !bAtStart ); 963 964 if( bAtStart ) 965 return; // no valid separator -> no replacement 966 } 967 968 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator. 969 // all three can happen, but not more than once! 970 const sal_Unicode* pExceptStt = nullptr; 971 bool bContinue = true; 972 Flags nFlag = Flags::NONE; 973 do 974 { 975 switch (*pStr) 976 { 977 // Western and Asian full stop 978 case '.': 979 case 0x3002: 980 case 0xFF0E: 981 { 982 if (pStr >= pStart + 2 && *(pStr - 2) == '.') 983 { 984 //e.g. text "f.o.o. 
word": Now currently considering 985 //capitalizing word but second last character of 986 //previous word is a . So probably last word is an 987 //anagram that ends in . and not truly the end of a 988 //previous sentence, so don't autocapitalize this word 989 return; 990 } 991 if (nFlag & Flags::FullStop) 992 return; // no valid separator -> no replacement 993 nFlag |= Flags::FullStop; 994 pExceptStt = pStr; 995 } 996 break; 997 case '!': 998 case 0xFF01: 999 { 1000 if (nFlag & Flags::ExclamationMark) 1001 return; // no valid separator -> no replacement 1002 nFlag |= Flags::ExclamationMark; 1003 } 1004 break; 1005 case '?': 1006 case 0xFF1F: 1007 { 1008 if (nFlag & Flags::QuestionMark) 1009 return; // no valid separator -> no replacement 1010 nFlag |= Flags::QuestionMark; 1011 } 1012 break; 1013 default: 1014 if (nFlag == Flags::NONE) 1015 return; // no valid separator -> no replacement 1016 else 1017 bContinue = false; 1018 break; 1019 } 1020 1021 if (bContinue && pStr-- == pStart) 1022 { 1023 return; // no valid separator -> no replacement 1024 } 1025 } while (bContinue); 1026 if (Flags::FullStop != nFlag) 1027 pExceptStt = nullptr; 1028 1029 // Only capitalize, if string is long enough 1030 if( 2 > ( pStr - pStart ) ) 1031 return; 1032 1033 if (!rCC.isLetterNumeric(aText, pStr-- - pStart)) 1034 { 1035 bool bValid = false, bAlphaFnd = false; 1036 const sal_Unicode* pTmpStr = pStr; 1037 while( !bValid ) 1038 { 1039 if( rCC.isDigit( aText, pTmpStr - pStart ) ) 1040 { 1041 bValid = true; 1042 pStr = pTmpStr - 1; 1043 } 1044 else if( rCC.isLetter( aText, pTmpStr - pStart ) ) 1045 { 1046 if( bAlphaFnd ) 1047 { 1048 bValid = true; 1049 pStr = pTmpStr; 1050 } 1051 else 1052 bAlphaFnd = true; 1053 } 1054 else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr)) 1055 break; 1056 1057 if( pTmpStr == pStart ) 1058 break; 1059 1060 --pTmpStr; 1061 } 1062 1063 if( !bValid ) 1064 return; // no valid separator -> no replacement 1065 } 1066 1067 bool bNumericOnly = '0' <= *(pStr+1) && 
*(pStr+1) <= '9'; 1068 1069 // Search for the beginning of the word 1070 while (!NonFieldWordDelim(*pStr)) 1071 { 1072 if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) ) 1073 bNumericOnly = false; 1074 1075 if( pStart == pStr ) 1076 break; 1077 1078 --pStr; 1079 } 1080 1081 if( bNumericOnly ) // consists of only numbers, then not 1082 return; 1083 1084 if (NonFieldWordDelim(*pStr)) 1085 ++pStr; 1086 1087 OUString sWord; 1088 1089 // check on the basis of the exception list 1090 if( pExceptStt ) 1091 { 1092 sWord = OUString(pStr, pExceptStt - pStr + 1); 1093 if( FindInCplSttExceptList(eLang, sWord) ) 1094 return; 1095 1096 // Delete all non alphanumeric. Test the characters at the 1097 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.) 1098 OUString sTmp( sWord ); 1099 while( !sTmp.isEmpty() && 1100 !rCC.isLetterNumeric( sTmp, 0 ) ) 1101 sTmp = sTmp.copy(1); 1102 1103 // Remove all non alphanumeric characters towards the end up until 1104 // the last one. 1105 sal_Int32 nLen = sTmp.getLength(); 1106 while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) ) 1107 --nLen; 1108 if( nLen + 1 < sTmp.getLength() ) 1109 sTmp = sTmp.copy( 0, nLen + 1 ); 1110 1111 if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() && 1112 FindInCplSttExceptList(eLang, sTmp)) 1113 return; 1114 1115 if(FindInCplSttExceptList(eLang, sWord, true)) 1116 return; 1117 } 1118 1119 // Ok, then replace 1120 sal_Unicode cSave = *pWordStt; 1121 nSttPos = pWordStt - rTxt.getStr(); 1122 OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740 1123 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ); 1124 1125 // Perhaps someone wants to have the word 1126 if( bRet && ACFlags::SaveWordCplSttLst & nFlags ) 1127 rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave ); 1128 } 1129 1130 // Correct accidental use of cAPS LOCK key 1131 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1132 
sal_Int32 nSttPos, sal_Int32 nEndPos, 1133 LanguageType eLang ) 1134 { 1135 if (nEndPos - nSttPos < 2) 1136 // string must be at least 2-character long. 1137 return false; 1138 1139 CharClass& rCC = GetCharClass( eLang ); 1140 1141 // Check the first 2 letters. 1142 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) ) 1143 return false; 1144 1145 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) ) 1146 return false; 1147 1148 OUStringBuffer aConverted; 1149 aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) ); 1150 aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) ); 1151 1152 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 1153 if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos))) 1154 return false; 1155 1156 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i ) 1157 { 1158 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) ) 1159 // A lowercase letter disqualifies the whole text. 1160 return false; 1161 1162 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) ) 1163 // Another uppercase letter. Convert it. 1164 aConverted.append( rCC.lowercase(OUString(rTxt[i])) ); 1165 else 1166 // This is not an alphabetic letter. Leave it as-is. 1167 aConverted.append( rTxt[i] ); 1168 } 1169 1170 // Replace the word. 1171 rDoc.Delete(nSttPos, nEndPos); 1172 rDoc.Insert(nSttPos, aConverted.makeStringAndClear()); 1173 1174 return true; 1175 } 1176 1177 1178 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote, 1179 LanguageType eLang ) const 1180 { 1181 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar 1182 ? GetStartDoubleQuote() 1183 : GetStartSingleQuote() ) 1184 : ( '\"' == cInsChar 1185 ? 
GetEndDoubleQuote() 1186 : GetEndSingleQuote() ); 1187 if( !cRet ) 1188 { 1189 // then through the Language find the right character 1190 if( LANGUAGE_NONE == eLang ) 1191 cRet = cInsChar; 1192 else 1193 { 1194 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1195 OUString sRet( bSttQuote 1196 ? ( '\"' == cInsChar 1197 ? rLcl.getDoubleQuotationMarkStart() 1198 : rLcl.getQuotationMarkStart() ) 1199 : ( '\"' == cInsChar 1200 ? rLcl.getDoubleQuotationMarkEnd() 1201 : rLcl.getQuotationMarkEnd() )); 1202 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar; 1203 } 1204 } 1205 return cRet; 1206 } 1207 1208 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos, 1209 sal_Unicode cInsChar, bool bSttQuote, 1210 bool bIns, LanguageType eLang, ACQuotes eType ) const 1211 { 1212 sal_Unicode cRet; 1213 1214 if ( eType == ACQuotes::DoubleAngleQuote ) 1215 { 1216 bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS; 1217 // pressing " inside a quotation -> use second level angle quotes 1218 bool bLeftQuote = '\"' == cInsChar && 1219 // start position and Romanian OR 1220 // not start position and Hungarian 1221 bSttQuote == (eLang != LANGUAGE_HUNGARIAN); 1222 cRet = ( '<' == cInsChar || bLeftQuote ) 1223 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote ) 1224 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote ); 1225 } 1226 else if ( eType == ACQuotes::UseApostrophe ) 1227 cRet = cApostrophe; 1228 else 1229 cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1230 1231 OUString sChg( cInsChar ); 1232 if( bIns ) 1233 rDoc.Insert( nInsPos, sChg ); 1234 else 1235 rDoc.Replace( nInsPos, sChg ); 1236 1237 sChg = OUString(cRet); 1238 1239 if( eType == ACQuotes::NonBreakingSpace ) 1240 { 1241 if( rDoc.Insert( bSttQuote ? 
nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) )) 1242 { 1243 if( !bSttQuote ) 1244 ++nInsPos; 1245 } 1246 } 1247 else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' ) 1248 { 1249 rDoc.Delete( nInsPos-1, nInsPos); 1250 --nInsPos; 1251 } 1252 1253 rDoc.Replace( nInsPos, sChg ); 1254 1255 // i' -> I' in English (last step for the Undo) 1256 if( eType == ACQuotes::CapitalizeIAm ) 1257 rDoc.Replace( nInsPos-1, "I" ); 1258 } 1259 1260 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos, 1261 sal_Unicode cInsChar, bool bSttQuote ) 1262 { 1263 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1264 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1265 1266 OUString sRet(cRet); 1267 1268 if( '\"' == cInsChar ) 1269 { 1270 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS) 1271 { 1272 if( bSttQuote ) 1273 sRet += " "; 1274 else 1275 sRet = " " + sRet; 1276 } 1277 } 1278 return sRet; 1279 } 1280 1281 // search preceding opening quote in the paragraph before the insert position 1282 static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos, 1283 const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars ) 1284 { 1285 sal_Unicode cTmpChar; 1286 1287 do { 1288 cTmpChar = rTxt[ --nPos ]; 1289 if ( cTmpChar == sPrecedingChar ) 1290 return true; 1291 1292 for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh ) 1293 if ( cTmpChar == *pCh ) 1294 return false; 1295 1296 } while ( nPos > 0 ); 1297 1298 return false; 1299 } 1300 1301 // WARNING: rText may become invalid, see comment below 1302 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1303 sal_Int32 nInsPos, sal_Unicode cChar, 1304 bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin ) 1305 { 1306 bool bIsNextRun = io_bNbspRunNext; 1307 io_bNbspRunNext = false; // if it was set, then it has to be turned off 1308 1309 do{ // only for middle check loop !! 
1310 if( cChar ) 1311 { 1312 // Prevent double space 1313 if( nInsPos && ' ' == cChar && 1314 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) && 1315 ' ' == rTxt[ nInsPos - 1 ]) 1316 { 1317 break; 1318 } 1319 1320 bool bSingle = '\'' == cChar; 1321 bool bIsReplaceQuote = 1322 (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) || 1323 (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle ); 1324 if( bIsReplaceQuote ) 1325 { 1326 bool bSttQuote = !nInsPos; 1327 ACQuotes eType = ACQuotes::NONE; 1328 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1329 if (!bSttQuote) 1330 { 1331 sal_Unicode cPrev = rTxt[ nInsPos-1 ]; 1332 bSttQuote = NonFieldWordDelim(cPrev) || 1333 lcl_IsInAsciiArr( "([{", cPrev ) || 1334 ( cEmDash == cPrev ) || 1335 ( cEnDash == cPrev ); 1336 // tdf#38394 use opening quotation mark << in French l'<<word>> 1337 if ( !bSingle && !bSttQuote && cPrev == cApostrophe && 1338 primary(eLang) == primary(LANGUAGE_FRENCH) && 1339 ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) && 1340 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si 1341 OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) || 1342 ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) && 1343 // abbreviated form of que 1344 ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) && 1345 ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) ) 1346 { 1347 bSttQuote = true; 1348 } 1349 // tdf#108423 for capitalization of English i'm 1350 else if ( bSingle && ( cPrev == 'i' ) && 1351 primary(eLang) == primary(LANGUAGE_ENGLISH) && 1352 ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) ) 1353 { 1354 eType = ACQuotes::CapitalizeIAm; 1355 } 1356 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations 1357 else if ( !bSingle && nInsPos && 1358 ( ( eLang == LANGUAGE_HUNGARIAN && 1359 lcl_HasPrecedingChar( rTxt, nInsPos, 1360 bSttQuote ? 
aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0], 1361 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) || 1362 ( eLang.anyOf( 1363 LANGUAGE_ROMANIAN, 1364 LANGUAGE_ROMANIAN_MOLDOVA ) && 1365 lcl_HasPrecedingChar( rTxt, nInsPos, 1366 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0], 1367 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) ) 1368 { 1369 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1370 // only if the opening double quotation mark is the default one 1371 if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) ) 1372 eType = ACQuotes::DoubleAngleQuote; 1373 } 1374 else if ( bSingle && nInsPos && !bSttQuote && 1375 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic, 1376 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018. 1377 // tdf#123786 the same for Russian and Ukrainian 1378 ( ( eLang.anyOf ( 1379 LANGUAGE_CZECH, 1380 LANGUAGE_GERMAN, 1381 LANGUAGE_GERMAN_SWISS, 1382 LANGUAGE_GERMAN_AUSTRIAN, 1383 LANGUAGE_GERMAN_LUXEMBOURG, 1384 LANGUAGE_GERMAN_LIECHTENSTEIN, 1385 LANGUAGE_ICELANDIC, 1386 LANGUAGE_SLOVAK, 1387 LANGUAGE_SLOVENIAN ) && 1388 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0], aStopSingleQuoteEnd + 1 ) ) || 1389 ( eLang.anyOf ( 1390 LANGUAGE_RUSSIAN, 1391 LANGUAGE_UKRAINIAN ) && 1392 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa + 1 ) ) ) ) 1393 { 1394 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1395 CharClass& rCC = GetCharClass( eLang ); 1396 if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) || 1397 rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) && 1398 // use apostrophe only after letters, not after digits or punctuation 1399 rCC.isLetter(rTxt, nInsPos-1) ) 1400 { 1401 eType = 
ACQuotes::UseApostrophe; 1402 } 1403 } 1404 } 1405 1406 if ( eType == ACQuotes::NONE && !bSingle && 1407 ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) ) 1408 eType = ACQuotes::NonBreakingSpace; 1409 1410 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType ); 1411 break; 1412 } 1413 // tdf#133524 change "<<" and ">>" to double angle quotation marks 1414 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) && 1415 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) && 1416 ('<' == cChar || '>' == cChar) && 1417 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] ) 1418 { 1419 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1420 if ( eLang.anyOf( 1421 LANGUAGE_CATALAN, // primary level 1422 LANGUAGE_CATALAN_VALENCIAN, // primary level 1423 LANGUAGE_FINNISH, // alternative primary level 1424 LANGUAGE_FRENCH_SWISS, // second level 1425 LANGUAGE_GALICIAN, // primary level 1426 LANGUAGE_HUNGARIAN, // second level 1427 LANGUAGE_POLISH, // second level 1428 LANGUAGE_PORTUGUESE, // primary level 1429 LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level 1430 LANGUAGE_ROMANIAN, // second level 1431 LANGUAGE_ROMANIAN_MOLDOVA, // second level 1432 LANGUAGE_SWEDISH, // alternative primary level 1433 LANGUAGE_SWEDISH_FINLAND, // alternative primary level 1434 LANGUAGE_UKRAINIAN, // primary level 1435 LANGUAGE_USER_ARAGONESE, // primary level 1436 LANGUAGE_USER_ASTURIAN ) || // primary level 1437 primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level 1438 primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level 1439 { 1440 InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote ); 1441 break; 1442 } 1443 } 1444 1445 if( bInsert ) 1446 rDoc.Insert( nInsPos, OUString(cChar) ); 1447 else 1448 rDoc.Replace( nInsPos, OUString(cChar) ); 1449 1450 // Hardspaces autocorrection 1451 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) ) 1452 { 1453 if ( NeedsHardspaceAutocorr( cChar ) && 1454 FnAddNonBrkSpace( 
rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) ) 1455 { 1456 ; 1457 } 1458 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) ) 1459 { 1460 // Remove the NBSP if it wasn't an autocorrection 1461 if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) && 1462 cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace ) 1463 { 1464 // Look for the last HARD_SPACE 1465 sal_Int32 nPos = nInsPos - 1; 1466 bool bContinue = true; 1467 while ( bContinue ) 1468 { 1469 const sal_Unicode cTmpChar = rTxt[ nPos ]; 1470 if ( cTmpChar == cNonBreakingSpace ) 1471 { 1472 rDoc.Delete( nPos, nPos + 1 ); 1473 bContinue = false; 1474 } 1475 else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 ) 1476 bContinue = false; 1477 nPos--; 1478 } 1479 } 1480 } 1481 } 1482 } 1483 1484 if( !nInsPos ) 1485 break; 1486 1487 sal_Int32 nPos = nInsPos - 1; 1488 1489 if( IsWordDelim( rTxt[ nPos ])) 1490 break; 1491 1492 // Set bold or underline automatically? 1493 if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength())) 1494 { 1495 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) ) 1496 { 1497 FnChgWeightUnderl( rDoc, rTxt, nPos+1 ); 1498 } 1499 break; 1500 } 1501 1502 while( nPos && !IsWordDelim( rTxt[ --nPos ])) 1503 ; 1504 1505 // Found a Paragraph-start or a Blank, search for the word shortcut in 1506 // auto. 1507 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character 1508 if( !nPos && !IsWordDelim( rTxt[ 0 ])) 1509 --nCapLttrPos; // begin of paragraph and no blank 1510 1511 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); 1512 CharClass& rCC = GetCharClass( eLang ); 1513 1514 // no symbol characters 1515 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos )) 1516 break; 1517 1518 if( IsAutoCorrFlag( ACFlags::Autocorrect ) ) 1519 { 1520 // WARNING ATTENTION: rTxt is an alias of the text node's OUString 1521 // and becomes INVALID if ChgAutoCorrWord returns true! 
1522 // => use aPara/pPara to create a valid copy of the string! 1523 OUString aPara; 1524 OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr; 1525 1526 bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos, 1527 *this, pPara ); 1528 if( !bChgWord ) 1529 { 1530 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos; 1531 while( nCapLttrPos1 < nInsPos && 1532 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] ) 1533 ) 1534 ++nCapLttrPos1; 1535 while( nCapLttrPos1 < nInsPos1 && nInsPos1 && 1536 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] ) 1537 ) 1538 --nInsPos1; 1539 1540 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) && 1541 nCapLttrPos1 < nInsPos1 && 1542 rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara )) 1543 { 1544 bChgWord = true; 1545 nCapLttrPos = nCapLttrPos1; 1546 } 1547 } 1548 1549 if( bChgWord ) 1550 { 1551 if( !aPara.isEmpty() ) 1552 { 1553 sal_Int32 nEnd = nCapLttrPos; 1554 while( nEnd < aPara.getLength() && 1555 !IsWordDelim( aPara[ nEnd ])) 1556 ++nEnd; 1557 1558 // Capital letter at beginning of paragraph? 1559 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) 1560 { 1561 FnCapitalStartSentence( rDoc, aPara, false, 1562 nCapLttrPos, nEnd, eLang ); 1563 } 1564 1565 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) 1566 { 1567 FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang ); 1568 } 1569 } 1570 break; 1571 } 1572 } 1573 1574 if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN ) 1575 { 1576 // WARNING ATTENTION: rTxt is an alias of the text node's OUString 1577 // and becomes INVALID if TransliterateRTLWord returns true! 
1578 if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) ) 1579 break; 1580 } 1581 1582 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) && 1583 (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3 1584 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) && 1585 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || 1586 ( IsAutoCorrFlag( ACFlags::SetINetAttr ) && 1587 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && 1588 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ) 1589 ; 1590 else 1591 { 1592 bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK); 1593 bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos ); 1594 1595 if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) && 1596 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) 1597 { 1598 // Correct accidental use of cAPS LOCK key (do this only when 1599 // the caps or shift lock key is pressed). Turn off the caps 1600 // lock afterwards. 1601 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK ); 1602 } 1603 1604 // Capital letter at beginning of paragraph ? 1605 if( !bUnsupported && 1606 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) 1607 { 1608 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang ); 1609 } 1610 1611 // Two capital letters at beginning of word ?? 
1612 if( !bUnsupported && 1613 IsAutoCorrFlag( ACFlags::CapitalStartWord ) ) 1614 { 1615 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); 1616 } 1617 1618 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) 1619 { 1620 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); 1621 } 1622 } 1623 1624 } while( false ); 1625 } 1626 1627 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_( 1628 LanguageType eLang ) 1629 { 1630 LanguageTag aLanguageTag( eLang); 1631 if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end()) 1632 (void)CreateLanguageFile(aLanguageTag); 1633 return *(m_aLangTable.find(aLanguageTag)->second); 1634 } 1635 1636 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang ) 1637 { 1638 auto const iter = m_aLangTable.find(LanguageTag(eLang)); 1639 if (iter != m_aLangTable.end() && iter->second) 1640 iter->second->SaveCplSttExceptList(); 1641 else 1642 { 1643 SAL_WARN("editeng", "Save an empty list? "); 1644 } 1645 } 1646 1647 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang) 1648 { 1649 auto const iter = m_aLangTable.find(LanguageTag(eLang)); 1650 if (iter != m_aLangTable.end() && iter->second) 1651 iter->second->SaveWrdSttExceptList(); 1652 else 1653 { 1654 SAL_WARN("editeng", "Save an empty list? "); 1655 } 1656 } 1657 1658 // Adds a single word. The list will immediately be written to the file! 
1659 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew, 1660 LanguageType eLang ) 1661 { 1662 SvxAutoCorrectLanguageLists* pLists = nullptr; 1663 // either the right language is present or it will be this in the general list 1664 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1665 if (iter != m_aLangTable.end()) 1666 pLists = iter->second.get(); 1667 else 1668 { 1669 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1670 iter = m_aLangTable.find(aLangTagUndetermined); 1671 if (iter != m_aLangTable.end()) 1672 pLists = iter->second.get(); 1673 else if(CreateLanguageFile(aLangTagUndetermined)) 1674 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1675 } 1676 OSL_ENSURE(pLists, "No auto correction data"); 1677 return pLists && pLists->AddToCplSttExceptList(rNew); 1678 } 1679 1680 // Adds a single word. The list will immediately be written to the file! 1681 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew, 1682 LanguageType eLang ) 1683 { 1684 SvxAutoCorrectLanguageLists* pLists = nullptr; 1685 //either the right language is present or it is set in the general list 1686 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1687 if (iter != m_aLangTable.end()) 1688 pLists = iter->second.get(); 1689 else 1690 { 1691 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1692 iter = m_aLangTable.find(aLangTagUndetermined); 1693 if (iter != m_aLangTable.end()) 1694 pLists = iter->second.get(); 1695 else if(CreateLanguageFile(aLangTagUndetermined)) 1696 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1697 } 1698 OSL_ENSURE(pLists, "No auto correction file!"); 1699 return pLists && pLists->AddToWrdSttExceptList(rNew); 1700 } 1701 1702 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt, 1703 sal_Int32 nPos) 1704 { 1705 OUString sRet; 1706 if( !nPos ) 1707 return sRet; 1708 1709 sal_Int32 nEnd = nPos; 1710 1711 // it must be followed by a blank or tab! 
1712 if( ( nPos < rTxt.getLength() && 1713 !IsWordDelim( rTxt[ nPos ])) || 1714 IsWordDelim( rTxt[ --nPos ])) 1715 return sRet; 1716 1717 while( nPos && !IsWordDelim( rTxt[ --nPos ])) 1718 ; 1719 1720 // Found a Paragraph-start or a Blank, search for the word shortcut in 1721 // auto. 1722 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character 1723 if( !nPos && !IsWordDelim( rTxt[ 0 ])) 1724 --nCapLttrPos; // Beginning of paragraph and no Blank! 1725 1726 while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) ) 1727 if( ++nCapLttrPos >= nEnd ) 1728 return sRet; 1729 1730 if( 3 > nEnd - nCapLttrPos ) 1731 return sRet; 1732 1733 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); 1734 1735 CharClass& rCC = GetCharClass(eLang); 1736 1737 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd )) 1738 return sRet; 1739 1740 sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos ); 1741 return sRet; 1742 } 1743 1744 // static 1745 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt, 1746 const sal_Int32 nPos) 1747 { 1748 constexpr sal_Int32 nMinLen = 3; 1749 constexpr sal_Int32 nMaxLen = 9; 1750 std::vector<OUString> aRes; 1751 if (nPos >= nMinLen) 1752 { 1753 sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0); 1754 // TODO: better detect word boundaries (not only whitespaces, but also e.g. 
punctuation) 1755 if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1])) 1756 { 1757 while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin])) 1758 ++nBegin; 1759 } 1760 if (nBegin + nMinLen <= nPos) 1761 { 1762 OUString sRes = rTxt.copy(nBegin, nPos - nBegin); 1763 aRes.push_back(sRes); 1764 bool bLastStartedWithDelim = IsWordDelim(sRes[0]); 1765 for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i) 1766 { 1767 bool bAdd = bLastStartedWithDelim; 1768 bLastStartedWithDelim = IsWordDelim(sRes[i]); 1769 bAdd = bAdd || bLastStartedWithDelim; 1770 if (bAdd) 1771 aRes.push_back(sRes.copy(i)); 1772 } 1773 } 1774 } 1775 return aRes; 1776 } 1777 1778 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile ) 1779 { 1780 OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists "); 1781 1782 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true )); 1783 OUString sShareDirFile( sUserDirFile ); 1784 1785 SvxAutoCorrectLanguageLists* pLists = nullptr; 1786 1787 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY ); 1788 1789 auto nFndPos = aLastFileTable.find(rLanguageTag); 1790 if(nFndPos != aLastFileTable.end() && 1791 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) && 1792 nAktTime - nLastCheckTime < nMinTime) 1793 { 1794 // no need to test the file, because the last check is not older then 1795 // 2 minutes. 
1796 if( bNewFile ) 1797 { 1798 sShareDirFile = sUserDirFile; 1799 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1800 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1801 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1802 aLastFileTable.erase(nFndPos); 1803 } 1804 } 1805 else if( 1806 ( FStatHelper::IsDocument( sUserDirFile ) || 1807 FStatHelper::IsDocument( sShareDirFile = 1808 GetAutoCorrFileName( rLanguageTag ) ) || 1809 FStatHelper::IsDocument( sShareDirFile = 1810 GetAutoCorrFileName( rLanguageTag, false, false, true) ) 1811 ) || 1812 ( sShareDirFile = sUserDirFile, bNewFile ) 1813 ) 1814 { 1815 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1816 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1817 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1818 if (nFndPos != aLastFileTable.end()) 1819 aLastFileTable.erase(nFndPos); 1820 } 1821 else if( !bNewFile ) 1822 { 1823 aLastFileTable[rLanguageTag] = nAktTime.GetTime(); 1824 } 1825 return pLists != nullptr; 1826 } 1827 1828 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong, 1829 LanguageType eLang ) 1830 { 1831 LanguageTag aLanguageTag( eLang); 1832 auto const iter = m_aLangTable.find(aLanguageTag); 1833 if (iter != m_aLangTable.end()) 1834 return iter->second->PutText(rShort, rLong); 1835 if(CreateLanguageFile(aLanguageTag)) 1836 return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong); 1837 return false; 1838 } 1839 1840 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, 1841 std::vector<SvxAutocorrWord>& aDeleteEntries, 1842 LanguageType eLang ) 1843 { 1844 LanguageTag aLanguageTag( eLang); 1845 auto const iter = m_aLangTable.find(aLanguageTag); 1846 if (iter != m_aLangTable.end()) 1847 { 1848 
iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1849 } 1850 else if(CreateLanguageFile( aLanguageTag )) 1851 { 1852 m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1853 } 1854 } 1855 1856 // - return the replacement text (only for SWG-Format, all other 1857 // can be taken from the word list!) 1858 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& ) 1859 { 1860 return false; 1861 } 1862 1863 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& ) 1864 { 1865 } 1866 1867 // Text with attribution (only the SWG - SWG format!) 1868 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&, 1869 const OUString&, const OUString&, SfxObjectShell&, OUString& ) 1870 { 1871 return false; 1872 } 1873 1874 OUString EncryptBlockName_Imp(const OUString& rName) 1875 { 1876 OUStringBuffer aName; 1877 aName.append('#').append(rName); 1878 for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos) 1879 { 1880 if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos])) 1881 aName[nPos] &= 0x0f; 1882 } 1883 return aName.makeStringAndClear(); 1884 } 1885 1886 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */ 1887 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName ) 1888 { 1889 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7)); 1890 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US)); 1891 1892 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos) 1893 { 1894 switch (aBuf[nPos]) 1895 { 1896 case '!': 1897 case '/': 1898 case ':': 1899 case '.': 1900 case '\\': 1901 aBuf[nPos] = '_'; 1902 break; 1903 default: 1904 break; 1905 } 1906 } 1907 1908 rPackageName = aBuf.makeStringAndClear(); 1909 } 1910 1911 static const SvxAutocorrWord* lcl_SearchWordsInList( 1912 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt, 1913 sal_Int32& rStt, sal_Int32 nEndPos) 1914 { 1915 const 
    SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList();
    return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos );
}

// the search for the words in the substitution table
//
// Tries up to three language lists in turn: the requested language (the
// system locale is resolved to a concrete language first), then its primary
// language, and finally LANGUAGE_UNDETERMINED.
// On a hit rLang is overwritten with the language tag whose list matched and
// the matching entry is returned; otherwise nullptr is returned and rLang is
// left untouched. The SvxAutoCorrDoc argument is unnamed and unused here.
const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList(
                const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos,
                SvxAutoCorrDoc&, LanguageTag& rLang )
{
    const SvxAutocorrWord* pRet = nullptr;
    LanguageTag aLanguageTag( rLang);
    if( aLanguageTag.isSystemLocale() )
        aLanguageTag.reset( MsLangId::getSystemLanguage());

    /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback
     * list instead? */

    // First search for eLang, then US-English -> English
    // and last in LANGUAGE_UNDETERMINED
    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
        pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = aLanguageTag;
            return pRet;
        }
        else
            // NOTE(review): a miss in the exact-language list ends the search
            // here; the two fallbacks below are only reached when no list is
            // available for the exact language. This differs from the
            // exception-list lookups further down - confirm it is intended.
            return nullptr;
    }

    // If it still could not be found here, then keep on searching
    LanguageType eLang = aLanguageTag.getLanguageType();
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    // Skip this tier when the primary language is the same as the one
    // already tried or cannot be determined.
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
            (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
             CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
        pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = aLanguageTag;
            return pRet;
        }
    }

    // Last resort: the language-independent list. Note that the tag is reset
    // inside the condition, so aLanguageTag is LANGUAGE_UNDETERMINED from here on.
    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
        pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = aLanguageTag;
            return pRet;
        }
    }
    return nullptr;
}

// Returns true if sWord is contained in the "word start" exception list of
// eLang, of its primary language, or of LANGUAGE_UNDETERMINED (checked in
// that order; unlike SearchWordsInList every available tier is consulted).
bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
                                             const OUString& sWord )
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: again horrible ugliness */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        auto const& pList = m_aLangTable.find(aLanguageTag)->second;
        if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
            return true;
    }

    // If it still could not be found here, then keep on searching
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
        (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
         CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        auto const& pList = m_aLangTable.find(aLanguageTag)->second;
        if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
            return true;
    }

    // Finally the language-independent list.
    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
        CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        auto const& pList = m_aLangTable.find(aLanguageTag)->second;
        if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
            return true;
    }
    return false;
}

// Checks sWord against the '~'-prefixed entries of pList. For each such entry
// longer than two characters, the text following the '~' is compared
// (ASCII-case-insensitively, from the end backwards) with the end of sWord;
// true is returned as soon as one entry's tail matches completely.
static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
{
    // Scanning starts at the position of the exact entry "~".
    // NOTE(review): presumably find() yields end() when "~" itself is not in
    // the list, in which case no '~' entry is examined at all - verify
    // against SvStringsISortDtor.
    SvStringsISortDtor::const_iterator it = pList->find( "~" );
    SvStringsISortDtor::size_type nPos = it - pList->begin();
    if( nPos < pList->size() )
    {
        OUString sLowerWord(sWord.toAsciiLowerCase());
        OUString sAbr;
        for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
        {
            sAbr = (*pList)[ n ];
            // The '~' entries are expected to be contiguous; stop at the first other one.
            if (sAbr[0] != '~')
                break;
            // ~ and ~. are not allowed!
            // The length check also guarantees that ii below never runs below zero.
            if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
            {
                OUString sLowerAbk(sAbr.toAsciiLowerCase());
                // Walk both strings backwards; reaching index 0 of the entry
                // (the '~' itself) means every character after it matched.
                for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
                {
                    if( !--i )      // agrees
                        return true;

                    if( sLowerAbk[i] != sLowerWord[--ii])
                        break;
                }
            }
        }
    }
    // Sanity check only: the entry in front of the start position must not begin with '~'.
    OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
            "Wrongly sorted exception list?" );
    return false;
}

// Returns true if sWord is found in the "capital start" exception list of
// eLang, of its primary language, or of LANGUAGE_UNDETERMINED (in that order).
// With bAbbreviation set the '~' suffix entries are matched via
// lcl_FindAbbreviation instead of an exact lookup.
bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
                                const OUString& sWord, bool bAbbreviation)
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: did I mention terrible horrible ugliness? */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
        if(bAbbreviation ?
lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2073 return true; 2074 } 2075 2076 // If it still could not be found here, then keep on searching 2077 // the primary language for example EN 2078 aLanguageTag.reset(aLanguageTag.getLanguage()); 2079 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 2080 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 2081 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 2082 CreateLanguageFile(aLanguageTag, false))) 2083 { 2084 //the language is available - so bring it on 2085 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2086 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2087 return true; 2088 } 2089 2090 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || 2091 CreateLanguageFile(aLanguageTag, false)) 2092 { 2093 //the language is available - so bring it on 2094 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2095 if(bAbbreviation ? 
lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2096 return true; 2097 } 2098 return false; 2099 } 2100 2101 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag, 2102 bool bNewFile, bool bTst, bool bUnlocalized ) const 2103 { 2104 OUString sRet, sExt( rLanguageTag.getBcp47() ); 2105 if (bUnlocalized) 2106 { 2107 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example 2108 std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false); 2109 if (!vecFallBackStrings.empty()) 2110 sExt = vecFallBackStrings[0]; 2111 } 2112 2113 sExt = "_" + sExt + ".dat"; 2114 if( bNewFile ) 2115 sRet = sUserAutoCorrFile + sExt; 2116 else if( !bTst ) 2117 sRet = sShareAutoCorrFile + sExt; 2118 else 2119 { 2120 // test first in the user directory - if not exist, then 2121 sRet = sUserAutoCorrFile + sExt; 2122 if( !FStatHelper::IsDocument( sRet )) 2123 sRet = sShareAutoCorrFile + sExt; 2124 } 2125 return sRet; 2126 } 2127 2128 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists( 2129 SvxAutoCorrect& rParent, 2130 const OUString& rShareAutoCorrectFile, 2131 const OUString& rUserAutoCorrectFile) 2132 : sShareAutoCorrFile( rShareAutoCorrectFile ), 2133 sUserAutoCorrFile( rUserAutoCorrectFile ), 2134 aModifiedDate( Date::EMPTY ), 2135 aModifiedTime( tools::Time::EMPTY ), 2136 aLastCheckTime( tools::Time::EMPTY ), 2137 rAutoCorrect(rParent), 2138 nFlags(ACFlags::NONE) 2139 { 2140 } 2141 2142 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists() 2143 { 2144 } 2145 2146 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp() 2147 { 2148 // Access the file system only every 2 minutes to check the date stamp 2149 bool bRet = false; 2150 2151 tools::Time nMinTime( 0, 2 ); 2152 tools::Time nAktTime( tools::Time::SYSTEM ); 2153 if( aLastCheckTime <= nAktTime) // overflow? 
2154 return false; 2155 nAktTime -= aLastCheckTime; 2156 if( nAktTime > nMinTime ) // min time past 2157 { 2158 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY ); 2159 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2160 &aTstDate, &aTstTime ) && 2161 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime )) 2162 { 2163 bRet = true; 2164 // then remove all the lists fast! 2165 if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst ) 2166 { 2167 pCplStt_ExcptLst.reset(); 2168 } 2169 if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst ) 2170 { 2171 pWrdStt_ExcptLst.reset(); 2172 } 2173 if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List ) 2174 { 2175 pAutocorr_List.reset(); 2176 } 2177 nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad ); 2178 } 2179 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2180 } 2181 return bRet; 2182 } 2183 2184 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp( 2185 std::unique_ptr<SvStringsISortDtor>& rpLst, 2186 const OUString& sStrmName, 2187 tools::SvRef<SotStorage>& rStg) 2188 { 2189 if( rpLst ) 2190 rpLst->clear(); 2191 else 2192 rpLst.reset( new SvStringsISortDtor ); 2193 2194 { 2195 if( rStg.is() && rStg->IsStream( sStrmName ) ) 2196 { 2197 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, 2198 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) ); 2199 if( ERRCODE_NONE != xStrm->GetError()) 2200 { 2201 xStrm.clear(); 2202 rStg.clear(); 2203 RemoveStream_Imp( sStrmName ); 2204 } 2205 else 2206 { 2207 uno::Reference< uno::XComponentContext > xContext = 2208 comphelper::getProcessComponentContext(); 2209 2210 xml::sax::InputSource aParserInput; 2211 aParserInput.sSystemId = sStrmName; 2212 2213 xStrm->Seek( 0 ); 2214 xStrm->SetBufferSize( 8 * 1024 ); 2215 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm ); 2216 2217 // get filter 2218 uno::Reference< 
                    xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst );

                // connect parser and filter
                uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext );
                uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
                xParser->setFastDocumentHandler( xFilter );
                xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
                xParser->setTokenHandler( xTokenHandler );

                // parse
                // Parse and I/O errors are deliberately ignored here: the list
                // simply keeps whatever entries were imported before the error.
                try
                {
                    xParser->parseStream( aParserInput );
                }
                catch( const xml::sax::SAXParseException& )
                {
                    // re throw ?
                }
                catch( const xml::sax::SAXException& )
                {
                    // re throw ?
                }
                catch( const io::IOException& )
                {
                    // re throw ?
                }
            }
        }

        // Set time stamp
        // (taken from the share file, regardless of which storage was read)
        FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                        &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
    }

}

// Writes rLst as XML into stream sStrmName of rStg; an empty list removes
// the stream instead. Does nothing if rStg is empty.
// With bConvert set the storage itself is not committed after a successful
// stream write; presumably the caller commits later (see MakeUserStorage_Impl).
void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
                            const SvStringsISortDtor& rLst,
                            const OUString& sStrmName,
                            tools::SvRef<SotStorage> const &rStg,
                            bool bConvert )
{
    if( !rStg.is() )
        return;

    if( rLst.empty() )
    {
        rStg->Remove( sStrmName );
        rStg->Commit();
    }
    else
    {
        tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
                ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
        if( xStrm.is() )
        {
            // Truncate any previous content before exporting.
            xStrm->SetSize( 0 );
            xStrm->SetBufferSize( 8192 );
            xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );


            uno::Reference< uno::XComponentContext > xContext =
                comphelper::getProcessComponentContext();

            uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
            uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
            xWriter->setOutputStream(xOut);

            uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
            rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );

            xExp->exportDoc( XML_BLOCK_LIST );

            xStrm->Commit();
            if( xStrm->GetError() == ERRCODE_NONE )
            {
                // Release the stream before committing the enclosing storage.
                xStrm.clear();
                if (!bConvert)
                {
                    rStg->Commit();
                    // A failed storage commit rolls the stream back out again.
                    if( ERRCODE_NONE != rStg->GetError() )
                    {
                        rStg->Remove( sStrmName );
                        rStg->Commit();
                    }
                }
            }
        }
    }
}

// (Re)loads the replacement word list from the share file. An existing list
// object is emptied and reused, otherwise a new one is created, so the
// returned pointer is never null. Any UNO exception during loading is logged
// and leaves the list with the entries imported so far. The remembered file
// time stamps are refreshed in every case.
SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
{
    if( pAutocorr_List )
        pAutocorr_List->DeleteAndDestroyAll();
    else
        pAutocorr_List.reset( new SvxAutocorrWordList() );

    try
    {
        uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
        uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
        uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();

        xml::sax::InputSource aParserInput;
        aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
        aParserInput.aInputStream = xStrm->getInputStream();

        // get parser
        uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
        SAL_INFO("editeng", "AutoCorrect Import" );
        uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
        uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;

        // connect parser and filter
        xParser->setFastDocumentHandler( xFilter );
        xParser->registerNamespace( "http://openoffice.org/2001/block-list",
SvXMLAutoCorrectToken::NAMESPACE ); 2336 xParser->setTokenHandler(xTokenHandler); 2337 2338 // parse 2339 xParser->parseStream( aParserInput ); 2340 } 2341 catch ( const uno::Exception& ) 2342 { 2343 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile); 2344 } 2345 2346 // Set time stamp 2347 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2348 &aModifiedDate, &aModifiedTime ); 2349 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2350 2351 return pAutocorr_List.get(); 2352 } 2353 2354 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList() 2355 { 2356 if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() ) 2357 { 2358 LoadAutocorrWordList(); 2359 if( !pAutocorr_List ) 2360 { 2361 OSL_ENSURE( false, "No valid list" ); 2362 pAutocorr_List.reset( new SvxAutocorrWordList() ); 2363 } 2364 nFlags |= ACFlags::ChgWordLstLoad; 2365 } 2366 return pAutocorr_List.get(); 2367 } 2368 2369 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList() 2370 { 2371 if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() ) 2372 { 2373 LoadCplSttExceptList(); 2374 if( !pCplStt_ExcptLst ) 2375 { 2376 OSL_ENSURE( false, "No valid list" ); 2377 pCplStt_ExcptLst.reset( new SvStringsISortDtor ); 2378 } 2379 nFlags |= ACFlags::CplSttLstLoad; 2380 } 2381 return pCplStt_ExcptLst.get(); 2382 } 2383 2384 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew) 2385 { 2386 bool bRet = false; 2387 if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second ) 2388 { 2389 MakeUserStorage_Impl(); 2390 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2391 2392 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2393 2394 xStg = nullptr; 2395 // Set time stamp 2396 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2397 &aModifiedDate, &aModifiedTime ); 2398 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2399 
bRet = true; 2400 } 2401 return bRet; 2402 } 2403 2404 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew) 2405 { 2406 bool bRet = false; 2407 SvStringsISortDtor* pExceptList = LoadWrdSttExceptList(); 2408 if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second ) 2409 { 2410 MakeUserStorage_Impl(); 2411 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2412 2413 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2414 2415 xStg = nullptr; 2416 // Set time stamp 2417 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2418 &aModifiedDate, &aModifiedTime ); 2419 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2420 bRet = true; 2421 } 2422 return bRet; 2423 } 2424 2425 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList() 2426 { 2427 try 2428 { 2429 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); 2430 if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) 2431 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2432 } 2433 catch (const css::ucb::ContentCreationException&) 2434 { 2435 } 2436 return pCplStt_ExcptLst.get(); 2437 } 2438 2439 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList() 2440 { 2441 MakeUserStorage_Impl(); 2442 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2443 2444 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2445 2446 xStg = nullptr; 2447 2448 // Set time stamp 2449 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2450 &aModifiedDate, &aModifiedTime ); 2451 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2452 } 2453 2454 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList() 2455 { 2456 try 2457 { 2458 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | 
StreamMode::SHARE_DENYNONE ); 2459 if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) ) 2460 LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2461 } 2462 catch (const css::ucb::ContentCreationException &) 2463 { 2464 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList"); 2465 } 2466 return pWrdStt_ExcptLst.get(); 2467 } 2468 2469 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList() 2470 { 2471 MakeUserStorage_Impl(); 2472 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2473 2474 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2475 2476 xStg = nullptr; 2477 // Set time stamp 2478 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2479 &aModifiedDate, &aModifiedTime ); 2480 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2481 } 2482 2483 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList() 2484 { 2485 if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() ) 2486 { 2487 LoadWrdSttExceptList(); 2488 if( !pWrdStt_ExcptLst ) 2489 { 2490 OSL_ENSURE( false, "No valid list" ); 2491 pWrdStt_ExcptLst.reset( new SvStringsISortDtor ); 2492 } 2493 nFlags |= ACFlags::WrdSttLstLoad; 2494 } 2495 return pWrdStt_ExcptLst.get(); 2496 } 2497 2498 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName ) 2499 { 2500 if( sShareAutoCorrFile != sUserAutoCorrFile ) 2501 { 2502 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2503 if( xStg.is() && ERRCODE_NONE == xStg->GetError() && 2504 xStg->IsStream( rName ) ) 2505 { 2506 xStg->Remove( rName ); 2507 xStg->Commit(); 2508 2509 xStg = nullptr; 2510 } 2511 } 2512 } 2513 2514 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl() 2515 { 2516 // The conversion needs to happen if the file is already in the user 2517 // directory and is in the old format. 
Additionally it needs to 2518 // happen when the file is being copied from share to user. 2519 2520 bool bError = false, bConvert = false, bCopy = false; 2521 INetURLObject aDest; 2522 INetURLObject aSource; 2523 2524 if (sUserAutoCorrFile != sShareAutoCorrFile ) 2525 { 2526 aSource = INetURLObject ( sShareAutoCorrFile ); 2527 aDest = INetURLObject ( sUserAutoCorrFile ); 2528 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) ) 2529 { 2530 aDest.SetExtension ( "bak" ); 2531 bConvert = true; 2532 } 2533 bCopy = true; 2534 } 2535 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) ) 2536 { 2537 aSource = INetURLObject ( sUserAutoCorrFile ); 2538 aDest = INetURLObject ( sUserAutoCorrFile ); 2539 aDest.SetExtension ( "bak" ); 2540 bCopy = bConvert = true; 2541 } 2542 if (bCopy) 2543 { 2544 try 2545 { 2546 OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri )); 2547 sal_Int32 nSlashPos = sMain.lastIndexOf('/'); 2548 sMain = sMain.copy(0, nSlashPos); 2549 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2550 TransferInfo aInfo; 2551 aInfo.NameClash = NameClash::OVERWRITE; 2552 aInfo.NewTitle = aDest.GetLastName(); 2553 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ); 2554 aInfo.MoveData = false; 2555 aNewContent.executeCommand( "transfer", Any(aInfo)); 2556 } 2557 catch (...) 
2558 { 2559 bError = true; 2560 } 2561 } 2562 if (bConvert && !bError) 2563 { 2564 tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ ); 2565 tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE ); 2566 2567 if( xSrcStg.is() && xDstStg.is() ) 2568 { 2569 std::unique_ptr<SvStringsISortDtor> pTmpWordList; 2570 2571 if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) ) 2572 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg ); 2573 2574 if (pTmpWordList) 2575 { 2576 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true ); 2577 pTmpWordList.reset(); 2578 } 2579 2580 2581 if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) 2582 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg ); 2583 2584 if (pTmpWordList) 2585 { 2586 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true ); 2587 pTmpWordList->clear(); 2588 } 2589 2590 GetAutocorrWordList(); 2591 MakeBlocklist_Imp( *xDstStg ); 2592 sShareAutoCorrFile = sUserAutoCorrFile; 2593 xDstStg = nullptr; 2594 try 2595 { 2596 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2597 aContent.executeCommand ( "delete", makeAny ( true ) ); 2598 } 2599 catch (...) 
2600 { 2601 } 2602 } 2603 } 2604 else if( bCopy && !bError ) 2605 sShareAutoCorrFile = sUserAutoCorrFile; 2606 } 2607 2608 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg ) 2609 { 2610 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty(); 2611 if( !bRemove ) 2612 { 2613 tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr, 2614 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); 2615 if( refList.is() ) 2616 { 2617 refList->SetSize( 0 ); 2618 refList->SetBufferSize( 8192 ); 2619 refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); 2620 2621 uno::Reference< uno::XComponentContext > xContext = 2622 comphelper::getProcessComponentContext(); 2623 2624 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); 2625 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList ); 2626 xWriter->setOutputStream(xOut); 2627 2628 rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) ); 2629 2630 xExp->exportDoc( XML_BLOCK_LIST ); 2631 2632 refList->Commit(); 2633 bRet = ERRCODE_NONE == refList->GetError(); 2634 if( bRet ) 2635 { 2636 refList.clear(); 2637 rStg.Commit(); 2638 if( ERRCODE_NONE != rStg.GetError() ) 2639 { 2640 bRemove = true; 2641 bRet = false; 2642 } 2643 } 2644 } 2645 else 2646 bRet = false; 2647 } 2648 2649 if( bRemove ) 2650 { 2651 rStg.Remove( pXMLImplAutocorr_ListStr ); 2652 rStg.Commit(); 2653 } 2654 2655 return bRet; 2656 } 2657 2658 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries ) 2659 { 2660 // First get the current list! 
2661 GetAutocorrWordList(); 2662 2663 MakeUserStorage_Impl(); 2664 tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2665 2666 bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError(); 2667 2668 if( bRet ) 2669 { 2670 for (SvxAutocorrWord & aWordToDelete : aDeleteEntries) 2671 { 2672 std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); 2673 if( xFoundEntry ) 2674 { 2675 if( !xFoundEntry->IsTextOnly() ) 2676 { 2677 OUString aName( aWordToDelete.GetShort() ); 2678 if (xStorage->IsOLEStorage()) 2679 aName = EncryptBlockName_Imp(aName); 2680 else 2681 GeneratePackageName ( aWordToDelete.GetShort(), aName ); 2682 2683 if( xStorage->IsContained( aName ) ) 2684 { 2685 xStorage->Remove( aName ); 2686 bRet = xStorage->Commit(); 2687 } 2688 } 2689 } 2690 } 2691 2692 for (const SvxAutocorrWord & aNewEntrie : aNewEntries) 2693 { 2694 SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ); 2695 std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd ); 2696 if( xRemoved ) 2697 { 2698 if( !xRemoved->IsTextOnly() ) 2699 { 2700 // Still have to remove the Storage 2701 OUString sStorageName( aWordToAdd.GetShort() ); 2702 if (xStorage->IsOLEStorage()) 2703 sStorageName = EncryptBlockName_Imp(sStorageName); 2704 else 2705 GeneratePackageName ( aWordToAdd.GetShort(), sStorageName); 2706 2707 if( xStorage->IsContained( sStorageName ) ) 2708 xStorage->Remove( sStorageName ); 2709 } 2710 } 2711 bRet = pAutocorr_List->Insert( std::move(aWordToAdd) ); 2712 2713 if ( !bRet ) 2714 { 2715 break; 2716 } 2717 } 2718 2719 if ( bRet ) 2720 { 2721 bRet = MakeBlocklist_Imp( *xStorage ); 2722 } 2723 } 2724 return bRet; 2725 } 2726 2727 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong ) 2728 { 2729 // First get the current list! 
2730 GetAutocorrWordList(); 2731 2732 MakeUserStorage_Impl(); 2733 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2734 2735 bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError(); 2736 2737 // Update the word list 2738 if( bRet ) 2739 { 2740 SvxAutocorrWord aNew(rShort, rLong, true ); 2741 std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew ); 2742 if( xRemove ) 2743 { 2744 if( !xRemove->IsTextOnly() ) 2745 { 2746 // Still have to remove the Storage 2747 OUString sStgNm( rShort ); 2748 if (xStg->IsOLEStorage()) 2749 sStgNm = EncryptBlockName_Imp(sStgNm); 2750 else 2751 GeneratePackageName ( rShort, sStgNm); 2752 2753 if( xStg->IsContained( sStgNm ) ) 2754 xStg->Remove( sStgNm ); 2755 } 2756 } 2757 2758 if( pAutocorr_List->Insert( std::move(aNew) ) ) 2759 { 2760 bRet = MakeBlocklist_Imp( *xStg ); 2761 xStg = nullptr; 2762 } 2763 else 2764 { 2765 bRet = false; 2766 } 2767 } 2768 return bRet; 2769 } 2770 2771 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, 2772 SfxObjectShell& rShell ) 2773 { 2774 // First get the current list! 2775 GetAutocorrWordList(); 2776 2777 MakeUserStorage_Impl(); 2778 2779 try 2780 { 2781 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE ); 2782 OUString sLong; 2783 bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong ); 2784 xStg = nullptr; 2785 2786 // Update the word list 2787 if( bRet ) 2788 { 2789 if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) ) 2790 { 2791 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2792 MakeBlocklist_Imp( *xStor ); 2793 } 2794 } 2795 } 2796 catch ( const uno::Exception& ) 2797 { 2798 } 2799 } 2800 2801 // Keep the list sorted ... 
// Strict-weak ordering of entries by their short form, using the collator
// returned by ::GetCollatorWrapper().
struct SvxAutocorrWordList::CompareSvxAutocorrWordList
{
    bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
    {
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
    }
};

namespace {

// Unsorted storage: short form -> entry.
typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;

}

// Storage of the word list. Entries live in the hash until sorted content is
// requested (see getSortedContent), after which they live in the vector.
struct SvxAutocorrWordList::Impl
{

    // only one of these contains the data
    // maSortedVector is manually sorted so we can optimise data movement
    mutable AutocorrWordSetType maSortedVector;
    mutable AutocorrWordHashType maHash; // key is 'Short'

    // Empties both containers.
    void DeleteAndDestroyAll()
    {
        maHash.clear();
        maSortedVector.clear();
    }
};

SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}

SvxAutocorrWordList::~SvxAutocorrWordList()
{
}

// Removes all entries.
void SvxAutocorrWordList::DeleteAndDestroyAll()
{
    mpImpl->DeleteAndDestroyAll();
}

// Inserts aWord unless an entry with an equal short form already exists.
// Returns a pointer to the stored entry if it was inserted, nullptr otherwise
// (callers use the result as a "was inserted" flag).
const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
{
    if ( mpImpl->maSortedVector.empty() ) // use the hash
    {
        // Copy the key first: aWord is moved from in the same call.
        OUString aShort = aWord.GetShort();
        auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
        if (inserted)
            return &(it->second);
        return nullptr;
    }
    else
    {
        // Sorted mode: find the insertion point and insert only if the entry
        // there does not compare equal under the collator.
        auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
        {
            it = mpImpl->maSortedVector.insert(it, std::move(aWord));
            return &*it;
        }
        return nullptr;
    }
}

void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight,
bool bOnlyTxt) 2868 { 2869 (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt )); 2870 } 2871 2872 bool SvxAutocorrWordList::empty() const 2873 { 2874 return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty(); 2875 } 2876 2877 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord) 2878 { 2879 2880 if ( mpImpl->maSortedVector.empty() ) // use the hash 2881 { 2882 AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() ); 2883 if( it != mpImpl->maHash.end() ) 2884 { 2885 SvxAutocorrWord pMatch = std::move(it->second); 2886 mpImpl->maHash.erase (it); 2887 return pMatch; 2888 } 2889 } 2890 else 2891 { 2892 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList()); 2893 if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it)) 2894 { 2895 SvxAutocorrWord pMatch = std::move(*it); 2896 mpImpl->maSortedVector.erase (it); 2897 return pMatch; 2898 } 2899 } 2900 return std::optional<SvxAutocorrWord>(); 2901 } 2902 2903 // return the sorted contents - defer sorting until we have to. 2904 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const 2905 { 2906 // convert from hash to set permanently 2907 if ( mpImpl->maSortedVector.empty() ) 2908 { 2909 std::vector<SvxAutocorrWord> tmp; 2910 tmp.reserve(mpImpl->maHash.size()); 2911 for (auto & rPair : mpImpl->maHash) 2912 tmp.emplace_back(std::move(rPair.second)); 2913 mpImpl->maHash.clear(); 2914 // sort twice - this gets the list into mostly-sorted order, which 2915 // reduces the number of times we need to invoke the expensive ICU collate fn. 2916 std::sort(tmp.begin(), tmp.end(), 2917 [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) 2918 { 2919 return lhs.GetShort() < rhs.GetShort(); 2920 }); 2921 // This beast has some O(N log(N)) in a terribly slow ICU collate fn. 
2922 // stable_sort is twice as fast as sort in this situation because it does 2923 // fewer comparison operations. 2924 std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList()); 2925 mpImpl->maSortedVector = std::move(tmp); 2926 } 2927 return mpImpl->maSortedVector; 2928 } 2929 2930 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, 2931 const OUString &rTxt, 2932 sal_Int32 &rStt, 2933 sal_Int32 nEndPos) const 2934 { 2935 const OUString& rChk = pFnd->GetShort(); 2936 2937 sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern? 2938 sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern? 2939 sal_Int32 nSttWdPos = nEndPos; 2940 2941 // direct replacement of keywords surrounded by colons (for example, ":name:") 2942 bool bColonNameColon = rTxt.getLength() > nEndPos && 2943 rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":"); 2944 if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard ) 2945 { 2946 2947 bool bWasWordDelim = false; 2948 sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard; 2949 if (bColonNameColon) 2950 nCalcStt++; 2951 if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon || 2952 ( nCalcStt < rStt && 2953 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) ) 2954 { 2955 TransliterationWrapper& rCmp = GetIgnoreTranslWrapper(); 2956 OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard); 2957 if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) )) 2958 { 2959 rStt = nCalcStt; 2960 if (!left_wildcard) 2961 { 2962 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 
1/2/14 2963 if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1) 2964 return nullptr; 2965 return pFnd; 2966 } 2967 // get the first word delimiter position before the matching ".*word" pattern 2968 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ]))) 2969 ; 2970 if (bWasWordDelim) rStt++; 2971 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard); 2972 // avoid double spaces before simple "word" replacement 2973 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong(); 2974 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) ) 2975 return pNew; 2976 } 2977 } else 2978 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support 2979 if ( right_wildcard ) 2980 { 2981 2982 OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) ); 2983 // Get the last word delimiter position 2984 bool not_suffix; 2985 2986 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]))) 2987 ; 2988 // search the first occurrence (with a left word delimitation, if needed) 2989 sal_Int32 nFndPos = -1; 2990 do { 2991 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1); 2992 if (nFndPos == -1) 2993 break; 2994 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength())); 2995 } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix ); 2996 2997 if ( nFndPos != -1 ) 2998 { 2999 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. 
"a:" 3000 3001 if ( left_wildcard ) 3002 { 3003 // get the first word delimiter position before the matching ".*word.*" pattern 3004 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ]))) 3005 ; 3006 if (bWasWordDelim) nFndPos++; 3007 } 3008 if (nEndPos + extra_repl <= nFndPos) 3009 { 3010 return nullptr; 3011 } 3012 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" 3013 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl); 3014 3015 OUString aLong; 3016 rStt = nFndPos; 3017 if ( !left_wildcard ) 3018 { 3019 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength(); 3020 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : ""); 3021 } else { 3022 OUStringBuffer buf; 3023 do { 3024 nSttWdPos = rTxt.indexOf( sTmp, nFndPos); 3025 if (nSttWdPos != -1) 3026 { 3027 sal_Int32 nTmp(nFndPos); 3028 while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp])) 3029 nTmp++; 3030 if (nTmp < nSttWdPos) 3031 break; // word delimiter found 3032 buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong()); 3033 nFndPos = nSttWdPos + sTmp.getLength(); 3034 } 3035 } while (nSttWdPos != -1); 3036 if (nEndPos - nFndPos > extra_repl) 3037 buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos)); 3038 aLong = buf.makeStringAndClear(); 3039 } 3040 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) 3041 { 3042 if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos ) 3043 return pNew; 3044 } 3045 } 3046 } 3047 } 3048 return nullptr; 3049 } 3050 3051 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt, 3052 sal_Int32 nEndPos) const 3053 { 3054 for (auto const& elem : mpImpl->maHash) 3055 { 3056 if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) 3057 return pTmp; 3058 } 3059 3060 for (auto const& elem : mpImpl->maSortedVector) 3061 { 
3062 if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) 3063 return pTmp; 3064 } 3065 return nullptr; 3066 } 3067 3068 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 3069
