1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <memory> 21 #include <string_view> 22 #include <sal/config.h> 23 24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp> 25 #include <com/sun/star/embed/XStorage.hpp> 26 #include <com/sun/star/io/IOException.hpp> 27 #include <com/sun/star/io/XStream.hpp> 28 #include <tools/urlobj.hxx> 29 #include <i18nlangtag/mslangid.hxx> 30 #include <i18nutil/transliteration.hxx> 31 #include <sal/log.hxx> 32 #include <osl/diagnose.h> 33 #include <vcl/svapp.hxx> 34 #include <vcl/settings.hxx> 35 #include <svl/fstathelper.hxx> 36 #include <svl/urihelper.hxx> 37 #include <unotools/charclass.hxx> 38 #include <com/sun/star/i18n/UnicodeType.hpp> 39 #include <unotools/collatorwrapper.hxx> 40 #include <com/sun/star/i18n/UnicodeScript.hpp> 41 #include <com/sun/star/i18n/OrdinalSuffix.hpp> 42 #include <unotools/localedatawrapper.hxx> 43 #include <unotools/transliterationwrapper.hxx> 44 #include <comphelper/processfactory.hxx> 45 #include <comphelper/storagehelper.hxx> 46 #include <comphelper/string.hxx> 47 #include <editeng/editids.hrc> 48 #include <sot/storage.hxx> 49 #include <editeng/udlnitem.hxx> 50 #include <editeng/wghtitem.hxx> 51 #include <editeng/postitem.hxx> 52 #include <editeng/crossedoutitem.hxx> 53 #include <editeng/escapementitem.hxx> 54 #include <editeng/svxacorr.hxx> 55 #include <editeng/unolingu.hxx> 56 #include <vcl/window.hxx> 57 #include <com/sun/star/xml/sax/InputSource.hpp> 58 #include <com/sun/star/xml/sax/FastParser.hpp> 59 #include <com/sun/star/xml/sax/Writer.hpp> 60 #include <com/sun/star/xml/sax/SAXParseException.hpp> 61 #include <unotools/streamwrap.hxx> 62 #include "SvXMLAutoCorrectImport.hxx" 63 #include "SvXMLAutoCorrectExport.hxx" 64 #include "SvXMLAutoCorrectTokenHandler.hxx" 65 #include <ucbhelper/content.hxx> 66 #include <com/sun/star/ucb/ContentCreationException.hpp> 67 #include <com/sun/star/ucb/XCommandEnvironment.hpp> 68 #include <com/sun/star/ucb/TransferInfo.hpp> 69 #include <com/sun/star/ucb/NameClash.hpp> 70 #include <tools/diagnose_ex.h> 71 #include <xmloff/xmltoken.hxx> 72 #include <unordered_map> 73 #include <rtl/character.hxx> 74 75 using namespace ::com::sun::star::ucb; 76 using namespace ::com::sun::star::uno; 77 using namespace ::com::sun::star::xml::sax; 78 using namespace ::com::sun::star; 79 using namespace ::xmloff::token; 80 using namespace ::utl; 81 82 namespace { 83 84 enum class Flags { 85 NONE = 0x00, 86 FullStop = 0x01, 87 ExclamationMark = 0x02, 88 QuestionMark = 0x04, 89 }; 90 91 } 92 93 namespace o3tl { 94 template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {}; 95 } 96 const sal_Unicode cNonBreakingSpace = 0xA0; // UNICODE code for no break space 97 98 constexpr OUStringLiteral pXMLImplWrdStt_ExcptLstStr = u"WordExceptList.xml"; 99 constexpr OUStringLiteral pXMLImplCplStt_ExcptLstStr = u"SentenceExceptList.xml"; 100 constexpr OUStringLiteral pXMLImplAutocorr_ListStr = u"DocumentList.xml"; 101 102 const char 103 /* also at these beginnings - Brackets and all kinds of begin characters */ 104 sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94", 105 /* also at these ends - Brackets and all kinds of begin characters */ 106 sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94"; 107 108 static OUString EncryptBlockName_Imp(const OUString& rName); 109 110 static bool NonFieldWordDelim( const sal_Unicode c ) 111 { 112 return ' ' == c || '\t' == c || 0x0a == c || 113 cNonBreakingSpace == c || 0x2011 == c; 114 } 115 116 static bool IsWordDelim( const sal_Unicode c ) 117 { 118 return c == 0x1 || NonFieldWordDelim(c); 119 } 120 121 122 static bool IsLowerLetter( sal_Int32 nCharType ) 123 { 124 return CharClass::isLetterType( nCharType ) && 125 ( css::i18n::KCharacterType::LOWER & nCharType); 126 } 127 128 static bool IsUpperLetter( sal_Int32 nCharType ) 129 { 130 return CharClass::isLetterType( nCharType ) && 131 ( css::i18n::KCharacterType::UPPER & nCharType); 132 } 133 134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt, 135 sal_Int32 nStt, sal_Int32 nEnd ) 136 { 137 for( ; nStt < nEnd; ++nStt ) 138 { 139 css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt ); 140 switch( nScript ) 141 { 142 case css::i18n::UnicodeScript_kCJKRadicalsSupplement: 143 case css::i18n::UnicodeScript_kHangulJamo: 144 case css::i18n::UnicodeScript_kCJKSymbolPunctuation: 145 case css::i18n::UnicodeScript_kHiragana: 146 case css::i18n::UnicodeScript_kKatakana: 147 case css::i18n::UnicodeScript_kHangulCompatibilityJamo: 148 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth: 149 case css::i18n::UnicodeScript_kCJKCompatibility: 150 case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA: 151 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph: 152 case css::i18n::UnicodeScript_kHangulSyllable: 153 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph: 154 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm: 155 return true; 156 default: ; //do nothing 157 } 158 } 159 return false; 160 } 161 162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt, 163 sal_Int32 nStt, sal_Int32 nEnd ) 164 { 165 for( ; nStt < nEnd; ++nStt ) 166 { 167 if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt )) 168 return true; 169 } 170 return false; 171 } 172 173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c ) 174 { 175 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks 176 if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) ) 177 return true; 178 179 bool bRet = false; 180 for( ; *pArr; ++pArr ) 181 if( *pArr == c ) 182 { 183 bRet = true; 184 break; 185 } 186 return bRet; 187 } 188 189 SvxAutoCorrDoc::~SvxAutoCorrDoc() 190 { 191 } 192 193 // Called by the functions: 194 // - FnCapitalStartWord 195 // - FnCapitalStartSentence 196 // after the exchange of characters. Then the words, if necessary, can be inserted 197 // into the exception list. 198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&, 199 sal_Unicode ) 200 { 201 } 202 203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const 204 { 205 return LANGUAGE_SYSTEM; 206 } 207 208 static const LanguageTag& GetAppLang() 209 { 210 return Application::GetSettings().GetLanguageTag(); 211 } 212 213 /// Never use an unresolved LANGUAGE_SYSTEM. 214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos ) 215 { 216 LanguageType eLang = rDoc.GetLanguage( nPos ); 217 if (eLang == LANGUAGE_SYSTEM) 218 eLang = GetAppLang().getLanguageType(); // the current work locale 219 return eLang; 220 } 221 222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang ) 223 { 224 static LocaleDataWrapper aLclDtWrp( GetAppLang() ); 225 LanguageTag aLcl( nLang ); 226 const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag(); 227 if( aLcl != rLcl ) 228 aLclDtWrp.setLanguageTag( aLcl ); 229 return aLclDtWrp; 230 } 231 static TransliterationWrapper& GetIgnoreTranslWrapper() 232 { 233 static int bIsInit = 0; 234 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(), 235 TransliterationFlags::IGNORE_KANA | 236 TransliterationFlags::IGNORE_WIDTH ); 237 if( !bIsInit ) 238 { 239 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() ); 240 bIsInit = 1; 241 } 242 return aWrp; 243 } 244 static CollatorWrapper& GetCollatorWrapper() 245 { 246 static CollatorWrapper aCollWrp = [&]() 247 { 248 CollatorWrapper tmp( ::comphelper::getProcessComponentContext() ); 249 tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 ); 250 return tmp; 251 }(); 252 return aCollWrp; 253 } 254 255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar ) 256 { 257 return cChar == '\0' || cChar == '\t' || cChar == 0x0a || 258 cChar == ' ' || cChar == '\'' || cChar == '\"' || 259 cChar == '*' || cChar == '_' || cChar == '%' || 260 cChar == '.' || cChar == ',' || cChar == ';' || 261 cChar == ':' || cChar == '?' || cChar == '!' || 262 cChar == '<' || cChar == '>' || 263 cChar == '/' || cChar == '-'; 264 } 265 266 namespace 267 { 268 bool IsCompoundWordDelimChar(sal_Unicode cChar) 269 { 270 return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar); 271 } 272 } 273 274 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar ) 275 { 276 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' || 277 cChar == '/' /*case for the urls exception*/; 278 } 279 280 ACFlags SvxAutoCorrect::GetDefaultFlags() 281 { 282 ACFlags nRet = ACFlags::Autocorrect 283 | ACFlags::CapitalStartSentence 284 | ACFlags::CapitalStartWord 285 | ACFlags::ChgOrdinalNumber 286 | ACFlags::ChgToEnEmDash 287 | ACFlags::AddNonBrkSpace 288 | ACFlags::TransliterateRTL 289 | ACFlags::ChgAngleQuotes 290 | ACFlags::ChgWeightUnderl 291 | ACFlags::SetINetAttr 292 | ACFlags::ChgQuotes 293 | ACFlags::SaveWordCplSttLst 294 | ACFlags::SaveWordWrdSttLst 295 | ACFlags::CorrectCapsLock; 296 LanguageType eLang = GetAppLang().getLanguageType(); 297 if( eLang.anyOf( 298 LANGUAGE_ENGLISH, 299 LANGUAGE_ENGLISH_US, 300 LANGUAGE_ENGLISH_UK, 301 LANGUAGE_ENGLISH_AUS, 302 LANGUAGE_ENGLISH_CAN, 303 LANGUAGE_ENGLISH_NZ, 304 LANGUAGE_ENGLISH_EIRE, 305 LANGUAGE_ENGLISH_SAFRICA, 306 LANGUAGE_ENGLISH_JAMAICA, 307 LANGUAGE_ENGLISH_CARIBBEAN)) 308 nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes); 309 return nRet; 310 } 311 312 constexpr sal_Unicode cEmDash = 0x2014; 313 constexpr sal_Unicode cEnDash = 0x2013; 314 constexpr sal_Unicode cApostrophe = 0x2019; 315 constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB; 316 constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB; 317 constexpr sal_Unicode cLeftSingleAngleQuote = 0x2039; 318 constexpr sal_Unicode cRightSingleAngleQuote = 0x203A; 319 // stop characters for searching preceding quotes 320 // (the first character is also the opening quote we are looking for) 321 const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0x201C, 0 }; // preceding ,, 322 const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // preceding >> 323 // preceding << for Romanian, handle also alternative primary closing quotation mark U+201C 324 const sal_Unicode aStopDoubleAngleQuoteEndRo[] = { cLeftDoubleAngleQuote, cRightDoubleAngleQuote, 0x201D, 0x201E, 0x201C, 0 }; 325 const sal_Unicode aStopSingleQuoteEnd[] = { 0x201A, 0x2018, 0x201C, 0x201E, 0 }; 326 const sal_Unicode aStopSingleQuoteEndRuUa[] = { 0x201E, 0x201C, cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0 }; 327 328 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile, 329 const OUString& rUserAutocorrFile ) 330 : sShareAutoCorrFile( rShareAutocorrFile ) 331 , sUserAutoCorrFile( rUserAutocorrFile ) 332 , eCharClassLang( LANGUAGE_DONTKNOW ) 333 , nFlags(SvxAutoCorrect::GetDefaultFlags()) 334 , cStartDQuote( 0 ) 335 , cEndDQuote( 0 ) 336 , cStartSQuote( 0 ) 337 , cEndSQuote( 0 ) 338 { 339 } 340 341 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy ) 342 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile ) 343 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile ) 344 , aSwFlags( rCpy.aSwFlags ) 345 , eCharClassLang(rCpy.eCharClassLang) 346 , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad)) 347 , cStartDQuote( rCpy.cStartDQuote ) 348 , cEndDQuote( rCpy.cEndDQuote ) 349 , cStartSQuote( rCpy.cStartSQuote ) 350 , cEndSQuote( rCpy.cEndSQuote ) 351 { 352 } 353 354 355 SvxAutoCorrect::~SvxAutoCorrect() 356 { 357 } 358 359 void SvxAutoCorrect::GetCharClass_( LanguageType eLang ) 360 { 361 pCharClass.reset( new CharClass( LanguageTag( eLang)) ); 362 eCharClassLang = eLang; 363 } 364 365 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn ) 366 { 367 ACFlags nOld = nFlags; 368 nFlags = bOn ? nFlags | nFlag 369 : nFlags & ~nFlag; 370 371 if( !bOn ) 372 { 373 if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) ) 374 nFlags &= ~ACFlags::CplSttLstLoad; 375 if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) ) 376 nFlags &= ~ACFlags::WrdSttLstLoad; 377 if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) ) 378 nFlags &= ~ACFlags::ChgWordLstLoad; 379 } 380 } 381 382 383 // Correct TWo INitial CApitals 384 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 385 sal_Int32 nSttPos, sal_Int32 nEndPos, 386 LanguageType eLang ) 387 { 388 CharClass& rCC = GetCharClass( eLang ); 389 390 // Delete all non alphanumeric. Test the characters at the beginning/end of 391 // the word ( recognizes: "(min.", "/min.", and so on.) 392 for( ; nSttPos < nEndPos; ++nSttPos ) 393 if( rCC.isLetterNumeric( rTxt, nSttPos )) 394 break; 395 for( ; nSttPos < nEndPos; --nEndPos ) 396 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 )) 397 break; 398 399 // Is the word a compounded word separated by delimiters? 400 // If so, keep track of all delimiters so each constituent 401 // word can be checked for two initial capital letters. 402 std::deque<sal_Int32> aDelimiters; 403 404 // Always check for two capitals at the beginning 405 // of the entire word, so start at nSttPos. 406 aDelimiters.push_back(nSttPos); 407 408 // Find all compound word delimiters 409 for (sal_Int32 n = nSttPos; n < nEndPos; ++n) 410 { 411 if (IsCompoundWordDelimChar(rTxt[ n ])) 412 { 413 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter 414 } 415 } 416 417 // Decide where to put the terminating delimiter. 418 // If the last AutoCorrect char was a newline, then the AutoCorrect 419 // char will not be included in rTxt. 420 // If the last AutoCorrect char was not a newline, then the AutoCorrect 421 // character will be the last character in rTxt. 422 if (!IsCompoundWordDelimChar(rTxt[nEndPos-1])) 423 aDelimiters.push_back(nEndPos); 424 425 // Iterate through the word and all words that compose it. 426 // Two capital letters at the beginning of word? 427 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI) 428 { 429 nSttPos = aDelimiters[nI]; 430 nEndPos = aDelimiters[nI + 1]; 431 432 if( nSttPos+2 < nEndPos && 433 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) && 434 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) && 435 // Is the third character a lower case 436 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) && 437 // Do not replace special attributes 438 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ]) 439 { 440 // test if the word is in an exception list 441 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 )); 442 if( !FindInWrdSttExceptList(eLang, sWord) ) 443 { 444 // Check that word isn't correctly spelt before correcting: 445 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller = 446 LinguMgr::GetSpellChecker(); 447 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) ) 448 { 449 Sequence< css::beans::PropertyValue > aEmptySeq; 450 if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq)) 451 { 452 return; 453 } 454 } 455 sal_Unicode cSave = rTxt[ nSttPos ]; 456 OUString sChar = rCC.lowercase( OUString(cSave) ); 457 if( sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar )) 458 { 459 if( ACFlags::SaveWordWrdSttLst & nFlags ) 460 rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave ); 461 } 462 } 463 } 464 } 465 } 466 467 // Format ordinal numbers suffixes (1st -> 1^st) 468 bool SvxAutoCorrect::FnChgOrdinalNumber( 469 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 470 sal_Int32 nSttPos, sal_Int32 nEndPos, 471 LanguageType eLang) 472 { 473 // 1st, 2nd, 3rd, 4 - 0th 474 // 201th or 201st 475 // 12th or 12nd 476 bool bChg = false; 477 478 // In some languages ordinal suffixes should never be 479 // changed to superscript. Let's break for those languages. 480 if (!eLang.anyOf( 481 LANGUAGE_SWEDISH, 482 LANGUAGE_SWEDISH_FINLAND)) 483 { 484 CharClass& rCC = GetCharClass(eLang); 485 486 for (; nSttPos < nEndPos; ++nSttPos) 487 if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos])) 488 break; 489 for (; nSttPos < nEndPos; --nEndPos) 490 if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1])) 491 break; 492 493 494 // Get the last number in the string to check 495 sal_Int32 nNumEnd = nEndPos; 496 bool bFoundEnd = false; 497 bool isValidNumber = true; 498 sal_Int32 i = nEndPos; 499 while (i > nSttPos) 500 { 501 i--; 502 bool isDigit = rCC.isDigit(rTxt, i); 503 if (bFoundEnd) 504 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i)); 505 506 if (isDigit && !bFoundEnd) 507 { 508 bFoundEnd = true; 509 nNumEnd = i; 510 } 511 } 512 513 if (bFoundEnd && isValidNumber) { 514 sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32(); 515 516 // Check if the characters after that number correspond to the ordinal suffix 517 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix 518 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext()); 519 520 const uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale()); 521 for (OUString const & sSuffix : aSuffixes) 522 { 523 OUString sEnd = rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1); 524 525 if (sSuffix == sEnd) 526 { 527 // Check if the ordinal suffix has to be set as super script 528 if (rCC.isLetter(sSuffix)) 529 { 530 // Do the change 531 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER, 532 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT); 533 rDoc.SetAttr(nNumEnd + 1, nEndPos, 534 SID_ATTR_CHAR_ESCAPEMENT, 535 aSvxEscapementItem); 536 bChg = true; 537 } 538 } 539 } 540 } 541 } 542 return bChg; 543 } 544 545 // Replace dashes 546 bool SvxAutoCorrect::FnChgToEnEmDash( 547 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 548 sal_Int32 nSttPos, sal_Int32 nEndPos, 549 LanguageType eLang ) 550 { 551 bool bRet = false; 552 CharClass& rCC = GetCharClass( eLang ); 553 if (eLang == LANGUAGE_SYSTEM) 554 eLang = GetAppLang().getLanguageType(); 555 bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN); 556 557 // replace " - " or " --" with "enDash" 558 if( 1 < nSttPos && 1 <= nEndPos - nSttPos ) 559 { 560 sal_Unicode cCh = rTxt[ nSttPos ]; 561 if( '-' == cCh ) 562 { 563 if( 1 < nEndPos - nSttPos && 564 ' ' == rTxt[ nSttPos-1 ] && 565 '-' == rTxt[ nSttPos+1 ]) 566 { 567 sal_Int32 n; 568 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr( 569 sImplSttSkipChars,(cCh = rTxt[ n ])); 570 ++n ) 571 ; 572 573 // found: " --[<AnySttChars>][A-z0-9] 574 if( rCC.isLetterNumeric( OUString(cCh) ) ) 575 { 576 for( n = nSttPos-1; n && lcl_IsInAsciiArr( 577 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 578 ; 579 580 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9] 581 if( rCC.isLetterNumeric( OUString(cCh) )) 582 { 583 rDoc.Delete( nSttPos, nSttPos + 2 ); 584 rDoc.Insert( nSttPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) ); 585 bRet = true; 586 } 587 } 588 } 589 } 590 else if( 3 < nSttPos && 591 ' ' == rTxt[ nSttPos-1 ] && 592 '-' == rTxt[ nSttPos-2 ]) 593 { 594 sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2; 595 if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) ) 596 { 597 --nTmpPos; 598 ++nLen; 599 cCh = rTxt[ nTmpPos-1 ]; 600 } 601 if( ' ' == cCh ) 602 { 603 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr( 604 sImplSttSkipChars,(cCh = rTxt[ n ])); 605 ++n ) 606 ; 607 608 // found: " - [<AnySttChars>][A-z0-9] 609 if( rCC.isLetterNumeric( OUString(cCh) ) ) 610 { 611 cCh = ' '; 612 for( n = nTmpPos-1; n && lcl_IsInAsciiArr( 613 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 614 ; 615 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9] 616 if( rCC.isLetterNumeric( OUString(cCh) )) 617 { 618 rDoc.Delete( nTmpPos, nTmpPos + nLen ); 619 rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) ); 620 bRet = true; 621 } 622 } 623 } 624 } 625 } 626 627 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash" 628 // [0-9]--[0-9] double dash always replaced with "enDash" 629 // Finnish and Hungarian use enDash instead of emDash. 630 bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH); 631 if( 4 <= nEndPos - nSttPos ) 632 { 633 OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) ); 634 sal_Int32 nFndPos = sTmp.indexOf("--"); 635 if( nFndPos != -1 && nFndPos && 636 nFndPos + 2 < sTmp.getLength() && 637 ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) || 638 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) && 639 ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) || 640 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] ))) 641 { 642 nSttPos = nSttPos + nFndPos; 643 rDoc.Delete( nSttPos, nSttPos + 2 ); 644 rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) && 645 rCC.isDigit( sTmp, nFndPos + 2 )) ? OUString(cEnDash) : OUString(cEmDash)) ); 646 bRet = true; 647 } 648 } 649 return bRet; 650 } 651 652 // Add non-breaking space before specific punctuation marks in French text 653 bool SvxAutoCorrect::FnAddNonBrkSpace( 654 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 655 sal_Int32 nEndPos, 656 LanguageType eLang, bool& io_bNbspRunNext ) 657 { 658 bool bRet = false; 659 660 CharClass& rCC = GetCharClass( eLang ); 661 662 if ( rCC.getLanguageTag().getLanguage() == "fr" ) 663 { 664 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA"); 665 OUString allChars = ":;?!%"; 666 OUString chars( allChars ); 667 if ( bFrCA ) 668 chars = ":"; 669 670 sal_Unicode cChar = rTxt[ nEndPos ]; 671 bool bHasSpace = chars.indexOf( cChar ) != -1; 672 bool bIsSpecial = allChars.indexOf( cChar ) != -1; 673 if ( bIsSpecial ) 674 { 675 // Get the last word delimiter position 676 sal_Int32 nSttWdPos = nEndPos; 677 bool bWasWordDelim = false; 678 while( nSttWdPos ) 679 { 680 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]); 681 if (bWasWordDelim) 682 break; 683 } 684 685 //See if the text is the start of a protocol string, e.g. have text of 686 //"http" see if it is the start of "http:" and if so leave it alone 687 sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 1 : 0); 688 sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1; 689 if (nIndex + nProtocolLen <= rTxt.getLength()) 690 { 691 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 692 return false; 693 } 694 695 // Check the presence of "://" in the word 696 sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 ); 697 if ( nStrPos == -1 && nEndPos > 0 ) 698 { 699 // Check the previous char 700 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 701 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' ) 702 { 703 // Remove any previous normal space 704 sal_Int32 nPos = nEndPos - 1; 705 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace ) 706 { 707 if ( nPos == 0 ) break; 708 nPos--; 709 cPrevChar = rTxt[ nPos ]; 710 } 711 712 nPos++; 713 if ( nEndPos - nPos > 0 ) 714 rDoc.Delete( nPos, nEndPos ); 715 716 // Add the non-breaking space at the end pos 717 if ( bHasSpace ) 718 rDoc.Insert( nPos, OUString(cNonBreakingSpace) ); 719 io_bNbspRunNext = true; 720 bRet = true; 721 } 722 else if ( chars.indexOf( cPrevChar ) != -1 ) 723 io_bNbspRunNext = true; 724 } 725 } 726 else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) ) 727 { 728 // Remove the hardspace right before to avoid formatting URLs 729 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 730 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ]; 731 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace ) 732 { 733 rDoc.Delete( nEndPos - 2, nEndPos - 1 ); 734 bRet = true; 735 } 736 } 737 } 738 739 return bRet; 740 } 741 742 // URL recognition 743 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 744 sal_Int32 nSttPos, sal_Int32 nEndPos, 745 LanguageType eLang ) 746 { 747 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos, 748 GetCharClass( eLang ) )); 749 bool bRet = !sURL.isEmpty(); 750 if( bRet ) // so, set attribute: 751 rDoc.SetINetAttr( nSttPos, nEndPos, sURL ); 752 return bRet; 753 } 754 755 // Automatic *bold*, /italic/, -strikeout- and _underline_ 756 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 757 sal_Int32 nEndPos ) 758 { 759 // Condition: 760 // at the beginning: _, *, / or ~ after Space with the following !Space 761 // at the end: _, *, / or ~ before Space (word delimiter?) 762 763 sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout 764 if( ++nEndPos != rTxt.getLength() && 765 !IsWordDelim( rTxt[ nEndPos ] ) ) 766 return false; 767 768 --nEndPos; 769 770 bool bAlphaNum = false; 771 sal_Int32 nPos = nEndPos; 772 sal_Int32 nFndPos = -1; 773 CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM ); 774 775 while( nPos ) 776 { 777 switch( sal_Unicode c = rTxt[ --nPos ] ) 778 { 779 case '_': 780 case '-': 781 case '/': 782 case '*': 783 if( c == cInsChar ) 784 { 785 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos || 786 IsWordDelim( rTxt[ nPos-1 ])) && 787 !IsWordDelim( rTxt[ nPos+1 ])) 788 nFndPos = nPos; 789 else 790 // Condition is not satisfied, so cancel 791 nFndPos = -1; 792 nPos = 0; 793 } 794 break; 795 default: 796 if( !bAlphaNum ) 797 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos ); 798 } 799 } 800 801 if( -1 != nFndPos ) 802 { 803 // first delete the Character at the end - this allows insertion 804 // of an empty hint in SetAttr which would be removed by Delete 805 // (fdo#62536, AUTOFMT in Writer) 806 rDoc.Delete( nEndPos, nEndPos + 1 ); 807 rDoc.Delete( nFndPos, nFndPos + 1 ); 808 // Span the Attribute over the area 809 // the end. 810 if( '*' == cInsChar ) // Bold 811 { 812 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT ); 813 rDoc.SetAttr( nFndPos, nEndPos - 1, 814 SID_ATTR_CHAR_WEIGHT, 815 aSvxWeightItem); 816 } 817 else if( '/' == cInsChar ) // Italic 818 { 819 SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE ); 820 rDoc.SetAttr( nFndPos, nEndPos - 1, 821 SID_ATTR_CHAR_POSTURE, 822 aSvxPostureItem); 823 } 824 else if( '-' == cInsChar ) // Strikeout 825 { 826 SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT ); 827 rDoc.SetAttr( nFndPos, nEndPos - 1, 828 SID_ATTR_CHAR_STRIKEOUT, 829 aSvxCrossedOutItem); 830 } 831 else // Underline 832 { 833 SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE ); 834 rDoc.SetAttr( nFndPos, nEndPos - 1, 835 SID_ATTR_CHAR_UNDERLINE, 836 aSvxUnderlineItem); 837 } 838 } 839 840 return -1 != nFndPos; 841 } 842 843 // Capitalize first letter of every sentence 844 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc, 845 const OUString& rTxt, bool bNormalPos, 846 sal_Int32 nSttPos, sal_Int32 nEndPos, 847 LanguageType eLang ) 848 { 849 850 if( rTxt.isEmpty() || nEndPos <= nSttPos ) 851 return; 852 853 CharClass& rCC = GetCharClass( eLang ); 854 OUString aText( rTxt ); 855 const sal_Unicode *pStart = aText.getStr(), 856 *pStr = pStart + nEndPos, 857 *pWordStt = nullptr, 858 *pDelim = nullptr; 859 860 bool bAtStart = false; 861 do { 862 --pStr; 863 if (rCC.isLetter(aText, pStr - pStart)) 864 { 865 if( !pWordStt ) 866 pDelim = pStr+1; 867 pWordStt = pStr; 868 } 869 else if (pWordStt && !rCC.isDigit(aText, pStr - pStart)) 870 { 871 if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words 872 pWordStt - 1 == pStr && 873 // Installation at beginning of paragraph. Replaced < by <= (#i38971#) 874 (pStart + 1) <= pStr && 875 rCC.isLetter(aText, pStr-1 - pStart)) 876 pWordStt = --pStr; 877 else 878 break; 879 } 880 bAtStart = (pStart == pStr); 881 } while( !bAtStart ); 882 883 if (!pWordStt) 884 return; // no character to be replaced 885 886 887 if (rCC.isDigit(aText, pStr - pStart)) 888 return; // already ok 889 890 if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart))) 891 return; // already ok 892 893 //See if the text is the start of a protocol string, e.g. have text of 894 //"http" see if it is the start of "http:" and if so leave it alone 895 sal_Int32 nIndex = pWordStt - pStart; 896 sal_Int32 nProtocolLen = pDelim - pWordStt + 1; 897 if (nIndex + nProtocolLen <= rTxt.getLength()) 898 { 899 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 900 return; // already ok 901 } 902 903 if (0x1 == *pWordStt || 0x2 == *pWordStt) 904 return; // already ok 905 906 // Only capitalize, if string before specified characters is long enough 907 if( *pDelim && 2 >= pDelim - pWordStt && 908 lcl_IsInAsciiArr( ".-)>", *pDelim ) ) 909 return; 910 911 // tdf#59666 don't capitalize single Greek letters (except in Greek texts) 912 if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK ) 913 return; 914 915 if( !bAtStart ) // Still no beginning of a paragraph? 916 { 917 if (NonFieldWordDelim(*pStr)) 918 { 919 for (;;) 920 { 921 bAtStart = (pStart == pStr--); 922 if (bAtStart || !NonFieldWordDelim(*pStr)) 923 break; 924 } 925 } 926 // Asian full stop, full width full stop, full width exclamation mark 927 // and full width question marks are treated as word delimiters 928 else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr && 929 0xFF1F != *pStr ) 930 return; // no valid separator -> no replacement 931 } 932 933 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 934 if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt))) 935 return; 936 937 if( bAtStart ) // at the beginning of a paragraph? 938 { 939 // Check out the previous paragraph, if it exists. 940 // If so, then check to paragraph separator at the end. 941 OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos); 942 if (!pPrevPara) 943 { 944 // valid separator -> replace 945 OUString sChar( *pWordStt ); 946 sChar = rCC.titlecase(sChar); //see fdo#56740 947 if (sChar != OUStringChar(*pWordStt)) 948 rDoc.ReplaceRange( pWordStt - pStart, 1, sChar ); 949 return; 950 } 951 952 aText = *pPrevPara; 953 bAtStart = false; 954 pStart = aText.getStr(); 955 pStr = pStart + aText.getLength(); 956 957 do { // overwrite all blanks 958 --pStr; 959 if (!NonFieldWordDelim(*pStr)) 960 break; 961 bAtStart = (pStart == pStr); 962 } while( !bAtStart ); 963 964 if( bAtStart ) 965 return; // no valid separator -> no replacement 966 } 967 968 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator. 969 // all three can happen, but not more than once! 970 const sal_Unicode* pExceptStt = nullptr; 971 bool bContinue = true; 972 Flags nFlag = Flags::NONE; 973 do 974 { 975 switch (*pStr) 976 { 977 // Western and Asian full stop 978 case '.': 979 case 0x3002: 980 case 0xFF0E: 981 { 982 if (pStr >= pStart + 2 && *(pStr - 2) == '.') 983 { 984 //e.g. text "f.o.o. word": Now currently considering 985 //capitalizing word but second last character of 986 //previous word is a . So probably last word is an 987 //anagram that ends in . and not truly the end of a 988 //previous sentence, so don't autocapitalize this word 989 return; 990 } 991 if (nFlag & Flags::FullStop) 992 return; // no valid separator -> no replacement 993 nFlag |= Flags::FullStop; 994 pExceptStt = pStr; 995 } 996 break; 997 case '!': 998 case 0xFF01: 999 { 1000 if (nFlag & Flags::ExclamationMark) 1001 return; // no valid separator -> no replacement 1002 nFlag |= Flags::ExclamationMark; 1003 } 1004 break; 1005 case '?': 1006 case 0xFF1F: 1007 { 1008 if (nFlag & Flags::QuestionMark) 1009 return; // no valid separator -> no replacement 1010 nFlag |= Flags::QuestionMark; 1011 } 1012 break; 1013 default: 1014 if (nFlag == Flags::NONE) 1015 return; // no valid separator -> no replacement 1016 else 1017 bContinue = false; 1018 break; 1019 } 1020 1021 if (bContinue && pStr-- == pStart) 1022 { 1023 return; // no valid separator -> no replacement 1024 } 1025 } while (bContinue); 1026 if (Flags::FullStop != nFlag) 1027 pExceptStt = nullptr; 1028 1029 // Only capitalize, if string is long enough 1030 if( 2 > ( pStr - pStart ) ) 1031 return; 1032 1033 if (!rCC.isLetterNumeric(aText, pStr-- - pStart)) 1034 { 1035 bool bValid = false, bAlphaFnd = false; 1036 const sal_Unicode* pTmpStr = pStr; 1037 while( !bValid ) 1038 { 1039 if( rCC.isDigit( aText, pTmpStr - pStart ) ) 1040 { 1041 bValid = true; 1042 pStr = pTmpStr - 1; 1043 } 1044 else if( rCC.isLetter( aText, pTmpStr - pStart ) ) 1045 { 1046 if( bAlphaFnd ) 1047 { 1048 bValid = true; 1049 pStr = pTmpStr; 1050 } 1051 else 1052 bAlphaFnd = true; 1053 } 1054 else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr)) 1055 break; 1056 1057 if( pTmpStr == pStart ) 1058 break; 1059 1060 --pTmpStr; 1061 } 1062 1063 if( !bValid ) 1064 return; // no valid separator -> no replacement 1065 } 1066 1067 bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= '9'; 1068 1069 // Search for the beginning of the word 1070 while (!NonFieldWordDelim(*pStr)) 1071 { 1072 if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) ) 1073 bNumericOnly = false; 1074 1075 if( pStart == pStr ) 1076 break; 1077 1078 --pStr; 1079 } 1080 1081 if( bNumericOnly ) // consists of only numbers, then not 1082 return; 1083 1084 if (NonFieldWordDelim(*pStr)) 1085 ++pStr; 1086 1087 OUString sWord; 1088 1089 // check on the basis of the exception list 1090 if( pExceptStt ) 1091 { 1092 sWord = OUString(pStr, pExceptStt - pStr + 1); 1093 if( FindInCplSttExceptList(eLang, sWord) ) 1094 return; 1095 1096 // Delete all non alphanumeric. Test the characters at the 1097 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.) 1098 OUString sTmp( sWord ); 1099 while( !sTmp.isEmpty() && 1100 !rCC.isLetterNumeric( sTmp, 0 ) ) 1101 sTmp = sTmp.copy(1); 1102 1103 // Remove all non alphanumeric characters towards the end up until 1104 // the last one. 1105 sal_Int32 nLen = sTmp.getLength(); 1106 while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) ) 1107 --nLen; 1108 if( nLen + 1 < sTmp.getLength() ) 1109 sTmp = sTmp.copy( 0, nLen + 1 ); 1110 1111 if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() && 1112 FindInCplSttExceptList(eLang, sTmp)) 1113 return; 1114 1115 if(FindInCplSttExceptList(eLang, sWord, true)) 1116 return; 1117 } 1118 1119 // Ok, then replace 1120 sal_Unicode cSave = *pWordStt; 1121 nSttPos = pWordStt - rTxt.getStr(); 1122 OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740 1123 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ); 1124 1125 // Perhaps someone wants to have the word 1126 if( bRet && ACFlags::SaveWordCplSttLst & nFlags ) 1127 rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave ); 1128 } 1129 1130 // Correct accidental use of cAPS LOCK key 1131 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1132 sal_Int32 nSttPos, sal_Int32 nEndPos, 1133 LanguageType eLang ) 1134 { 1135 if (nEndPos - nSttPos < 2) 1136 // string must be at least 2-character long. 1137 return false; 1138 1139 CharClass& rCC = GetCharClass( eLang ); 1140 1141 // Check the first 2 letters. 1142 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) ) 1143 return false; 1144 1145 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) ) 1146 return false; 1147 1148 OUStringBuffer aConverted; 1149 aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) ); 1150 aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) ); 1151 1152 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 1153 if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos))) 1154 return false; 1155 1156 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i ) 1157 { 1158 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) ) 1159 // A lowercase letter disqualifies the whole text. 1160 return false; 1161 1162 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) ) 1163 // Another uppercase letter. Convert it. 1164 aConverted.append( rCC.lowercase(OUString(rTxt[i])) ); 1165 else 1166 // This is not an alphabetic letter. Leave it as-is. 1167 aConverted.append( rTxt[i] ); 1168 } 1169 1170 // Replace the word. 1171 rDoc.Delete(nSttPos, nEndPos); 1172 rDoc.Insert(nSttPos, aConverted.makeStringAndClear()); 1173 1174 return true; 1175 } 1176 1177 1178 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote, 1179 LanguageType eLang ) const 1180 { 1181 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar 1182 ? GetStartDoubleQuote() 1183 : GetStartSingleQuote() ) 1184 : ( '\"' == cInsChar 1185 ? GetEndDoubleQuote() 1186 : GetEndSingleQuote() ); 1187 if( !cRet ) 1188 { 1189 // then through the Language find the right character 1190 if( LANGUAGE_NONE == eLang ) 1191 cRet = cInsChar; 1192 else 1193 { 1194 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1195 OUString sRet( bSttQuote 1196 ? ( '\"' == cInsChar 1197 ? rLcl.getDoubleQuotationMarkStart() 1198 : rLcl.getQuotationMarkStart() ) 1199 : ( '\"' == cInsChar 1200 ? rLcl.getDoubleQuotationMarkEnd() 1201 : rLcl.getQuotationMarkEnd() )); 1202 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar; 1203 } 1204 } 1205 return cRet; 1206 } 1207 1208 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos, 1209 sal_Unicode cInsChar, bool bSttQuote, 1210 bool bIns, LanguageType eLang, ACQuotes eType ) const 1211 { 1212 sal_Unicode cRet; 1213 1214 if ( eType == ACQuotes::DoubleAngleQuote ) 1215 { 1216 bool bSwiss = eLang == LANGUAGE_FRENCH_SWISS; 1217 // pressing " inside a quotation -> use second level angle quotes 1218 bool bLeftQuote = '\"' == cInsChar && 1219 // start position and Romanian OR 1220 // not start position and Hungarian 1221 bSttQuote == (eLang != LANGUAGE_HUNGARIAN); 1222 cRet = ( '<' == cInsChar || bLeftQuote ) 1223 ? ( bSwiss ? cLeftSingleAngleQuote : cLeftDoubleAngleQuote ) 1224 : ( bSwiss ? cRightSingleAngleQuote : cRightDoubleAngleQuote ); 1225 } 1226 else if ( eType == ACQuotes::UseApostrophe ) 1227 cRet = cApostrophe; 1228 else 1229 cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1230 1231 OUString sChg( cInsChar ); 1232 if( bIns ) 1233 rDoc.Insert( nInsPos, sChg ); 1234 else 1235 rDoc.Replace( nInsPos, sChg ); 1236 1237 sChg = OUString(cRet); 1238 1239 if( eType == ACQuotes::NonBreakingSpace ) 1240 { 1241 if( rDoc.Insert( bSttQuote ? nInsPos+1 : nInsPos, OUStringChar(cNonBreakingSpace) )) 1242 { 1243 if( !bSttQuote ) 1244 ++nInsPos; 1245 } 1246 } 1247 else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' ) 1248 { 1249 rDoc.Delete( nInsPos-1, nInsPos); 1250 --nInsPos; 1251 } 1252 1253 rDoc.Replace( nInsPos, sChg ); 1254 1255 // i' -> I' in English (last step for the Undo) 1256 if( eType == ACQuotes::CapitalizeIAm ) 1257 rDoc.Replace( nInsPos-1, "I" ); 1258 } 1259 1260 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos, 1261 sal_Unicode cInsChar, bool bSttQuote ) 1262 { 1263 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1264 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1265 1266 OUString sRet(cRet); 1267 1268 if( '\"' == cInsChar ) 1269 { 1270 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS) 1271 { 1272 if( bSttQuote ) 1273 sRet += " "; 1274 else 1275 sRet = " " + sRet; 1276 } 1277 } 1278 return sRet; 1279 } 1280 1281 // search preceding opening quote in the paragraph before the insert position 1282 static bool lcl_HasPrecedingChar( std::u16string_view rTxt, sal_Int32 nPos, 1283 const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars ) 1284 { 1285 sal_Unicode cTmpChar; 1286 1287 do { 1288 cTmpChar = rTxt[ --nPos ]; 1289 if ( cTmpChar == sPrecedingChar ) 1290 return true; 1291 1292 for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh ) 1293 if ( cTmpChar == *pCh ) 1294 return false; 1295 1296 } while ( nPos > 0 ); 1297 1298 return false; 1299 } 1300 1301 // WARNING: rText may become invalid, see comment below 1302 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1303 sal_Int32 nInsPos, sal_Unicode cChar, 1304 bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin ) 1305 { 1306 bool bIsNextRun = io_bNbspRunNext; 1307 io_bNbspRunNext = false; // if it was set, then it has to be turned off 1308 1309 do{ // only for middle check loop !! 1310 if( cChar ) 1311 { 1312 // Prevent double space 1313 if( nInsPos && ' ' == cChar && 1314 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) && 1315 ' ' == rTxt[ nInsPos - 1 ]) 1316 { 1317 break; 1318 } 1319 1320 bool bSingle = '\'' == cChar; 1321 bool bIsReplaceQuote = 1322 (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) || 1323 (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle ); 1324 if( bIsReplaceQuote ) 1325 { 1326 bool bSttQuote = !nInsPos; 1327 ACQuotes eType = ACQuotes::NONE; 1328 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1329 if (!bSttQuote) 1330 { 1331 sal_Unicode cPrev = rTxt[ nInsPos-1 ]; 1332 bSttQuote = NonFieldWordDelim(cPrev) || 1333 lcl_IsInAsciiArr( "([{", cPrev ) || 1334 ( cEmDash == cPrev ) || 1335 ( cEnDash == cPrev ); 1336 // tdf#38394 use opening quotation mark << in French l'<<word>> 1337 if ( !bSingle && !bSttQuote && cPrev == cApostrophe && 1338 primary(eLang) == primary(LANGUAGE_FRENCH) && 1339 ( ( ( nInsPos == 2 || ( nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ) ) ) && 1340 // abbreviated form of ce, de, je, la, le, ne, me, te, se or si 1341 OUString("cdjlnmtsCDJLNMTS").indexOf( rTxt[ nInsPos-2 ] ) > -1 ) || 1342 ( ( nInsPos == 3 || (nInsPos > 3 && IsWordDelim( rTxt[ nInsPos-4 ] ) ) ) && 1343 // abbreviated form of que 1344 ( rTxt[ nInsPos-2 ] == 'u' || rTxt[ nInsPos-2 ] == 'U' ) && 1345 ( rTxt[ nInsPos-3 ] == 'q' || rTxt[ nInsPos-3 ] == 'Q' ) ) ) ) 1346 { 1347 bSttQuote = true; 1348 } 1349 // tdf#108423 for capitalization of English i'm 1350 else if ( bSingle && ( cPrev == 'i' ) && 1351 primary(eLang) == primary(LANGUAGE_ENGLISH) && 1352 ( nInsPos == 1 || IsWordDelim( rTxt[ nInsPos-2 ] ) ) ) 1353 { 1354 eType = ACQuotes::CapitalizeIAm; 1355 } 1356 // tdf#133524 support >>Hungarian<< and <<Romanian>> secondary level quotations 1357 else if ( !bSingle && nInsPos && 1358 ( ( eLang == LANGUAGE_HUNGARIAN && 1359 lcl_HasPrecedingChar( rTxt, nInsPos, 1360 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0], 1361 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) ) || 1362 ( eLang.anyOf( 1363 LANGUAGE_ROMANIAN, 1364 LANGUAGE_ROMANIAN_MOLDOVA ) && 1365 lcl_HasPrecedingChar( rTxt, nInsPos, 1366 bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEndRo[0], 1367 bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEndRo + 1 ) ) ) ) 1368 { 1369 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1370 // only if the opening double quotation mark is the default one 1371 if ( rLcl.getDoubleQuotationMarkStart() == OUStringChar(aStopDoubleAngleQuoteStart[0]) ) 1372 eType = ACQuotes::DoubleAngleQuote; 1373 } 1374 else if ( bSingle && nInsPos && !bSttQuote && 1375 // tdf#128860 use apostrophe outside of second level quotation in Czech, German, Icelandic, 1376 // Slovak and Slovenian instead of the – in this case, bad – closing quotation mark U+2018. 1377 // tdf#123786 the same for Russian and Ukrainian 1378 ( ( eLang.anyOf ( 1379 LANGUAGE_CZECH, 1380 LANGUAGE_GERMAN, 1381 LANGUAGE_GERMAN_SWISS, 1382 LANGUAGE_GERMAN_AUSTRIAN, 1383 LANGUAGE_GERMAN_LUXEMBOURG, 1384 LANGUAGE_GERMAN_LIECHTENSTEIN, 1385 LANGUAGE_ICELANDIC, 1386 LANGUAGE_SLOVAK, 1387 LANGUAGE_SLOVENIAN ) && 1388 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEnd[0], aStopSingleQuoteEnd + 1 ) ) || 1389 ( eLang.anyOf ( 1390 LANGUAGE_RUSSIAN, 1391 LANGUAGE_UKRAINIAN ) && 1392 !lcl_HasPrecedingChar( rTxt, nInsPos, aStopSingleQuoteEndRuUa[0], aStopSingleQuoteEndRuUa + 1 ) ) ) ) 1393 { 1394 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1395 CharClass& rCC = GetCharClass( eLang ); 1396 if ( ( rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEnd[0]) || 1397 rLcl.getQuotationMarkStart() == OUStringChar(aStopSingleQuoteEndRuUa[0]) ) && 1398 // use apostrophe only after letters, not after digits or punctuation 1399 rCC.isLetter(rTxt, nInsPos-1) ) 1400 { 1401 eType = ACQuotes::UseApostrophe; 1402 } 1403 } 1404 } 1405 1406 if ( eType == ACQuotes::NONE && !bSingle && 1407 ( primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS ) ) 1408 eType = ACQuotes::NonBreakingSpace; 1409 1410 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType ); 1411 break; 1412 } 1413 // tdf#133524 change "<<" and ">>" to double angle quotation marks 1414 else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) && 1415 IsAutoCorrFlag( ACFlags::ChgAngleQuotes ) && 1416 ('<' == cChar || '>' == cChar) && 1417 nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] ) 1418 { 1419 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1420 if ( eLang.anyOf( 1421 LANGUAGE_CATALAN, // primary level 1422 LANGUAGE_CATALAN_VALENCIAN, // primary level 1423 LANGUAGE_FINNISH, // alternative primary level 1424 LANGUAGE_FRENCH_SWISS, // second level 1425 LANGUAGE_GALICIAN, // primary level 1426 LANGUAGE_HUNGARIAN, // second level 1427 LANGUAGE_POLISH, // second level 1428 LANGUAGE_PORTUGUESE, // primary level 1429 LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level 1430 LANGUAGE_ROMANIAN, // second level 1431 LANGUAGE_ROMANIAN_MOLDOVA, // second level 1432 LANGUAGE_SWEDISH, // alternative primary level 1433 LANGUAGE_SWEDISH_FINLAND, // alternative primary level 1434 LANGUAGE_UKRAINIAN, // primary level 1435 LANGUAGE_USER_ARAGONESE, // primary level 1436 LANGUAGE_USER_ASTURIAN ) || // primary level 1437 primary(eLang) == primary(LANGUAGE_GERMAN) || // alternative primary level 1438 primary(eLang) == primary(LANGUAGE_SPANISH) ) // primary level 1439 { 1440 InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote ); 1441 break; 1442 } 1443 } 1444 1445 if( bInsert ) 1446 rDoc.Insert( nInsPos, OUString(cChar) ); 1447 else 1448 rDoc.Replace( nInsPos, OUString(cChar) ); 1449 1450 // Hardspaces autocorrection 1451 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) ) 1452 { 1453 if ( NeedsHardspaceAutocorr( cChar ) && 1454 FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) ) 1455 { 1456 ; 1457 } 1458 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) ) 1459 { 1460 // Remove the NBSP if it wasn't an autocorrection 1461 if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) && 1462 cChar != ' ' && cChar != '\t' && cChar != cNonBreakingSpace ) 1463 { 1464 // Look for the last HARD_SPACE 1465 sal_Int32 nPos = nInsPos - 1; 1466 bool bContinue = true; 1467 while ( bContinue ) 1468 { 1469 const sal_Unicode cTmpChar = rTxt[ nPos ]; 1470 if ( cTmpChar == cNonBreakingSpace ) 1471 { 1472 rDoc.Delete( nPos, nPos + 1 ); 1473 bContinue = false; 1474 } 1475 else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 ) 1476 bContinue = false; 1477 nPos--; 1478 } 1479 } 1480 } 1481 } 1482 } 1483 1484 if( !nInsPos ) 1485 break; 1486 1487 sal_Int32 nPos = nInsPos - 1; 1488 1489 if( IsWordDelim( rTxt[ nPos ])) 1490 break; 1491 1492 // Set bold or underline automatically? 1493 if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength())) 1494 { 1495 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) ) 1496 { 1497 FnChgWeightUnderl( rDoc, rTxt, nPos+1 ); 1498 } 1499 break; 1500 } 1501 1502 while( nPos && !IsWordDelim( rTxt[ --nPos ])) 1503 ; 1504 1505 // Found a Paragraph-start or a Blank, search for the word shortcut in 1506 // auto. 1507 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character 1508 if( !nPos && !IsWordDelim( rTxt[ 0 ])) 1509 --nCapLttrPos; // begin of paragraph and no blank 1510 1511 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); 1512 CharClass& rCC = GetCharClass( eLang ); 1513 1514 // no symbol characters 1515 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos )) 1516 break; 1517 1518 if( IsAutoCorrFlag( ACFlags::Autocorrect ) && 1519 // tdf#134940 fix regression of arrow "-->" resulted by premature 1520 // replacement of "--" since '>' was added to IsAutoCorrectChar() 1521 '>' != cChar ) 1522 { 1523 // WARNING ATTENTION: rTxt is an alias of the text node's OUString 1524 // and becomes INVALID if ChgAutoCorrWord returns true! 1525 // => use aPara/pPara to create a valid copy of the string! 1526 OUString aPara; 1527 OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? &aPara : nullptr; 1528 1529 bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos, 1530 *this, pPara ); 1531 if( !bChgWord ) 1532 { 1533 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos; 1534 while( nCapLttrPos1 < nInsPos && 1535 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] ) 1536 ) 1537 ++nCapLttrPos1; 1538 while( nCapLttrPos1 < nInsPos1 && nInsPos1 && 1539 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] ) 1540 ) 1541 --nInsPos1; 1542 1543 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) && 1544 nCapLttrPos1 < nInsPos1 && 1545 rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara )) 1546 { 1547 bChgWord = true; 1548 nCapLttrPos = nCapLttrPos1; 1549 } 1550 } 1551 1552 if( bChgWord ) 1553 { 1554 if( !aPara.isEmpty() ) 1555 { 1556 sal_Int32 nEnd = nCapLttrPos; 1557 while( nEnd < aPara.getLength() && 1558 !IsWordDelim( aPara[ nEnd ])) 1559 ++nEnd; 1560 1561 // Capital letter at beginning of paragraph? 1562 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) 1563 { 1564 FnCapitalStartSentence( rDoc, aPara, false, 1565 nCapLttrPos, nEnd, eLang ); 1566 } 1567 1568 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) 1569 { 1570 FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang ); 1571 } 1572 } 1573 break; 1574 } 1575 } 1576 1577 if( IsAutoCorrFlag( ACFlags::TransliterateRTL ) && GetDocLanguage( rDoc, nInsPos ) == LANGUAGE_HUNGARIAN ) 1578 { 1579 // WARNING ATTENTION: rTxt is an alias of the text node's OUString 1580 // and becomes INVALID if TransliterateRTLWord returns true! 1581 if ( rDoc.TransliterateRTLWord( nCapLttrPos, nInsPos ) ) 1582 break; 1583 } 1584 1585 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) && 1586 (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3 1587 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) && 1588 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || 1589 ( IsAutoCorrFlag( ACFlags::SetINetAttr ) && 1590 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && 1591 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ) 1592 ; 1593 else 1594 { 1595 bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK); 1596 bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos ); 1597 1598 if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) && 1599 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) 1600 { 1601 // Correct accidental use of cAPS LOCK key (do this only when 1602 // the caps or shift lock key is pressed). Turn off the caps 1603 // lock afterwards. 1604 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK ); 1605 } 1606 1607 // Capital letter at beginning of paragraph ? 1608 if( !bUnsupported && 1609 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) 1610 { 1611 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang ); 1612 } 1613 1614 // Two capital letters at beginning of word ?? 1615 if( !bUnsupported && 1616 IsAutoCorrFlag( ACFlags::CapitalStartWord ) ) 1617 { 1618 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); 1619 } 1620 1621 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) 1622 { 1623 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); 1624 } 1625 } 1626 1627 } while( false ); 1628 } 1629 1630 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_( 1631 LanguageType eLang ) 1632 { 1633 LanguageTag aLanguageTag( eLang); 1634 if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end()) 1635 (void)CreateLanguageFile(aLanguageTag); 1636 return *(m_aLangTable.find(aLanguageTag)->second); 1637 } 1638 1639 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang ) 1640 { 1641 auto const iter = m_aLangTable.find(LanguageTag(eLang)); 1642 if (iter != m_aLangTable.end() && iter->second) 1643 iter->second->SaveCplSttExceptList(); 1644 else 1645 { 1646 SAL_WARN("editeng", "Save an empty list? "); 1647 } 1648 } 1649 1650 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang) 1651 { 1652 auto const iter = m_aLangTable.find(LanguageTag(eLang)); 1653 if (iter != m_aLangTable.end() && iter->second) 1654 iter->second->SaveWrdSttExceptList(); 1655 else 1656 { 1657 SAL_WARN("editeng", "Save an empty list? "); 1658 } 1659 } 1660 1661 // Adds a single word. The list will immediately be written to the file! 1662 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew, 1663 LanguageType eLang ) 1664 { 1665 SvxAutoCorrectLanguageLists* pLists = nullptr; 1666 // either the right language is present or it will be this in the general list 1667 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1668 if (iter != m_aLangTable.end()) 1669 pLists = iter->second.get(); 1670 else 1671 { 1672 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1673 iter = m_aLangTable.find(aLangTagUndetermined); 1674 if (iter != m_aLangTable.end()) 1675 pLists = iter->second.get(); 1676 else if(CreateLanguageFile(aLangTagUndetermined)) 1677 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1678 } 1679 OSL_ENSURE(pLists, "No auto correction data"); 1680 return pLists && pLists->AddToCplSttExceptList(rNew); 1681 } 1682 1683 // Adds a single word. The list will immediately be written to the file! 1684 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew, 1685 LanguageType eLang ) 1686 { 1687 SvxAutoCorrectLanguageLists* pLists = nullptr; 1688 //either the right language is present or it is set in the general list 1689 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1690 if (iter != m_aLangTable.end()) 1691 pLists = iter->second.get(); 1692 else 1693 { 1694 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1695 iter = m_aLangTable.find(aLangTagUndetermined); 1696 if (iter != m_aLangTable.end()) 1697 pLists = iter->second.get(); 1698 else if(CreateLanguageFile(aLangTagUndetermined)) 1699 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1700 } 1701 OSL_ENSURE(pLists, "No auto correction file!"); 1702 return pLists && pLists->AddToWrdSttExceptList(rNew); 1703 } 1704 1705 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt, 1706 sal_Int32 nPos) 1707 { 1708 OUString sRet; 1709 if( !nPos ) 1710 return sRet; 1711 1712 sal_Int32 nEnd = nPos; 1713 1714 // it must be followed by a blank or tab! 1715 if( ( nPos < rTxt.getLength() && 1716 !IsWordDelim( rTxt[ nPos ])) || 1717 IsWordDelim( rTxt[ --nPos ])) 1718 return sRet; 1719 1720 while( nPos && !IsWordDelim( rTxt[ --nPos ])) 1721 ; 1722 1723 // Found a Paragraph-start or a Blank, search for the word shortcut in 1724 // auto. 1725 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character 1726 if( !nPos && !IsWordDelim( rTxt[ 0 ])) 1727 --nCapLttrPos; // Beginning of paragraph and no Blank! 1728 1729 while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) ) 1730 if( ++nCapLttrPos >= nEnd ) 1731 return sRet; 1732 1733 if( 3 > nEnd - nCapLttrPos ) 1734 return sRet; 1735 1736 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); 1737 1738 CharClass& rCC = GetCharClass(eLang); 1739 1740 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd )) 1741 return sRet; 1742 1743 sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos ); 1744 return sRet; 1745 } 1746 1747 // static 1748 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt, 1749 const sal_Int32 nPos) 1750 { 1751 constexpr sal_Int32 nMinLen = 3; 1752 constexpr sal_Int32 nMaxLen = 9; 1753 std::vector<OUString> aRes; 1754 if (nPos >= nMinLen) 1755 { 1756 sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0); 1757 // TODO: better detect word boundaries (not only whitespaces, but also e.g. punctuation) 1758 if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1])) 1759 { 1760 while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin])) 1761 ++nBegin; 1762 } 1763 if (nBegin + nMinLen <= nPos) 1764 { 1765 OUString sRes = rTxt.copy(nBegin, nPos - nBegin); 1766 aRes.push_back(sRes); 1767 bool bLastStartedWithDelim = IsWordDelim(sRes[0]); 1768 for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i) 1769 { 1770 bool bAdd = bLastStartedWithDelim; 1771 bLastStartedWithDelim = IsWordDelim(sRes[i]); 1772 bAdd = bAdd || bLastStartedWithDelim; 1773 if (bAdd) 1774 aRes.push_back(sRes.copy(i)); 1775 } 1776 } 1777 } 1778 return aRes; 1779 } 1780 1781 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile ) 1782 { 1783 OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists "); 1784 1785 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true )); 1786 OUString sShareDirFile( sUserDirFile ); 1787 1788 SvxAutoCorrectLanguageLists* pLists = nullptr; 1789 1790 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY ); 1791 1792 auto nFndPos = aLastFileTable.find(rLanguageTag); 1793 if(nFndPos != aLastFileTable.end() && 1794 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) && 1795 nAktTime - nLastCheckTime < nMinTime) 1796 { 1797 // no need to test the file, because the last check is not older then 1798 // 2 minutes. 1799 if( bNewFile ) 1800 { 1801 sShareDirFile = sUserDirFile; 1802 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1803 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1804 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1805 aLastFileTable.erase(nFndPos); 1806 } 1807 } 1808 else if( 1809 ( FStatHelper::IsDocument( sUserDirFile ) || 1810 FStatHelper::IsDocument( sShareDirFile = 1811 GetAutoCorrFileName( rLanguageTag ) ) || 1812 FStatHelper::IsDocument( sShareDirFile = 1813 GetAutoCorrFileName( rLanguageTag, false, false, true) ) 1814 ) || 1815 ( sShareDirFile = sUserDirFile, bNewFile ) 1816 ) 1817 { 1818 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1819 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1820 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1821 if (nFndPos != aLastFileTable.end()) 1822 aLastFileTable.erase(nFndPos); 1823 } 1824 else if( !bNewFile ) 1825 { 1826 aLastFileTable[rLanguageTag] = nAktTime.GetTime(); 1827 } 1828 return pLists != nullptr; 1829 } 1830 1831 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong, 1832 LanguageType eLang ) 1833 { 1834 LanguageTag aLanguageTag( eLang); 1835 auto const iter = m_aLangTable.find(aLanguageTag); 1836 if (iter != m_aLangTable.end()) 1837 return iter->second->PutText(rShort, rLong); 1838 if(CreateLanguageFile(aLanguageTag)) 1839 return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong); 1840 return false; 1841 } 1842 1843 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, 1844 std::vector<SvxAutocorrWord>& aDeleteEntries, 1845 LanguageType eLang ) 1846 { 1847 LanguageTag aLanguageTag( eLang); 1848 auto const iter = m_aLangTable.find(aLanguageTag); 1849 if (iter != m_aLangTable.end()) 1850 { 1851 iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1852 } 1853 else if(CreateLanguageFile( aLanguageTag )) 1854 { 1855 m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1856 } 1857 } 1858 1859 // - return the replacement text (only for SWG-Format, all other 1860 // can be taken from the word list!) 1861 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& ) 1862 { 1863 return false; 1864 } 1865 1866 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& ) 1867 { 1868 } 1869 1870 // Text with attribution (only the SWG - SWG format!) 1871 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&, 1872 const OUString&, const OUString&, SfxObjectShell&, OUString& ) 1873 { 1874 return false; 1875 } 1876 1877 OUString EncryptBlockName_Imp(const OUString& rName) 1878 { 1879 OUStringBuffer aName; 1880 aName.append('#').append(rName); 1881 for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos) 1882 { 1883 if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos])) 1884 aName[nPos] &= 0x0f; 1885 } 1886 return aName.makeStringAndClear(); 1887 } 1888 1889 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */ 1890 static void GeneratePackageName ( std::u16string_view rShort, OUString& rPackageName ) 1891 { 1892 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7)); 1893 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US)); 1894 1895 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos) 1896 { 1897 switch (aBuf[nPos]) 1898 { 1899 case '!': 1900 case '/': 1901 case ':': 1902 case '.': 1903 case '\\': 1904 aBuf[nPos] = '_'; 1905 break; 1906 default: 1907 break; 1908 } 1909 } 1910 1911 rPackageName = aBuf.makeStringAndClear(); 1912 } 1913 1914 static const SvxAutocorrWord* lcl_SearchWordsInList( 1915 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt, 1916 sal_Int32& rStt, sal_Int32 nEndPos) 1917 { 1918 const SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList(); 1919 return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos ); 1920 } 1921 1922 // the search for the words in the substitution table 1923 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( 1924 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos, 1925 SvxAutoCorrDoc&, LanguageTag& rLang ) 1926 { 1927 const SvxAutocorrWord* pRet = nullptr; 1928 LanguageTag aLanguageTag( rLang); 1929 if( aLanguageTag.isSystemLocale() ) 1930 aLanguageTag.reset( MsLangId::getSystemLanguage()); 1931 1932 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback 1933 * list instead? */ 1934 1935 // First search for eLang, then US-English -> English 1936 // and last in LANGUAGE_UNDETERMINED 1937 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) 1938 { 1939 //the language is available - so bring it on 1940 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1941 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1942 if( pRet ) 1943 { 1944 rLang = aLanguageTag; 1945 return pRet; 1946 } 1947 else 1948 return nullptr; 1949 } 1950 1951 // If it still could not be found here, then keep on searching 1952 LanguageType eLang = aLanguageTag.getLanguageType(); 1953 // the primary language for example EN 1954 aLanguageTag.reset(aLanguageTag.getLanguage()); 1955 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 1956 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 1957 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 1958 CreateLanguageFile(aLanguageTag, false))) 1959 { 1960 //the language is available - so bring it on 1961 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1962 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1963 if( pRet ) 1964 { 1965 rLang = aLanguageTag; 1966 return pRet; 1967 } 1968 } 1969 1970 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || 1971 CreateLanguageFile(aLanguageTag, false)) 1972 { 1973 //the language is available - so bring it on 1974 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1975 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1976 if( pRet ) 1977 { 1978 rLang = aLanguageTag; 1979 return pRet; 1980 } 1981 } 1982 return nullptr; 1983 } 1984 1985 bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang, 1986 const OUString& sWord ) 1987 { 1988 LanguageTag aLanguageTag( eLang); 1989 1990 /* TODO-BCP47: again horrible ugliness */ 1991 1992 // First search for eLang, then primary language of eLang 1993 // and last in LANGUAGE_UNDETERMINED 1994 1995 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) 1996 { 1997 //the language is available - so bring it on 1998 auto const& pList = m_aLangTable.find(aLanguageTag)->second; 1999 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() ) 2000 return true; 2001 } 2002 2003 // If it still could not be found here, then keep on searching 2004 // the primary language for example EN 2005 aLanguageTag.reset(aLanguageTag.getLanguage()); 2006 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 2007 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 2008 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 2009 CreateLanguageFile(aLanguageTag, false))) 2010 { 2011 //the language is available - so bring it on 2012 auto const& pList = m_aLangTable.find(aLanguageTag)->second; 2013 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() ) 2014 return true; 2015 } 2016 2017 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || 2018 CreateLanguageFile(aLanguageTag, false)) 2019 { 2020 //the language is available - so bring it on 2021 auto const& pList = m_aLangTable.find(aLanguageTag)->second; 2022 if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() ) 2023 return true; 2024 } 2025 return false; 2026 } 2027 2028 static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord) 2029 { 2030 SvStringsISortDtor::const_iterator it = pList->find( "~" ); 2031 SvStringsISortDtor::size_type nPos = it - pList->begin(); 2032 if( nPos < pList->size() ) 2033 { 2034 OUString sLowerWord(sWord.toAsciiLowerCase()); 2035 OUString sAbr; 2036 for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n ) 2037 { 2038 sAbr = (*pList)[ n ]; 2039 if (sAbr[0] != '~') 2040 break; 2041 // ~ and ~. are not allowed! 2042 if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() ) 2043 { 2044 OUString sLowerAbk(sAbr.toAsciiLowerCase()); 2045 for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;) 2046 { 2047 if( !--i ) // agrees 2048 return true; 2049 2050 if( sLowerAbk[i] != sLowerWord[--ii]) 2051 break; 2052 } 2053 } 2054 } 2055 } 2056 OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ), 2057 "Wrongly sorted exception list?" ); 2058 return false; 2059 } 2060 2061 bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang, 2062 const OUString& sWord, bool bAbbreviation) 2063 { 2064 LanguageTag aLanguageTag( eLang); 2065 2066 /* TODO-BCP47: did I mention terrible horrible ugliness? */ 2067 2068 // First search for eLang, then primary language of eLang 2069 // and last in LANGUAGE_UNDETERMINED 2070 2071 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) 2072 { 2073 //the language is available - so bring it on 2074 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2075 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2076 return true; 2077 } 2078 2079 // If it still could not be found here, then keep on searching 2080 // the primary language for example EN 2081 aLanguageTag.reset(aLanguageTag.getLanguage()); 2082 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 2083 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 2084 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 2085 CreateLanguageFile(aLanguageTag, false))) 2086 { 2087 //the language is available - so bring it on 2088 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2089 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2090 return true; 2091 } 2092 2093 if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() || 2094 CreateLanguageFile(aLanguageTag, false)) 2095 { 2096 //the language is available - so bring it on 2097 const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList(); 2098 if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 2099 return true; 2100 } 2101 return false; 2102 } 2103 2104 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag, 2105 bool bNewFile, bool bTst, bool bUnlocalized ) const 2106 { 2107 OUString sRet, sExt( rLanguageTag.getBcp47() ); 2108 if (bUnlocalized) 2109 { 2110 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example 2111 std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false); 2112 if (!vecFallBackStrings.empty()) 2113 sExt = vecFallBackStrings[0]; 2114 } 2115 2116 sExt = "_" + sExt + ".dat"; 2117 if( bNewFile ) 2118 sRet = sUserAutoCorrFile + sExt; 2119 else if( !bTst ) 2120 sRet = sShareAutoCorrFile + sExt; 2121 else 2122 { 2123 // test first in the user directory - if not exist, then 2124 sRet = sUserAutoCorrFile + sExt; 2125 if( !FStatHelper::IsDocument( sRet )) 2126 sRet = sShareAutoCorrFile + sExt; 2127 } 2128 return sRet; 2129 } 2130 2131 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists( 2132 SvxAutoCorrect& rParent, 2133 const OUString& rShareAutoCorrectFile, 2134 const OUString& rUserAutoCorrectFile) 2135 : sShareAutoCorrFile( rShareAutoCorrectFile ), 2136 sUserAutoCorrFile( rUserAutoCorrectFile ), 2137 aModifiedDate( Date::EMPTY ), 2138 aModifiedTime( tools::Time::EMPTY ), 2139 aLastCheckTime( tools::Time::EMPTY ), 2140 rAutoCorrect(rParent), 2141 nFlags(ACFlags::NONE) 2142 { 2143 } 2144 2145 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists() 2146 { 2147 } 2148 2149 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp() 2150 { 2151 // Access the file system only every 2 minutes to check the date stamp 2152 bool bRet = false; 2153 2154 tools::Time nMinTime( 0, 2 ); 2155 tools::Time nAktTime( tools::Time::SYSTEM ); 2156 if( aLastCheckTime <= nAktTime) // overflow? 2157 return false; 2158 nAktTime -= aLastCheckTime; 2159 if( nAktTime > nMinTime ) // min time past 2160 { 2161 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY ); 2162 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2163 &aTstDate, &aTstTime ) && 2164 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime )) 2165 { 2166 bRet = true; 2167 // then remove all the lists fast! 2168 if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst ) 2169 { 2170 pCplStt_ExcptLst.reset(); 2171 } 2172 if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst ) 2173 { 2174 pWrdStt_ExcptLst.reset(); 2175 } 2176 if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List ) 2177 { 2178 pAutocorr_List.reset(); 2179 } 2180 nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad ); 2181 } 2182 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2183 } 2184 return bRet; 2185 } 2186 2187 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp( 2188 std::unique_ptr<SvStringsISortDtor>& rpLst, 2189 const OUString& sStrmName, 2190 tools::SvRef<SotStorage>& rStg) 2191 { 2192 if( rpLst ) 2193 rpLst->clear(); 2194 else 2195 rpLst.reset( new SvStringsISortDtor ); 2196 2197 { 2198 if( rStg.is() && rStg->IsStream( sStrmName ) ) 2199 { 2200 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, 2201 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) ); 2202 if( ERRCODE_NONE != xStrm->GetError()) 2203 { 2204 xStrm.clear(); 2205 rStg.clear(); 2206 RemoveStream_Imp( sStrmName ); 2207 } 2208 else 2209 { 2210 uno::Reference< uno::XComponentContext > xContext = 2211 comphelper::getProcessComponentContext(); 2212 2213 xml::sax::InputSource aParserInput; 2214 aParserInput.sSystemId = sStrmName; 2215 2216 xStrm->Seek( 0 ); 2217 xStrm->SetBufferSize( 8 * 1024 ); 2218 aParserInput.aInputStream = new utl::OInputStreamWrapper( *xStrm ); 2219 2220 // get filter 2221 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLExceptionListImport ( xContext, *rpLst ); 2222 2223 // connect parser and filter 2224 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create( xContext ); 2225 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler; 2226 xParser->setFastDocumentHandler( xFilter ); 2227 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE ); 2228 xParser->setTokenHandler( xTokenHandler ); 2229 2230 // parse 2231 try 2232 { 2233 xParser->parseStream( aParserInput ); 2234 } 2235 catch( const xml::sax::SAXParseException& ) 2236 { 2237 // re throw ? 2238 } 2239 catch( const xml::sax::SAXException& ) 2240 { 2241 // re throw ? 2242 } 2243 catch( const io::IOException& ) 2244 { 2245 // re throw ? 2246 } 2247 } 2248 } 2249 2250 // Set time stamp 2251 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2252 &aModifiedDate, &aModifiedTime ); 2253 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2254 } 2255 2256 } 2257 2258 void SvxAutoCorrectLanguageLists::SaveExceptList_Imp( 2259 const SvStringsISortDtor& rLst, 2260 const OUString& sStrmName, 2261 tools::SvRef<SotStorage> const &rStg, 2262 bool bConvert ) 2263 { 2264 if( !rStg.is() ) 2265 return; 2266 2267 if( rLst.empty() ) 2268 { 2269 rStg->Remove( sStrmName ); 2270 rStg->Commit(); 2271 } 2272 else 2273 { 2274 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, 2275 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); 2276 if( xStrm.is() ) 2277 { 2278 xStrm->SetSize( 0 ); 2279 xStrm->SetBufferSize( 8192 ); 2280 xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); 2281 2282 2283 uno::Reference< uno::XComponentContext > xContext = 2284 comphelper::getProcessComponentContext(); 2285 2286 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); 2287 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm ); 2288 xWriter->setOutputStream(xOut); 2289 2290 uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW); 2291 rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) ); 2292 2293 xExp->exportDoc( XML_BLOCK_LIST ); 2294 2295 xStrm->Commit(); 2296 if( xStrm->GetError() == ERRCODE_NONE ) 2297 { 2298 xStrm.clear(); 2299 if (!bConvert) 2300 { 2301 rStg->Commit(); 2302 if( ERRCODE_NONE != rStg->GetError() ) 2303 { 2304 rStg->Remove( sStrmName ); 2305 rStg->Commit(); 2306 } 2307 } 2308 } 2309 } 2310 } 2311 } 2312 2313 SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList() 2314 { 2315 if( pAutocorr_List ) 2316 pAutocorr_List->DeleteAndDestroyAll(); 2317 else 2318 pAutocorr_List.reset( new SvxAutocorrWordList() ); 2319 2320 try 2321 { 2322 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ ); 2323 uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ ); 2324 uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext(); 2325 2326 xml::sax::InputSource aParserInput; 2327 aParserInput.sSystemId = pXMLImplAutocorr_ListStr; 2328 aParserInput.aInputStream = xStrm->getInputStream(); 2329 2330 // get parser 2331 uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext); 2332 SAL_INFO("editeng", "AutoCorrect Import" ); 2333 uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg ); 2334 uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler; 2335 2336 // connect parser and filter 2337 xParser->setFastDocumentHandler( xFilter ); 2338 xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE ); 2339 xParser->setTokenHandler(xTokenHandler); 2340 2341 // parse 2342 xParser->parseStream( aParserInput ); 2343 } 2344 catch ( const uno::Exception& ) 2345 { 2346 TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile); 2347 } 2348 2349 // Set time stamp 2350 FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2351 &aModifiedDate, &aModifiedTime ); 2352 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2353 2354 return pAutocorr_List.get(); 2355 } 2356 2357 const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList() 2358 { 2359 if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() ) 2360 { 2361 LoadAutocorrWordList(); 2362 if( !pAutocorr_List ) 2363 { 2364 OSL_ENSURE( false, "No valid list" ); 2365 pAutocorr_List.reset( new SvxAutocorrWordList() ); 2366 } 2367 nFlags |= ACFlags::ChgWordLstLoad; 2368 } 2369 return pAutocorr_List.get(); 2370 } 2371 2372 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList() 2373 { 2374 if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() ) 2375 { 2376 LoadCplSttExceptList(); 2377 if( !pCplStt_ExcptLst ) 2378 { 2379 OSL_ENSURE( false, "No valid list" ); 2380 pCplStt_ExcptLst.reset( new SvStringsISortDtor ); 2381 } 2382 nFlags |= ACFlags::CplSttLstLoad; 2383 } 2384 return pCplStt_ExcptLst.get(); 2385 } 2386 2387 bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew) 2388 { 2389 bool bRet = false; 2390 if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second ) 2391 { 2392 MakeUserStorage_Impl(); 2393 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2394 2395 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2396 2397 xStg = nullptr; 2398 // Set time stamp 2399 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2400 &aModifiedDate, &aModifiedTime ); 2401 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2402 bRet = true; 2403 } 2404 return bRet; 2405 } 2406 2407 bool SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew) 2408 { 2409 bool bRet = false; 2410 SvStringsISortDtor* pExceptList = LoadWrdSttExceptList(); 2411 if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second ) 2412 { 2413 MakeUserStorage_Impl(); 2414 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2415 2416 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2417 2418 xStg = nullptr; 2419 // Set time stamp 2420 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2421 &aModifiedDate, &aModifiedTime ); 2422 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2423 bRet = true; 2424 } 2425 return bRet; 2426 } 2427 2428 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList() 2429 { 2430 try 2431 { 2432 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); 2433 if( xStg.is() && xStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) 2434 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2435 } 2436 catch (const css::ucb::ContentCreationException&) 2437 { 2438 } 2439 return pCplStt_ExcptLst.get(); 2440 } 2441 2442 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList() 2443 { 2444 MakeUserStorage_Impl(); 2445 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2446 2447 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2448 2449 xStg = nullptr; 2450 2451 // Set time stamp 2452 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2453 &aModifiedDate, &aModifiedTime ); 2454 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2455 } 2456 2457 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList() 2458 { 2459 try 2460 { 2461 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); 2462 if( xStg.is() && xStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) ) 2463 LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2464 } 2465 catch (const css::ucb::ContentCreationException &) 2466 { 2467 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList"); 2468 } 2469 return pWrdStt_ExcptLst.get(); 2470 } 2471 2472 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList() 2473 { 2474 MakeUserStorage_Impl(); 2475 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2476 2477 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2478 2479 xStg = nullptr; 2480 // Set time stamp 2481 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2482 &aModifiedDate, &aModifiedTime ); 2483 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2484 } 2485 2486 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList() 2487 { 2488 if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() ) 2489 { 2490 LoadWrdSttExceptList(); 2491 if( !pWrdStt_ExcptLst ) 2492 { 2493 OSL_ENSURE( false, "No valid list" ); 2494 pWrdStt_ExcptLst.reset( new SvStringsISortDtor ); 2495 } 2496 nFlags |= ACFlags::WrdSttLstLoad; 2497 } 2498 return pWrdStt_ExcptLst.get(); 2499 } 2500 2501 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName ) 2502 { 2503 if( sShareAutoCorrFile != sUserAutoCorrFile ) 2504 { 2505 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2506 if( xStg.is() && ERRCODE_NONE == xStg->GetError() && 2507 xStg->IsStream( rName ) ) 2508 { 2509 xStg->Remove( rName ); 2510 xStg->Commit(); 2511 2512 xStg = nullptr; 2513 } 2514 } 2515 } 2516 2517 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl() 2518 { 2519 // The conversion needs to happen if the file is already in the user 2520 // directory and is in the old format. Additionally it needs to 2521 // happen when the file is being copied from share to user. 2522 2523 bool bError = false, bConvert = false, bCopy = false; 2524 INetURLObject aDest; 2525 INetURLObject aSource; 2526 2527 if (sUserAutoCorrFile != sShareAutoCorrFile ) 2528 { 2529 aSource = INetURLObject ( sShareAutoCorrFile ); 2530 aDest = INetURLObject ( sUserAutoCorrFile ); 2531 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) ) 2532 { 2533 aDest.SetExtension ( "bak" ); 2534 bConvert = true; 2535 } 2536 bCopy = true; 2537 } 2538 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) ) 2539 { 2540 aSource = INetURLObject ( sUserAutoCorrFile ); 2541 aDest = INetURLObject ( sUserAutoCorrFile ); 2542 aDest.SetExtension ( "bak" ); 2543 bCopy = bConvert = true; 2544 } 2545 if (bCopy) 2546 { 2547 try 2548 { 2549 OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri )); 2550 sal_Int32 nSlashPos = sMain.lastIndexOf('/'); 2551 sMain = sMain.copy(0, nSlashPos); 2552 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2553 TransferInfo aInfo; 2554 aInfo.NameClash = NameClash::OVERWRITE; 2555 aInfo.NewTitle = aDest.GetLastName(); 2556 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ); 2557 aInfo.MoveData = false; 2558 aNewContent.executeCommand( "transfer", Any(aInfo)); 2559 } 2560 catch (...) 2561 { 2562 bError = true; 2563 } 2564 } 2565 if (bConvert && !bError) 2566 { 2567 tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ ); 2568 tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE ); 2569 2570 if( xSrcStg.is() && xDstStg.is() ) 2571 { 2572 std::unique_ptr<SvStringsISortDtor> pTmpWordList; 2573 2574 if (xSrcStg->IsContained( pXMLImplWrdStt_ExcptLstStr ) ) 2575 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg ); 2576 2577 if (pTmpWordList) 2578 { 2579 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true ); 2580 pTmpWordList.reset(); 2581 } 2582 2583 2584 if (xSrcStg->IsContained( pXMLImplCplStt_ExcptLstStr ) ) 2585 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg ); 2586 2587 if (pTmpWordList) 2588 { 2589 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true ); 2590 pTmpWordList->clear(); 2591 } 2592 2593 GetAutocorrWordList(); 2594 MakeBlocklist_Imp( *xDstStg ); 2595 sShareAutoCorrFile = sUserAutoCorrFile; 2596 xDstStg = nullptr; 2597 try 2598 { 2599 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2600 aContent.executeCommand ( "delete", makeAny ( true ) ); 2601 } 2602 catch (...) 2603 { 2604 } 2605 } 2606 } 2607 else if( bCopy && !bError ) 2608 sShareAutoCorrFile = sUserAutoCorrFile; 2609 } 2610 2611 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg ) 2612 { 2613 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty(); 2614 if( !bRemove ) 2615 { 2616 tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr, 2617 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); 2618 if( refList.is() ) 2619 { 2620 refList->SetSize( 0 ); 2621 refList->SetBufferSize( 8192 ); 2622 refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); 2623 2624 uno::Reference< uno::XComponentContext > xContext = 2625 comphelper::getProcessComponentContext(); 2626 2627 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); 2628 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList ); 2629 xWriter->setOutputStream(xOut); 2630 2631 rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) ); 2632 2633 xExp->exportDoc( XML_BLOCK_LIST ); 2634 2635 refList->Commit(); 2636 bRet = ERRCODE_NONE == refList->GetError(); 2637 if( bRet ) 2638 { 2639 refList.clear(); 2640 rStg.Commit(); 2641 if( ERRCODE_NONE != rStg.GetError() ) 2642 { 2643 bRemove = true; 2644 bRet = false; 2645 } 2646 } 2647 } 2648 else 2649 bRet = false; 2650 } 2651 2652 if( bRemove ) 2653 { 2654 rStg.Remove( pXMLImplAutocorr_ListStr ); 2655 rStg.Commit(); 2656 } 2657 2658 return bRet; 2659 } 2660 2661 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries ) 2662 { 2663 // First get the current list! 2664 GetAutocorrWordList(); 2665 2666 MakeUserStorage_Impl(); 2667 tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2668 2669 bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError(); 2670 2671 if( bRet ) 2672 { 2673 for (SvxAutocorrWord & aWordToDelete : aDeleteEntries) 2674 { 2675 std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); 2676 if( xFoundEntry ) 2677 { 2678 if( !xFoundEntry->IsTextOnly() ) 2679 { 2680 OUString aName( aWordToDelete.GetShort() ); 2681 if (xStorage->IsOLEStorage()) 2682 aName = EncryptBlockName_Imp(aName); 2683 else 2684 GeneratePackageName ( aWordToDelete.GetShort(), aName ); 2685 2686 if( xStorage->IsContained( aName ) ) 2687 { 2688 xStorage->Remove( aName ); 2689 bRet = xStorage->Commit(); 2690 } 2691 } 2692 } 2693 } 2694 2695 for (const SvxAutocorrWord & aNewEntrie : aNewEntries) 2696 { 2697 SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ); 2698 std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd ); 2699 if( xRemoved ) 2700 { 2701 if( !xRemoved->IsTextOnly() ) 2702 { 2703 // Still have to remove the Storage 2704 OUString sStorageName( aWordToAdd.GetShort() ); 2705 if (xStorage->IsOLEStorage()) 2706 sStorageName = EncryptBlockName_Imp(sStorageName); 2707 else 2708 GeneratePackageName ( aWordToAdd.GetShort(), sStorageName); 2709 2710 if( xStorage->IsContained( sStorageName ) ) 2711 xStorage->Remove( sStorageName ); 2712 } 2713 } 2714 bRet = pAutocorr_List->Insert( std::move(aWordToAdd) ); 2715 2716 if ( !bRet ) 2717 { 2718 break; 2719 } 2720 } 2721 2722 if ( bRet ) 2723 { 2724 bRet = MakeBlocklist_Imp( *xStorage ); 2725 } 2726 } 2727 return bRet; 2728 } 2729 2730 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong ) 2731 { 2732 // First get the current list! 2733 GetAutocorrWordList(); 2734 2735 MakeUserStorage_Impl(); 2736 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2737 2738 bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError(); 2739 2740 // Update the word list 2741 if( bRet ) 2742 { 2743 SvxAutocorrWord aNew(rShort, rLong, true ); 2744 std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew ); 2745 if( xRemove ) 2746 { 2747 if( !xRemove->IsTextOnly() ) 2748 { 2749 // Still have to remove the Storage 2750 OUString sStgNm( rShort ); 2751 if (xStg->IsOLEStorage()) 2752 sStgNm = EncryptBlockName_Imp(sStgNm); 2753 else 2754 GeneratePackageName ( rShort, sStgNm); 2755 2756 if( xStg->IsContained( sStgNm ) ) 2757 xStg->Remove( sStgNm ); 2758 } 2759 } 2760 2761 if( pAutocorr_List->Insert( std::move(aNew) ) ) 2762 { 2763 bRet = MakeBlocklist_Imp( *xStg ); 2764 xStg = nullptr; 2765 } 2766 else 2767 { 2768 bRet = false; 2769 } 2770 } 2771 return bRet; 2772 } 2773 2774 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, 2775 SfxObjectShell& rShell ) 2776 { 2777 // First get the current list! 2778 GetAutocorrWordList(); 2779 2780 MakeUserStorage_Impl(); 2781 2782 try 2783 { 2784 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE ); 2785 OUString sLong; 2786 bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong ); 2787 xStg = nullptr; 2788 2789 // Update the word list 2790 if( bRet ) 2791 { 2792 if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) ) 2793 { 2794 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2795 MakeBlocklist_Imp( *xStor ); 2796 } 2797 } 2798 } 2799 catch ( const uno::Exception& ) 2800 { 2801 } 2802 } 2803 2804 // Keep the list sorted ... 2805 struct SvxAutocorrWordList::CompareSvxAutocorrWordList 2806 { 2807 bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const 2808 { 2809 CollatorWrapper& rCmp = ::GetCollatorWrapper(); 2810 return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0; 2811 } 2812 }; 2813 2814 namespace { 2815 2816 typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType; 2817 2818 } 2819 2820 struct SvxAutocorrWordList::Impl 2821 { 2822 2823 // only one of these contains the data 2824 // maSortedVector is manually sorted so we can optimise data movement 2825 mutable AutocorrWordSetType maSortedVector; 2826 mutable AutocorrWordHashType maHash; // key is 'Short' 2827 2828 void DeleteAndDestroyAll() 2829 { 2830 maHash.clear(); 2831 maSortedVector.clear(); 2832 } 2833 }; 2834 2835 SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {} 2836 2837 SvxAutocorrWordList::~SvxAutocorrWordList() 2838 { 2839 } 2840 2841 void SvxAutocorrWordList::DeleteAndDestroyAll() 2842 { 2843 mpImpl->DeleteAndDestroyAll(); 2844 } 2845 2846 // returns true if inserted 2847 const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const 2848 { 2849 if ( mpImpl->maSortedVector.empty() ) // use the hash 2850 { 2851 OUString aShort = aWord.GetShort(); 2852 auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) ); 2853 if (inserted) 2854 return &(it->second); 2855 return nullptr; 2856 } 2857 else 2858 { 2859 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList()); 2860 CollatorWrapper& rCmp = ::GetCollatorWrapper(); 2861 if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0) 2862 { 2863 it = mpImpl->maSortedVector.insert(it, std::move(aWord)); 2864 return &*it; 2865 } 2866 return nullptr; 2867 } 2868 } 2869 2870 void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt) 2871 { 2872 (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt )); 2873 } 2874 2875 bool SvxAutocorrWordList::empty() const 2876 { 2877 return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty(); 2878 } 2879 2880 std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord) 2881 { 2882 2883 if ( mpImpl->maSortedVector.empty() ) // use the hash 2884 { 2885 AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() ); 2886 if( it != mpImpl->maHash.end() ) 2887 { 2888 SvxAutocorrWord pMatch = std::move(it->second); 2889 mpImpl->maHash.erase (it); 2890 return pMatch; 2891 } 2892 } 2893 else 2894 { 2895 auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList()); 2896 if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it)) 2897 { 2898 SvxAutocorrWord pMatch = std::move(*it); 2899 mpImpl->maSortedVector.erase (it); 2900 return pMatch; 2901 } 2902 } 2903 return std::optional<SvxAutocorrWord>(); 2904 } 2905 2906 // return the sorted contents - defer sorting until we have to. 2907 const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const 2908 { 2909 // convert from hash to set permanently 2910 if ( mpImpl->maSortedVector.empty() ) 2911 { 2912 std::vector<SvxAutocorrWord> tmp; 2913 tmp.reserve(mpImpl->maHash.size()); 2914 for (auto & rPair : mpImpl->maHash) 2915 tmp.emplace_back(std::move(rPair.second)); 2916 mpImpl->maHash.clear(); 2917 // sort twice - this gets the list into mostly-sorted order, which 2918 // reduces the number of times we need to invoke the expensive ICU collate fn. 2919 std::sort(tmp.begin(), tmp.end(), 2920 [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) 2921 { 2922 return lhs.GetShort() < rhs.GetShort(); 2923 }); 2924 // This beast has some O(N log(N)) in a terribly slow ICU collate fn. 2925 // stable_sort is twice as fast as sort in this situation because it does 2926 // fewer comparison operations. 2927 std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList()); 2928 mpImpl->maSortedVector = std::move(tmp); 2929 } 2930 return mpImpl->maSortedVector; 2931 } 2932 2933 const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd, 2934 const OUString &rTxt, 2935 sal_Int32 &rStt, 2936 sal_Int32 nEndPos) const 2937 { 2938 const OUString& rChk = pFnd->GetShort(); 2939 2940 sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern? 2941 sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern? 2942 sal_Int32 nSttWdPos = nEndPos; 2943 2944 // direct replacement of keywords surrounded by colons (for example, ":name:") 2945 bool bColonNameColon = rTxt.getLength() > nEndPos && 2946 rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":"); 2947 if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard ) 2948 { 2949 2950 bool bWasWordDelim = false; 2951 sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard; 2952 if (bColonNameColon) 2953 nCalcStt++; 2954 if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon || 2955 ( nCalcStt < rStt && 2956 IsWordDelim( rTxt[ nCalcStt - 1 ] ))) ) 2957 { 2958 TransliterationWrapper& rCmp = GetIgnoreTranslWrapper(); 2959 OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard); 2960 if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) )) 2961 { 2962 rStt = nCalcStt; 2963 if (!left_wildcard) 2964 { 2965 // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg. 1/2/14 2966 if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1) 2967 return nullptr; 2968 return pFnd; 2969 } 2970 // get the first word delimiter position before the matching ".*word" pattern 2971 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ]))) 2972 ; 2973 if (bWasWordDelim) rStt++; 2974 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard); 2975 // avoid double spaces before simple "word" replacement 2976 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong(); 2977 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) ) 2978 return pNew; 2979 } 2980 } else 2981 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support 2982 if ( right_wildcard ) 2983 { 2984 2985 OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) ); 2986 // Get the last word delimiter position 2987 bool not_suffix; 2988 2989 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]))) 2990 ; 2991 // search the first occurrence (with a left word delimitation, if needed) 2992 sal_Int32 nFndPos = -1; 2993 do { 2994 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1); 2995 if (nFndPos == -1) 2996 break; 2997 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength())); 2998 } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix ); 2999 3000 if ( nFndPos != -1 ) 3001 { 3002 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. "a:" 3003 3004 if ( left_wildcard ) 3005 { 3006 // get the first word delimiter position before the matching ".*word.*" pattern 3007 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ]))) 3008 ; 3009 if (bWasWordDelim) nFndPos++; 3010 } 3011 if (nEndPos + extra_repl <= nFndPos) 3012 { 3013 return nullptr; 3014 } 3015 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" 3016 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl); 3017 3018 OUString aLong; 3019 rStt = nFndPos; 3020 if ( !left_wildcard ) 3021 { 3022 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength(); 3023 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : ""); 3024 } else { 3025 OUStringBuffer buf; 3026 do { 3027 nSttWdPos = rTxt.indexOf( sTmp, nFndPos); 3028 if (nSttWdPos != -1) 3029 { 3030 sal_Int32 nTmp(nFndPos); 3031 while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp])) 3032 nTmp++; 3033 if (nTmp < nSttWdPos) 3034 break; // word delimiter found 3035 buf.append(rTxt.subView(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong()); 3036 nFndPos = nSttWdPos + sTmp.getLength(); 3037 } 3038 } while (nSttWdPos != -1); 3039 if (nEndPos - nFndPos > extra_repl) 3040 buf.append(rTxt.subView(nFndPos, nEndPos - nFndPos)); 3041 aLong = buf.makeStringAndClear(); 3042 } 3043 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) 3044 { 3045 if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos ) 3046 return pNew; 3047 } 3048 } 3049 } 3050 } 3051 return nullptr; 3052 } 3053 3054 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt, 3055 sal_Int32 nEndPos) const 3056 { 3057 for (auto const& elem : mpImpl->maHash) 3058 { 3059 if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) 3060 return pTmp; 3061 } 3062 3063 for (auto const& elem : mpImpl->maSortedVector) 3064 { 3065 if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) 3066 return pTmp; 3067 } 3068 return nullptr; 3069 } 3070 3071 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 3072
