1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <memory> 21 #include <string_view> 22 #include <sal/config.h> 23 24 #include <com/sun/star/linguistic2/XSpellChecker1.hpp> 25 #include <com/sun/star/embed/XStorage.hpp> 26 #include <com/sun/star/io/IOException.hpp> 27 #include <com/sun/star/io/XStream.hpp> 28 #include <tools/urlobj.hxx> 29 #include <i18nlangtag/mslangid.hxx> 30 #include <i18nutil/transliteration.hxx> 31 #include <sal/log.hxx> 32 #include <osl/diagnose.h> 33 #include <vcl/svapp.hxx> 34 #include <vcl/settings.hxx> 35 #include <svl/fstathelper.hxx> 36 #include <svl/urihelper.hxx> 37 #include <unotools/charclass.hxx> 38 #include <com/sun/star/i18n/UnicodeType.hpp> 39 #include <unotools/collatorwrapper.hxx> 40 #include <com/sun/star/i18n/UnicodeScript.hpp> 41 #include <com/sun/star/i18n/OrdinalSuffix.hpp> 42 #include <unotools/localedatawrapper.hxx> 43 #include <unotools/transliterationwrapper.hxx> 44 #include <comphelper/processfactory.hxx> 45 #include <comphelper/storagehelper.hxx> 46 #include <comphelper/string.hxx> 47 #include <editeng/editids.hrc> 48 #include <sot/storage.hxx> 49 #include 
<editeng/udlnitem.hxx> 50 #include <editeng/wghtitem.hxx> 51 #include <editeng/postitem.hxx> 52 #include <editeng/crossedoutitem.hxx> 53 #include <editeng/escapementitem.hxx> 54 #include <editeng/svxacorr.hxx> 55 #include <editeng/unolingu.hxx> 56 #include <vcl/window.hxx> 57 #include <com/sun/star/xml/sax/InputSource.hpp> 58 #include <com/sun/star/xml/sax/FastParser.hpp> 59 #include <com/sun/star/xml/sax/Writer.hpp> 60 #include <com/sun/star/xml/sax/SAXParseException.hpp> 61 #include <unotools/streamwrap.hxx> 62 #include "SvXMLAutoCorrectImport.hxx" 63 #include "SvXMLAutoCorrectExport.hxx" 64 #include "SvXMLAutoCorrectTokenHandler.hxx" 65 #include <ucbhelper/content.hxx> 66 #include <com/sun/star/ucb/ContentCreationException.hpp> 67 #include <com/sun/star/ucb/XCommandEnvironment.hpp> 68 #include <com/sun/star/ucb/TransferInfo.hpp> 69 #include <com/sun/star/ucb/NameClash.hpp> 70 #include <tools/diagnose_ex.h> 71 #include <xmloff/xmltoken.hxx> 72 #include <unordered_map> 73 #include <rtl/character.hxx> 74 75 using namespace ::com::sun::star::ucb; 76 using namespace ::com::sun::star::uno; 77 using namespace ::com::sun::star::xml::sax; 78 using namespace ::com::sun::star; 79 using namespace ::xmloff::token; 80 using namespace ::utl; 81 82 namespace { 83 84 enum class Flags { 85 NONE = 0x00, 86 FullStop = 0x01, 87 ExclamationMark = 0x02, 88 QuestionMark = 0x04, 89 }; 90 91 } 92 93 namespace o3tl { 94 template<> struct typed_flags<Flags> : is_typed_flags<Flags, 0x07> {}; 95 } 96 static const sal_Unicode cNonBreakingSpace = 0xA0; 97 98 static const char pXMLImplWrdStt_ExcptLstStr[] = "WordExceptList.xml"; 99 static const char pXMLImplCplStt_ExcptLstStr[] = "SentenceExceptList.xml"; 100 static const char pXMLImplAutocorr_ListStr[] = "DocumentList.xml"; 101 102 static const char 103 /* also at these beginnings - Brackets and all kinds of begin characters */ 104 sImplSttSkipChars[] = "\"\'([{\x83\x84\x89\x91\x92\x93\x94", 105 /* also at these ends - Brackets and all kinds 
of begin characters */ 106 sImplEndSkipChars[] = "\"\')]}\x83\x84\x89\x91\x92\x93\x94"; 107 108 static OUString EncryptBlockName_Imp(const OUString& rName); 109 110 static bool NonFieldWordDelim( const sal_Unicode c ) 111 { 112 return ' ' == c || '\t' == c || 0x0a == c || 113 cNonBreakingSpace == c || 0x2011 == c; 114 } 115 116 static bool IsWordDelim( const sal_Unicode c ) 117 { 118 return c == 0x1 || NonFieldWordDelim(c); 119 } 120 121 122 static bool IsLowerLetter( sal_Int32 nCharType ) 123 { 124 return CharClass::isLetterType( nCharType ) && 125 ( css::i18n::KCharacterType::LOWER & nCharType); 126 } 127 128 static bool IsUpperLetter( sal_Int32 nCharType ) 129 { 130 return CharClass::isLetterType( nCharType ) && 131 ( css::i18n::KCharacterType::UPPER & nCharType); 132 } 133 134 static bool lcl_IsUnsupportedUnicodeChar( CharClass const & rCC, const OUString& rTxt, 135 sal_Int32 nStt, sal_Int32 nEnd ) 136 { 137 for( ; nStt < nEnd; ++nStt ) 138 { 139 css::i18n::UnicodeScript nScript = rCC.getScript( rTxt, nStt ); 140 switch( nScript ) 141 { 142 case css::i18n::UnicodeScript_kCJKRadicalsSupplement: 143 case css::i18n::UnicodeScript_kHangulJamo: 144 case css::i18n::UnicodeScript_kCJKSymbolPunctuation: 145 case css::i18n::UnicodeScript_kHiragana: 146 case css::i18n::UnicodeScript_kKatakana: 147 case css::i18n::UnicodeScript_kHangulCompatibilityJamo: 148 case css::i18n::UnicodeScript_kEnclosedCJKLetterMonth: 149 case css::i18n::UnicodeScript_kCJKCompatibility: 150 case css::i18n::UnicodeScript_k_CJKUnifiedIdeographsExtensionA: 151 case css::i18n::UnicodeScript_kCJKUnifiedIdeograph: 152 case css::i18n::UnicodeScript_kHangulSyllable: 153 case css::i18n::UnicodeScript_kCJKCompatibilityIdeograph: 154 case css::i18n::UnicodeScript_kHalfwidthFullwidthForm: 155 return true; 156 default: ; //do nothing 157 } 158 } 159 return false; 160 } 161 162 static bool lcl_IsSymbolChar( CharClass const & rCC, const OUString& rTxt, 163 sal_Int32 nStt, sal_Int32 nEnd ) 164 { 165 for( ; nStt 
< nEnd; ++nStt ) 166 { 167 if( css::i18n::UnicodeType::PRIVATE_USE == rCC.getType( rTxt, nStt )) 168 return true; 169 } 170 return false; 171 } 172 173 static bool lcl_IsInAsciiArr( const char* pArr, const sal_Unicode c ) 174 { 175 // tdf#54409 check also typographical quotation marks in the case of skipped ASCII quotation marks 176 if ( 0x2018 <= c && c <= 0x201F && (pArr == sImplSttSkipChars || pArr == sImplEndSkipChars) ) 177 return true; 178 179 bool bRet = false; 180 for( ; *pArr; ++pArr ) 181 if( *pArr == c ) 182 { 183 bRet = true; 184 break; 185 } 186 return bRet; 187 } 188 189 SvxAutoCorrDoc::~SvxAutoCorrDoc() 190 { 191 } 192 193 // Called by the functions: 194 // - FnCapitalStartWord 195 // - FnCapitalStartSentence 196 // after the exchange of characters. Then the words, if necessary, can be inserted 197 // into the exception list. 198 void SvxAutoCorrDoc::SaveCpltSttWord( ACFlags, sal_Int32, const OUString&, 199 sal_Unicode ) 200 { 201 } 202 203 LanguageType SvxAutoCorrDoc::GetLanguage( sal_Int32 ) const 204 { 205 return LANGUAGE_SYSTEM; 206 } 207 208 static const LanguageTag& GetAppLang() 209 { 210 return Application::GetSettings().GetLanguageTag(); 211 } 212 213 /// Never use an unresolved LANGUAGE_SYSTEM. 
214 static LanguageType GetDocLanguage( const SvxAutoCorrDoc& rDoc, sal_Int32 nPos ) 215 { 216 LanguageType eLang = rDoc.GetLanguage( nPos ); 217 if (eLang == LANGUAGE_SYSTEM) 218 eLang = GetAppLang().getLanguageType(); // the current work locale 219 return eLang; 220 } 221 222 static LocaleDataWrapper& GetLocaleDataWrapper( LanguageType nLang ) 223 { 224 static LocaleDataWrapper aLclDtWrp( GetAppLang() ); 225 LanguageTag aLcl( nLang ); 226 const LanguageTag& rLcl = aLclDtWrp.getLoadedLanguageTag(); 227 if( aLcl != rLcl ) 228 aLclDtWrp.setLanguageTag( aLcl ); 229 return aLclDtWrp; 230 } 231 static TransliterationWrapper& GetIgnoreTranslWrapper() 232 { 233 static int bIsInit = 0; 234 static TransliterationWrapper aWrp( ::comphelper::getProcessComponentContext(), 235 TransliterationFlags::IGNORE_KANA | 236 TransliterationFlags::IGNORE_WIDTH ); 237 if( !bIsInit ) 238 { 239 aWrp.loadModuleIfNeeded( GetAppLang().getLanguageType() ); 240 bIsInit = 1; 241 } 242 return aWrp; 243 } 244 static CollatorWrapper& GetCollatorWrapper() 245 { 246 static CollatorWrapper aCollWrp = [&]() 247 { 248 CollatorWrapper tmp( ::comphelper::getProcessComponentContext() ); 249 tmp.loadDefaultCollator( GetAppLang().getLocale(), 0 ); 250 return tmp; 251 }(); 252 return aCollWrp; 253 } 254 255 bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar ) 256 { 257 return cChar == '\0' || cChar == '\t' || cChar == 0x0a || 258 cChar == ' ' || cChar == '\'' || cChar == '\"' || 259 cChar == '*' || cChar == '_' || cChar == '%' || 260 cChar == '.' || cChar == ',' || cChar == ';' || 261 cChar == ':' || cChar == '?' || cChar == '!' || 262 cChar == '/' || cChar == '-'; 263 } 264 265 namespace 266 { 267 bool IsCompoundWordDelimChar(sal_Unicode cChar) 268 { 269 return cChar == '-' || SvxAutoCorrect::IsAutoCorrectChar(cChar); 270 } 271 } 272 273 bool SvxAutoCorrect::NeedsHardspaceAutocorr( sal_Unicode cChar ) 274 { 275 return cChar == '%' || cChar == ';' || cChar == ':' || cChar == '?' || cChar == '!' 
|| 276 cChar == '/' /*case for the urls exception*/; 277 } 278 279 ACFlags SvxAutoCorrect::GetDefaultFlags() 280 { 281 ACFlags nRet = ACFlags::Autocorrect 282 | ACFlags::CapitalStartSentence 283 | ACFlags::CapitalStartWord 284 | ACFlags::ChgOrdinalNumber 285 | ACFlags::ChgToEnEmDash 286 | ACFlags::AddNonBrkSpace 287 | ACFlags::ChgWeightUnderl 288 | ACFlags::SetINetAttr 289 | ACFlags::ChgQuotes 290 | ACFlags::SaveWordCplSttLst 291 | ACFlags::SaveWordWrdSttLst 292 | ACFlags::CorrectCapsLock; 293 LanguageType eLang = GetAppLang().getLanguageType(); 294 if( eLang.anyOf( 295 LANGUAGE_ENGLISH, 296 LANGUAGE_ENGLISH_US, 297 LANGUAGE_ENGLISH_UK, 298 LANGUAGE_ENGLISH_AUS, 299 LANGUAGE_ENGLISH_CAN, 300 LANGUAGE_ENGLISH_NZ, 301 LANGUAGE_ENGLISH_EIRE, 302 LANGUAGE_ENGLISH_SAFRICA, 303 LANGUAGE_ENGLISH_JAMAICA, 304 LANGUAGE_ENGLISH_CARIBBEAN)) 305 nRet &= ~ACFlags(ACFlags::ChgQuotes|ACFlags::ChgSglQuotes); 306 return nRet; 307 } 308 309 static constexpr sal_Unicode cEmDash = 0x2014; 310 static constexpr sal_Unicode cEnDash = 0x2013; 311 static constexpr sal_Unicode cApostrophe = 0x2019; 312 313 SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile, 314 const OUString& rUserAutocorrFile ) 315 : sShareAutoCorrFile( rShareAutocorrFile ) 316 , sUserAutoCorrFile( rUserAutocorrFile ) 317 , eCharClassLang( LANGUAGE_DONTKNOW ) 318 , nFlags(SvxAutoCorrect::GetDefaultFlags()) 319 , cStartDQuote( 0 ) 320 , cEndDQuote( 0 ) 321 , cStartSQuote( 0 ) 322 , cEndSQuote( 0 ) 323 { 324 } 325 326 SvxAutoCorrect::SvxAutoCorrect( const SvxAutoCorrect& rCpy ) 327 : sShareAutoCorrFile( rCpy.sShareAutoCorrFile ) 328 , sUserAutoCorrFile( rCpy.sUserAutoCorrFile ) 329 , aSwFlags( rCpy.aSwFlags ) 330 , eCharClassLang(rCpy.eCharClassLang) 331 , nFlags( rCpy.nFlags & ~ACFlags(ACFlags::ChgWordLstLoad|ACFlags::CplSttLstLoad|ACFlags::WrdSttLstLoad)) 332 , cStartDQuote( rCpy.cStartDQuote ) 333 , cEndDQuote( rCpy.cEndDQuote ) 334 , cStartSQuote( rCpy.cStartSQuote ) 335 , cEndSQuote( rCpy.cEndSQuote ) 
336 { 337 } 338 339 340 SvxAutoCorrect::~SvxAutoCorrect() 341 { 342 } 343 344 void SvxAutoCorrect::GetCharClass_( LanguageType eLang ) 345 { 346 pCharClass.reset( new CharClass( LanguageTag( eLang)) ); 347 eCharClassLang = eLang; 348 } 349 350 void SvxAutoCorrect::SetAutoCorrFlag( ACFlags nFlag, bool bOn ) 351 { 352 ACFlags nOld = nFlags; 353 nFlags = bOn ? nFlags | nFlag 354 : nFlags & ~nFlag; 355 356 if( !bOn ) 357 { 358 if( (nOld & ACFlags::CapitalStartSentence) != (nFlags & ACFlags::CapitalStartSentence) ) 359 nFlags &= ~ACFlags::CplSttLstLoad; 360 if( (nOld & ACFlags::CapitalStartWord) != (nFlags & ACFlags::CapitalStartWord) ) 361 nFlags &= ~ACFlags::WrdSttLstLoad; 362 if( (nOld & ACFlags::Autocorrect) != (nFlags & ACFlags::Autocorrect) ) 363 nFlags &= ~ACFlags::ChgWordLstLoad; 364 } 365 } 366 367 368 // Correct TWo INitial CApitals 369 void SvxAutoCorrect::FnCapitalStartWord( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 370 sal_Int32 nSttPos, sal_Int32 nEndPos, 371 LanguageType eLang ) 372 { 373 CharClass& rCC = GetCharClass( eLang ); 374 375 // Delete all non alphanumeric. Test the characters at the beginning/end of 376 // the word ( recognizes: "(min.", "/min.", and so on.) 377 for( ; nSttPos < nEndPos; ++nSttPos ) 378 if( rCC.isLetterNumeric( rTxt, nSttPos )) 379 break; 380 for( ; nSttPos < nEndPos; --nEndPos ) 381 if( rCC.isLetterNumeric( rTxt, nEndPos - 1 )) 382 break; 383 384 // Is the word a compounded word separated by delimiters? 385 // If so, keep track of all delimiters so each constituent 386 // word can be checked for two initial capital letters. 387 std::deque<sal_Int32> aDelimiters; 388 389 // Always check for two capitals at the beginning 390 // of the entire word, so start at nSttPos. 
391 aDelimiters.push_back(nSttPos); 392 393 // Find all compound word delimiters 394 for (sal_Int32 n = nSttPos; n < nEndPos; ++n) 395 { 396 if (IsCompoundWordDelimChar(rTxt[ n ])) 397 { 398 aDelimiters.push_back( n + 1 ); // Get position of char after delimiter 399 } 400 } 401 402 // Decide where to put the terminating delimiter. 403 // If the last AutoCorrect char was a newline, then the AutoCorrect 404 // char will not be included in rTxt. 405 // If the last AutoCorrect char was not a newline, then the AutoCorrect 406 // character will be the last character in rTxt. 407 if (!IsCompoundWordDelimChar(rTxt[nEndPos-1])) 408 aDelimiters.push_back(nEndPos); 409 410 // Iterate through the word and all words that compose it. 411 // Two capital letters at the beginning of word? 412 for (size_t nI = 0; nI < aDelimiters.size() - 1; ++nI) 413 { 414 nSttPos = aDelimiters[nI]; 415 nEndPos = aDelimiters[nI + 1]; 416 417 if( nSttPos+2 < nEndPos && 418 IsUpperLetter( rCC.getCharacterType( rTxt, nSttPos )) && 419 IsUpperLetter( rCC.getCharacterType( rTxt, ++nSttPos )) && 420 // Is the third character a lower case 421 IsLowerLetter( rCC.getCharacterType( rTxt, nSttPos +1 )) && 422 // Do not replace special attributes 423 0x1 != rTxt[ nSttPos ] && 0x2 != rTxt[ nSttPos ]) 424 { 425 // test if the word is in an exception list 426 OUString sWord( rTxt.copy( nSttPos - 1, nEndPos - nSttPos + 1 )); 427 if( !FindInWrdSttExceptList(eLang, sWord) ) 428 { 429 // Check that word isn't correctly spelled before correcting: 430 css::uno::Reference< css::linguistic2::XSpellChecker1 > xSpeller = 431 LinguMgr::GetSpellChecker(); 432 if( xSpeller->hasLanguage(static_cast<sal_uInt16>(eLang)) ) 433 { 434 Sequence< css::beans::PropertyValue > aEmptySeq; 435 if (xSpeller->isValid(sWord, static_cast<sal_uInt16>(eLang), aEmptySeq)) 436 { 437 return; 438 } 439 } 440 sal_Unicode cSave = rTxt[ nSttPos ]; 441 OUString sChar = rCC.lowercase( OUString(cSave) ); 442 if( sChar[0] != cSave && rDoc.ReplaceRange( 
nSttPos, 1, sChar )) 443 { 444 if( ACFlags::SaveWordWrdSttLst & nFlags ) 445 rDoc.SaveCpltSttWord( ACFlags::CapitalStartWord, nSttPos, sWord, cSave ); 446 } 447 } 448 } 449 } 450 } 451 452 // Format ordinal numbers suffixes (1st -> 1^st) 453 bool SvxAutoCorrect::FnChgOrdinalNumber( 454 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 455 sal_Int32 nSttPos, sal_Int32 nEndPos, 456 LanguageType eLang) 457 { 458 // 1st, 2nd, 3rd, 4 - 0th 459 // 201th or 201st 460 // 12th or 12nd 461 bool bChg = false; 462 463 // In some languages ordinal suffixes should never be 464 // changed to superscript. Let's break for those languages. 465 if (!eLang.anyOf( 466 LANGUAGE_SWEDISH, 467 LANGUAGE_SWEDISH_FINLAND)) 468 { 469 CharClass& rCC = GetCharClass(eLang); 470 471 for (; nSttPos < nEndPos; ++nSttPos) 472 if (!lcl_IsInAsciiArr(sImplSttSkipChars, rTxt[nSttPos])) 473 break; 474 for (; nSttPos < nEndPos; --nEndPos) 475 if (!lcl_IsInAsciiArr(sImplEndSkipChars, rTxt[nEndPos - 1])) 476 break; 477 478 479 // Get the last number in the string to check 480 sal_Int32 nNumEnd = nEndPos; 481 bool bFoundEnd = false; 482 bool isValidNumber = true; 483 sal_Int32 i = nEndPos; 484 while (i > nSttPos) 485 { 486 i--; 487 bool isDigit = rCC.isDigit(rTxt, i); 488 if (bFoundEnd) 489 isValidNumber &= (isDigit || !rCC.isLetter(rTxt, i)); 490 491 if (isDigit && !bFoundEnd) 492 { 493 bFoundEnd = true; 494 nNumEnd = i; 495 } 496 } 497 498 if (bFoundEnd && isValidNumber) { 499 sal_Int32 nNum = rTxt.copy(nSttPos, nNumEnd - nSttPos + 1).toInt32(); 500 501 // Check if the characters after that number correspond to the ordinal suffix 502 uno::Reference< i18n::XOrdinalSuffix > xOrdSuffix 503 = i18n::OrdinalSuffix::create(comphelper::getProcessComponentContext()); 504 505 uno::Sequence< OUString > aSuffixes = xOrdSuffix->getOrdinalSuffix(nNum, rCC.getLanguageTag().getLocale()); 506 for (sal_Int32 nSuff = 0; nSuff < aSuffixes.getLength(); nSuff++) 507 { 508 OUString sSuffix(aSuffixes[nSuff]); 509 OUString sEnd = 
rTxt.copy(nNumEnd + 1, nEndPos - nNumEnd - 1); 510 511 if (sSuffix == sEnd) 512 { 513 // Check if the ordinal suffix has to be set as super script 514 if (rCC.isLetter(sSuffix)) 515 { 516 // Do the change 517 SvxEscapementItem aSvxEscapementItem(DFLT_ESC_AUTO_SUPER, 518 DFLT_ESC_PROP, SID_ATTR_CHAR_ESCAPEMENT); 519 rDoc.SetAttr(nNumEnd + 1, nEndPos, 520 SID_ATTR_CHAR_ESCAPEMENT, 521 aSvxEscapementItem); 522 bChg = true; 523 } 524 } 525 } 526 } 527 } 528 return bChg; 529 } 530 531 // Replace dashes 532 bool SvxAutoCorrect::FnChgToEnEmDash( 533 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 534 sal_Int32 nSttPos, sal_Int32 nEndPos, 535 LanguageType eLang ) 536 { 537 bool bRet = false; 538 CharClass& rCC = GetCharClass( eLang ); 539 if (eLang == LANGUAGE_SYSTEM) 540 eLang = GetAppLang().getLanguageType(); 541 bool bAlwaysUseEmDash = (eLang == LANGUAGE_RUSSIAN || eLang == LANGUAGE_UKRAINIAN); 542 543 // replace " - " or " --" with "enDash" 544 if( 1 < nSttPos && 1 <= nEndPos - nSttPos ) 545 { 546 sal_Unicode cCh = rTxt[ nSttPos ]; 547 if( '-' == cCh ) 548 { 549 if( 1 < nEndPos - nSttPos && 550 ' ' == rTxt[ nSttPos-1 ] && 551 '-' == rTxt[ nSttPos+1 ]) 552 { 553 sal_Int32 n; 554 for( n = nSttPos+2; n < nEndPos && lcl_IsInAsciiArr( 555 sImplSttSkipChars,(cCh = rTxt[ n ])); 556 ++n ) 557 ; 558 559 // found: " --[<AnySttChars>][A-z0-9] 560 if( rCC.isLetterNumeric( OUString(cCh) ) ) 561 { 562 for( n = nSttPos-1; n && lcl_IsInAsciiArr( 563 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 564 ; 565 566 // found: "[A-z0-9][<AnyEndChars>] --[<AnySttChars>][A-z0-9] 567 if( rCC.isLetterNumeric( OUString(cCh) )) 568 { 569 rDoc.Delete( nSttPos, nSttPos + 2 ); 570 rDoc.Insert( nSttPos, bAlwaysUseEmDash ? 
OUString(cEmDash) : OUString(cEnDash) ); 571 bRet = true; 572 } 573 } 574 } 575 } 576 else if( 3 < nSttPos && 577 ' ' == rTxt[ nSttPos-1 ] && 578 '-' == rTxt[ nSttPos-2 ]) 579 { 580 sal_Int32 n, nLen = 1, nTmpPos = nSttPos - 2; 581 if( '-' == ( cCh = rTxt[ nTmpPos-1 ]) ) 582 { 583 --nTmpPos; 584 ++nLen; 585 cCh = rTxt[ nTmpPos-1 ]; 586 } 587 if( ' ' == cCh ) 588 { 589 for( n = nSttPos; n < nEndPos && lcl_IsInAsciiArr( 590 sImplSttSkipChars,(cCh = rTxt[ n ])); 591 ++n ) 592 ; 593 594 // found: " - [<AnySttChars>][A-z0-9] 595 if( rCC.isLetterNumeric( OUString(cCh) ) ) 596 { 597 cCh = ' '; 598 for( n = nTmpPos-1; n && lcl_IsInAsciiArr( 599 sImplEndSkipChars,(cCh = rTxt[ --n ])); ) 600 ; 601 // found: "[A-z0-9][<AnyEndChars>] - [<AnySttChars>][A-z0-9] 602 if( rCC.isLetterNumeric( OUString(cCh) )) 603 { 604 rDoc.Delete( nTmpPos, nTmpPos + nLen ); 605 rDoc.Insert( nTmpPos, bAlwaysUseEmDash ? OUString(cEmDash) : OUString(cEnDash) ); 606 bRet = true; 607 } 608 } 609 } 610 } 611 } 612 613 // Replace [A-z0-9]--[A-z0-9] double dash with "emDash" or "enDash" 614 // [0-9]--[0-9] double dash always replaced with "enDash" 615 // Finnish and Hungarian use enDash instead of emDash. 616 bool bEnDash = (eLang == LANGUAGE_HUNGARIAN || eLang == LANGUAGE_FINNISH); 617 if( 4 <= nEndPos - nSttPos ) 618 { 619 OUString sTmp( rTxt.copy( nSttPos, nEndPos - nSttPos ) ); 620 sal_Int32 nFndPos = sTmp.indexOf("--"); 621 if( nFndPos != -1 && nFndPos && 622 nFndPos + 2 < sTmp.getLength() && 623 ( rCC.isLetterNumeric( sTmp, nFndPos - 1 ) || 624 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nFndPos - 1 ] )) && 625 ( rCC.isLetterNumeric( sTmp, nFndPos + 2 ) || 626 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nFndPos + 2 ] ))) 627 { 628 nSttPos = nSttPos + nFndPos; 629 rDoc.Delete( nSttPos, nSttPos + 2 ); 630 rDoc.Insert( nSttPos, (bEnDash || (rCC.isDigit( sTmp, nFndPos - 1 ) && 631 rCC.isDigit( sTmp, nFndPos + 2 )) ? 
OUString(cEnDash) : OUString(cEmDash)) ); 632 bRet = true; 633 } 634 } 635 return bRet; 636 } 637 638 // Add non-breaking space before specific punctuation marks in French text 639 bool SvxAutoCorrect::FnAddNonBrkSpace( 640 SvxAutoCorrDoc& rDoc, const OUString& rTxt, 641 sal_Int32 nEndPos, 642 LanguageType eLang, bool& io_bNbspRunNext ) 643 { 644 bool bRet = false; 645 646 CharClass& rCC = GetCharClass( eLang ); 647 648 if ( rCC.getLanguageTag().getLanguage() == "fr" ) 649 { 650 bool bFrCA = (rCC.getLanguageTag().getCountry() == "CA"); 651 OUString allChars = ":;?!%"; 652 OUString chars( allChars ); 653 if ( bFrCA ) 654 chars = ":"; 655 656 sal_Unicode cChar = rTxt[ nEndPos ]; 657 bool bHasSpace = chars.indexOf( cChar ) != -1; 658 bool bIsSpecial = allChars.indexOf( cChar ) != -1; 659 if ( bIsSpecial ) 660 { 661 // Get the last word delimiter position 662 sal_Int32 nSttWdPos = nEndPos; 663 bool bWasWordDelim = false; 664 while( nSttWdPos ) 665 { 666 bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]); 667 if (bWasWordDelim) 668 break; 669 } 670 671 //See if the text is the start of a protocol string, e.g. have text of 672 //"http" see if it is the start of "http:" and if so leave it alone 673 sal_Int32 nIndex = nSttWdPos + (bWasWordDelim ? 
1 : 0); 674 sal_Int32 nProtocolLen = nEndPos - nSttWdPos + 1; 675 if (nIndex + nProtocolLen <= rTxt.getLength()) 676 { 677 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 678 return false; 679 } 680 681 // Check the presence of "://" in the word 682 sal_Int32 nStrPos = rTxt.indexOf( "://", nSttWdPos + 1 ); 683 if ( nStrPos == -1 && nEndPos > 0 ) 684 { 685 // Check the previous char 686 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 687 if ( ( chars.indexOf( cPrevChar ) == -1 ) && cPrevChar != '\t' ) 688 { 689 // Remove any previous normal space 690 sal_Int32 nPos = nEndPos - 1; 691 while ( cPrevChar == ' ' || cPrevChar == cNonBreakingSpace ) 692 { 693 if ( nPos == 0 ) break; 694 nPos--; 695 cPrevChar = rTxt[ nPos ]; 696 } 697 698 nPos++; 699 if ( nEndPos - nPos > 0 ) 700 rDoc.Delete( nPos, nEndPos ); 701 702 // Add the non-breaking space at the end pos 703 if ( bHasSpace ) 704 rDoc.Insert( nPos, OUString(cNonBreakingSpace) ); 705 io_bNbspRunNext = true; 706 bRet = true; 707 } 708 else if ( chars.indexOf( cPrevChar ) != -1 ) 709 io_bNbspRunNext = true; 710 } 711 } 712 else if ( cChar == '/' && nEndPos > 1 && rTxt.getLength() > (nEndPos - 1) ) 713 { 714 // Remove the hardspace right before to avoid formatting URLs 715 sal_Unicode cPrevChar = rTxt[ nEndPos - 1 ]; 716 sal_Unicode cMaybeSpaceChar = rTxt[ nEndPos - 2 ]; 717 if ( cPrevChar == ':' && cMaybeSpaceChar == cNonBreakingSpace ) 718 { 719 rDoc.Delete( nEndPos - 2, nEndPos - 1 ); 720 bRet = true; 721 } 722 } 723 } 724 725 return bRet; 726 } 727 728 // URL recognition 729 bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 730 sal_Int32 nSttPos, sal_Int32 nEndPos, 731 LanguageType eLang ) 732 { 733 OUString sURL( URIHelper::FindFirstURLInText( rTxt, nSttPos, nEndPos, 734 GetCharClass( eLang ) )); 735 bool bRet = !sURL.isEmpty(); 736 if( bRet ) // so, set attribute: 737 rDoc.SetINetAttr( nSttPos, nEndPos, sURL ); 738 return bRet; 739 } 
740 741 // Automatic *bold*, /italic/, -strikeout- and _underline_ 742 bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 743 sal_Int32 nEndPos ) 744 { 745 // Condition: 746 // at the beginning: _, *, / or ~ after Space with the following !Space 747 // at the end: _, *, / or ~ before Space (word delimiter?) 748 749 sal_Unicode cInsChar = rTxt[ nEndPos ]; // underline, bold, italic or strikeout 750 if( ++nEndPos != rTxt.getLength() && 751 !IsWordDelim( rTxt[ nEndPos ] ) ) 752 return false; 753 754 --nEndPos; 755 756 bool bAlphaNum = false; 757 sal_Int32 nPos = nEndPos; 758 sal_Int32 nFndPos = -1; 759 CharClass& rCC = GetCharClass( LANGUAGE_SYSTEM ); 760 761 while( nPos ) 762 { 763 switch( sal_Unicode c = rTxt[ --nPos ] ) 764 { 765 case '_': 766 case '-': 767 case '/': 768 case '*': 769 if( c == cInsChar ) 770 { 771 if( bAlphaNum && nPos+1 < nEndPos && ( !nPos || 772 IsWordDelim( rTxt[ nPos-1 ])) && 773 !IsWordDelim( rTxt[ nPos+1 ])) 774 nFndPos = nPos; 775 else 776 // Condition is not satisfied, so cancel 777 nFndPos = -1; 778 nPos = 0; 779 } 780 break; 781 default: 782 if( !bAlphaNum ) 783 bAlphaNum = rCC.isLetterNumeric( rTxt, nPos ); 784 } 785 } 786 787 if( -1 != nFndPos ) 788 { 789 // first delete the Character at the end - this allows insertion 790 // of an empty hint in SetAttr which would be removed by Delete 791 // (fdo#62536, AUTOFMT in Writer) 792 rDoc.Delete( nEndPos, nEndPos + 1 ); 793 rDoc.Delete( nFndPos, nFndPos + 1 ); 794 // Span the Attribute over the area 795 // the end. 
796 if( '*' == cInsChar ) // Bold 797 { 798 SvxWeightItem aSvxWeightItem( WEIGHT_BOLD, SID_ATTR_CHAR_WEIGHT ); 799 rDoc.SetAttr( nFndPos, nEndPos - 1, 800 SID_ATTR_CHAR_WEIGHT, 801 aSvxWeightItem); 802 } 803 else if( '/' == cInsChar ) // Italic 804 { 805 SvxPostureItem aSvxPostureItem( ITALIC_NORMAL, SID_ATTR_CHAR_POSTURE ); 806 rDoc.SetAttr( nFndPos, nEndPos - 1, 807 SID_ATTR_CHAR_POSTURE, 808 aSvxPostureItem); 809 } 810 else if( '-' == cInsChar ) // Strikeout 811 { 812 SvxCrossedOutItem aSvxCrossedOutItem( STRIKEOUT_SINGLE, SID_ATTR_CHAR_STRIKEOUT ); 813 rDoc.SetAttr( nFndPos, nEndPos - 1, 814 SID_ATTR_CHAR_STRIKEOUT, 815 aSvxCrossedOutItem); 816 } 817 else // Underline 818 { 819 SvxUnderlineItem aSvxUnderlineItem( LINESTYLE_SINGLE, SID_ATTR_CHAR_UNDERLINE ); 820 rDoc.SetAttr( nFndPos, nEndPos - 1, 821 SID_ATTR_CHAR_UNDERLINE, 822 aSvxUnderlineItem); 823 } 824 } 825 826 return -1 != nFndPos; 827 } 828 829 // Capitalize first letter of every sentence 830 void SvxAutoCorrect::FnCapitalStartSentence( SvxAutoCorrDoc& rDoc, 831 const OUString& rTxt, bool bNormalPos, 832 sal_Int32 nSttPos, sal_Int32 nEndPos, 833 LanguageType eLang ) 834 { 835 836 if( rTxt.isEmpty() || nEndPos <= nSttPos ) 837 return; 838 839 CharClass& rCC = GetCharClass( eLang ); 840 OUString aText( rTxt ); 841 const sal_Unicode *pStart = aText.getStr(), 842 *pStr = pStart + nEndPos, 843 *pWordStt = nullptr, 844 *pDelim = nullptr; 845 846 bool bAtStart = false; 847 do { 848 --pStr; 849 if (rCC.isLetter(aText, pStr - pStart)) 850 { 851 if( !pWordStt ) 852 pDelim = pStr+1; 853 pWordStt = pStr; 854 } 855 else if (pWordStt && !rCC.isDigit(aText, pStr - pStart)) 856 { 857 if( (lcl_IsInAsciiArr( "-'", *pStr ) || *pStr == cApostrophe) && // These characters are allowed in words 858 pWordStt - 1 == pStr && 859 // Installation at beginning of paragraph. 
Replaced < by <= (#i38971#) 860 (pStart + 1) <= pStr && 861 rCC.isLetter(aText, pStr-1 - pStart)) 862 pWordStt = --pStr; 863 else 864 break; 865 } 866 bAtStart = (pStart == pStr); 867 } while( !bAtStart ); 868 869 if (!pWordStt) 870 return; // no character to be replaced 871 872 873 if (rCC.isDigit(aText, pStr - pStart)) 874 return; // already ok 875 876 if (IsUpperLetter(rCC.getCharacterType(aText, pWordStt - pStart))) 877 return; // already ok 878 879 //See if the text is the start of a protocol string, e.g. have text of 880 //"http" see if it is the start of "http:" and if so leave it alone 881 sal_Int32 nIndex = pWordStt - pStart; 882 sal_Int32 nProtocolLen = pDelim - pWordStt + 1; 883 if (nIndex + nProtocolLen <= rTxt.getLength()) 884 { 885 if (INetURLObject::CompareProtocolScheme(rTxt.copy(nIndex, nProtocolLen)) != INetProtocol::NotValid) 886 return; // already ok 887 } 888 889 if (0x1 == *pWordStt || 0x2 == *pWordStt) 890 return; // already ok 891 892 // Only capitalize, if string before specified characters is long enough 893 if( *pDelim && 2 >= pDelim - pWordStt && 894 lcl_IsInAsciiArr( ".-)>", *pDelim ) ) 895 return; 896 897 // tdf#59666 don't capitalize single Greek letters (except in Greek texts) 898 if ( 1 == pDelim - pWordStt && 0x03B1 <= *pWordStt && *pWordStt <= 0x03C9 && eLang != LANGUAGE_GREEK ) 899 return; 900 901 if( !bAtStart ) // Still no beginning of a paragraph? 
902 { 903 if (NonFieldWordDelim(*pStr)) 904 { 905 for (;;) 906 { 907 bAtStart = (pStart == pStr--); 908 if (bAtStart || !NonFieldWordDelim(*pStr)) 909 break; 910 } 911 } 912 // Asian full stop, full width full stop, full width exclamation mark 913 // and full width question marks are treated as word delimiters 914 else if ( 0x3002 != *pStr && 0xFF0E != *pStr && 0xFF01 != *pStr && 915 0xFF1F != *pStr ) 916 return; // no valid separator -> no replacement 917 } 918 919 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 920 if (FindInWrdSttExceptList(eLang, OUString(pWordStt, pDelim - pWordStt))) 921 return; 922 923 if( bAtStart ) // at the beginning of a paragraph? 924 { 925 // Check out the previous paragraph, if it exists. 926 // If so, then check to paragraph separator at the end. 927 OUString const*const pPrevPara = rDoc.GetPrevPara(bNormalPos); 928 if (!pPrevPara) 929 { 930 // valid separator -> replace 931 OUString sChar( *pWordStt ); 932 sChar = rCC.titlecase(sChar); //see fdo#56740 933 if (!comphelper::string::equals(sChar, *pWordStt)) 934 rDoc.ReplaceRange( pWordStt - pStart, 1, sChar ); 935 return; 936 } 937 938 aText = *pPrevPara; 939 bAtStart = false; 940 pStart = aText.getStr(); 941 pStr = pStart + aText.getLength(); 942 943 do { // overwrite all blanks 944 --pStr; 945 if (!NonFieldWordDelim(*pStr)) 946 break; 947 bAtStart = (pStart == pStr); 948 } while( !bAtStart ); 949 950 if( bAtStart ) 951 return; // no valid separator -> no replacement 952 } 953 954 // Found [ \t]+[A-Z0-9]+ until here. Test now on the paragraph separator. 955 // all three can happen, but not more than once! 956 const sal_Unicode* pExceptStt = nullptr; 957 bool bContinue = true; 958 Flags nFlag = Flags::NONE; 959 do 960 { 961 switch (*pStr) 962 { 963 // Western and Asian full stop 964 case '.': 965 case 0x3002: 966 case 0xFF0E: 967 { 968 if (pStr >= pStart + 2 && *(pStr - 2) == '.') 969 { 970 //e.g. text "f.o.o. 
word": Now currently considering 971 //capitalizing word but second last character of 972 //previous word is a . So probably last word is an 973 //anagram that ends in . and not truly the end of a 974 //previous sentence, so don't autocapitalize this word 975 return; 976 } 977 if (nFlag & Flags::FullStop) 978 return; // no valid separator -> no replacement 979 nFlag |= Flags::FullStop; 980 pExceptStt = pStr; 981 } 982 break; 983 case '!': 984 case 0xFF01: 985 { 986 if (nFlag & Flags::ExclamationMark) 987 return; // no valid separator -> no replacement 988 nFlag |= Flags::ExclamationMark; 989 } 990 break; 991 case '?': 992 case 0xFF1F: 993 { 994 if (nFlag & Flags::QuestionMark) 995 return; // no valid separator -> no replacement 996 nFlag |= Flags::QuestionMark; 997 } 998 break; 999 default: 1000 if (nFlag == Flags::NONE) 1001 return; // no valid separator -> no replacement 1002 else 1003 bContinue = false; 1004 break; 1005 } 1006 1007 if (bContinue && pStr-- == pStart) 1008 { 1009 return; // no valid separator -> no replacement 1010 } 1011 } while (bContinue); 1012 if (Flags::FullStop != nFlag) 1013 pExceptStt = nullptr; 1014 1015 // Only capitalize, if string is long enough 1016 if( 2 > ( pStr - pStart ) ) 1017 return; 1018 1019 if (!rCC.isLetterNumeric(aText, pStr-- - pStart)) 1020 { 1021 bool bValid = false, bAlphaFnd = false; 1022 const sal_Unicode* pTmpStr = pStr; 1023 while( !bValid ) 1024 { 1025 if( rCC.isDigit( aText, pTmpStr - pStart ) ) 1026 { 1027 bValid = true; 1028 pStr = pTmpStr - 1; 1029 } 1030 else if( rCC.isLetter( aText, pTmpStr - pStart ) ) 1031 { 1032 if( bAlphaFnd ) 1033 { 1034 bValid = true; 1035 pStr = pTmpStr; 1036 } 1037 else 1038 bAlphaFnd = true; 1039 } 1040 else if (bAlphaFnd || NonFieldWordDelim(*pTmpStr)) 1041 break; 1042 1043 if( pTmpStr == pStart ) 1044 break; 1045 1046 --pTmpStr; 1047 } 1048 1049 if( !bValid ) 1050 return; // no valid separator -> no replacement 1051 } 1052 1053 bool bNumericOnly = '0' <= *(pStr+1) && *(pStr+1) <= 
'9'; 1054 1055 // Search for the beginning of the word 1056 while (!NonFieldWordDelim(*pStr)) 1057 { 1058 if( bNumericOnly && rCC.isLetter( aText, pStr - pStart ) ) 1059 bNumericOnly = false; 1060 1061 if( pStart == pStr ) 1062 break; 1063 1064 --pStr; 1065 } 1066 1067 if( bNumericOnly ) // consists of only numbers, then not 1068 return; 1069 1070 if (NonFieldWordDelim(*pStr)) 1071 ++pStr; 1072 1073 OUString sWord; 1074 1075 // check on the basis of the exception list 1076 if( pExceptStt ) 1077 { 1078 sWord = OUString(pStr, pExceptStt - pStr + 1); 1079 if( FindInCplSttExceptList(eLang, sWord) ) 1080 return; 1081 1082 // Delete all non alphanumeric. Test the characters at the 1083 // beginning/end of the word ( recognizes: "(min.", "/min.", and so on.) 1084 OUString sTmp( sWord ); 1085 while( !sTmp.isEmpty() && 1086 !rCC.isLetterNumeric( sTmp, 0 ) ) 1087 sTmp = sTmp.copy(1); 1088 1089 // Remove all non alphanumeric characters towards the end up until 1090 // the last one. 1091 sal_Int32 nLen = sTmp.getLength(); 1092 while( nLen && !rCC.isLetterNumeric( sTmp, nLen-1 ) ) 1093 --nLen; 1094 if( nLen + 1 < sTmp.getLength() ) 1095 sTmp = sTmp.copy( 0, nLen + 1 ); 1096 1097 if( !sTmp.isEmpty() && sTmp.getLength() != sWord.getLength() && 1098 FindInCplSttExceptList(eLang, sTmp)) 1099 return; 1100 1101 if(FindInCplSttExceptList(eLang, sWord, true)) 1102 return; 1103 } 1104 1105 // Ok, then replace 1106 sal_Unicode cSave = *pWordStt; 1107 nSttPos = pWordStt - rTxt.getStr(); 1108 OUString sChar = rCC.titlecase(OUString(cSave)); //see fdo#56740 1109 bool bRet = sChar[0] != cSave && rDoc.ReplaceRange( nSttPos, 1, sChar ); 1110 1111 // Perhaps someone wants to have the word 1112 if( bRet && ACFlags::SaveWordCplSttLst & nFlags ) 1113 rDoc.SaveCpltSttWord( ACFlags::CapitalStartSentence, nSttPos, sWord, cSave ); 1114 } 1115 1116 // Correct accidental use of cAPS LOCK key 1117 bool SvxAutoCorrect::FnCorrectCapsLock( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1118 sal_Int32 nSttPos, 
sal_Int32 nEndPos, 1119 LanguageType eLang ) 1120 { 1121 if (nEndPos - nSttPos < 2) 1122 // string must be at least 2-character long. 1123 return false; 1124 1125 CharClass& rCC = GetCharClass( eLang ); 1126 1127 // Check the first 2 letters. 1128 if ( !IsLowerLetter(rCC.getCharacterType(rTxt, nSttPos)) ) 1129 return false; 1130 1131 if ( !IsUpperLetter(rCC.getCharacterType(rTxt, nSttPos+1)) ) 1132 return false; 1133 1134 OUStringBuffer aConverted; 1135 aConverted.append( rCC.uppercase(OUString(rTxt[nSttPos])) ); 1136 aConverted.append( rCC.lowercase(OUString(rTxt[nSttPos+1])) ); 1137 1138 // No replacement for words in TWo INitial CApitals or sMALL iNITIAL list 1139 if (FindInWrdSttExceptList(eLang, rTxt.copy(nSttPos, nEndPos - nSttPos))) 1140 return false; 1141 1142 for( sal_Int32 i = nSttPos+2; i < nEndPos; ++i ) 1143 { 1144 if ( IsLowerLetter(rCC.getCharacterType(rTxt, i)) ) 1145 // A lowercase letter disqualifies the whole text. 1146 return false; 1147 1148 if ( IsUpperLetter(rCC.getCharacterType(rTxt, i)) ) 1149 // Another uppercase letter. Convert it. 1150 aConverted.append( rCC.lowercase(OUString(rTxt[i])) ); 1151 else 1152 // This is not an alphabetic letter. Leave it as-is. 1153 aConverted.append( rTxt[i] ); 1154 } 1155 1156 // Replace the word. 1157 rDoc.Delete(nSttPos, nEndPos); 1158 rDoc.Insert(nSttPos, aConverted.makeStringAndClear()); 1159 1160 return true; 1161 } 1162 1163 1164 sal_Unicode SvxAutoCorrect::GetQuote( sal_Unicode cInsChar, bool bSttQuote, 1165 LanguageType eLang ) const 1166 { 1167 sal_Unicode cRet = bSttQuote ? ( '\"' == cInsChar 1168 ? GetStartDoubleQuote() 1169 : GetStartSingleQuote() ) 1170 : ( '\"' == cInsChar 1171 ? GetEndDoubleQuote() 1172 : GetEndSingleQuote() ); 1173 if( !cRet ) 1174 { 1175 // then through the Language find the right character 1176 if( LANGUAGE_NONE == eLang ) 1177 cRet = cInsChar; 1178 else 1179 { 1180 LocaleDataWrapper& rLcl = GetLocaleDataWrapper( eLang ); 1181 OUString sRet( bSttQuote 1182 ? 
( '\"' == cInsChar 1183 ? rLcl.getDoubleQuotationMarkStart() 1184 : rLcl.getQuotationMarkStart() ) 1185 : ( '\"' == cInsChar 1186 ? rLcl.getDoubleQuotationMarkEnd() 1187 : rLcl.getQuotationMarkEnd() )); 1188 cRet = !sRet.isEmpty() ? sRet[0] : cInsChar; 1189 } 1190 } 1191 return cRet; 1192 } 1193 1194 void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos, 1195 sal_Unicode cInsChar, bool bSttQuote, 1196 bool bIns, bool b_iApostrophe ) const 1197 { 1198 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1199 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1200 1201 OUString sChg( cInsChar ); 1202 if( bIns ) 1203 rDoc.Insert( nInsPos, sChg ); 1204 else 1205 rDoc.Replace( nInsPos, sChg ); 1206 1207 sChg = OUString(cRet); 1208 1209 if( '\"' == cInsChar ) 1210 { 1211 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS) 1212 { 1213 OUString s( cNonBreakingSpace ); // UNICODE code for no break space 1214 if( rDoc.Insert( bSttQuote ? 
nInsPos+1 : nInsPos, s )) 1215 { 1216 if( !bSttQuote ) 1217 ++nInsPos; 1218 } 1219 } 1220 } 1221 1222 rDoc.Replace( nInsPos, sChg ); 1223 1224 // i' -> I' in English (last step for the undo) 1225 if( b_iApostrophe && eLang.anyOf( 1226 LANGUAGE_ENGLISH, 1227 LANGUAGE_ENGLISH_US, 1228 LANGUAGE_ENGLISH_UK, 1229 LANGUAGE_ENGLISH_AUS, 1230 LANGUAGE_ENGLISH_CAN, 1231 LANGUAGE_ENGLISH_NZ, 1232 LANGUAGE_ENGLISH_EIRE, 1233 LANGUAGE_ENGLISH_SAFRICA, 1234 LANGUAGE_ENGLISH_JAMAICA, 1235 LANGUAGE_ENGLISH_CARIBBEAN)) 1236 { 1237 rDoc.Replace( nInsPos-1, "I" ); 1238 } 1239 } 1240 1241 OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPos, 1242 sal_Unicode cInsChar, bool bSttQuote ) 1243 { 1244 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1245 sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang ); 1246 1247 OUString sRet(cRet); 1248 1249 if( '\"' == cInsChar ) 1250 { 1251 if (primary(eLang) == primary(LANGUAGE_FRENCH) && eLang != LANGUAGE_FRENCH_SWISS) 1252 { 1253 if( bSttQuote ) 1254 sRet += " "; 1255 else 1256 sRet = " " + sRet; 1257 } 1258 } 1259 return sRet; 1260 } 1261 1262 // WARNING: rText may become invalid, see comment below 1263 void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt, 1264 sal_Int32 nInsPos, sal_Unicode cChar, 1265 bool bInsert, bool& io_bNbspRunNext, vcl::Window const * pFrameWin ) 1266 { 1267 bool bIsNextRun = io_bNbspRunNext; 1268 io_bNbspRunNext = false; // if it was set, then it has to be turned off 1269 1270 do{ // only for middle check loop !! 
1271 if( cChar ) 1272 { 1273 // Prevent double space 1274 if( nInsPos && ' ' == cChar && 1275 IsAutoCorrFlag( ACFlags::IgnoreDoubleSpace ) && 1276 ' ' == rTxt[ nInsPos - 1 ]) 1277 { 1278 break; 1279 } 1280 1281 bool bSingle = '\'' == cChar; 1282 bool bIsReplaceQuote = 1283 (IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('\"' == cChar )) || 1284 (IsAutoCorrFlag( ACFlags::ChgSglQuotes ) && bSingle ); 1285 if( bIsReplaceQuote ) 1286 { 1287 sal_Unicode cPrev; 1288 bool bSttQuote = !nInsPos; 1289 bool b_iApostrophe = false; 1290 if (!bSttQuote) 1291 { 1292 cPrev = rTxt[ nInsPos-1 ]; 1293 bSttQuote = NonFieldWordDelim(cPrev) || 1294 lcl_IsInAsciiArr( "([{", cPrev ) || 1295 ( cEmDash == cPrev ) || 1296 ( cEnDash == cPrev ); 1297 // tdf#38394 use opening quotation mark << in French l'<<word>> 1298 if ( !bSingle && !bSttQuote && cPrev == cApostrophe && 1299 (nInsPos == 2 || (nInsPos > 2 && IsWordDelim( rTxt[ nInsPos-3 ] ))) ) 1300 { 1301 const LanguageType eLang = GetDocLanguage( rDoc, nInsPos ); 1302 if ( primary(eLang) == primary(LANGUAGE_FRENCH) ) 1303 bSttQuote = true; 1304 } 1305 // tdf#108423 for capitalization of English i'm 1306 b_iApostrophe = bSingle && ( cPrev == 'i' ) && 1307 (( nInsPos == 1 ) || IsWordDelim( rTxt[ nInsPos-2 ] )); 1308 } 1309 InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, b_iApostrophe ); 1310 break; 1311 } 1312 1313 if( bInsert ) 1314 rDoc.Insert( nInsPos, OUString(cChar) ); 1315 else 1316 rDoc.Replace( nInsPos, OUString(cChar) ); 1317 1318 // Hardspaces autocorrection 1319 if ( IsAutoCorrFlag( ACFlags::AddNonBrkSpace ) ) 1320 { 1321 if ( NeedsHardspaceAutocorr( cChar ) && 1322 FnAddNonBrkSpace( rDoc, rTxt, nInsPos, GetDocLanguage( rDoc, nInsPos ), io_bNbspRunNext ) ) 1323 { 1324 ; 1325 } 1326 else if ( bIsNextRun && !IsAutoCorrectChar( cChar ) ) 1327 { 1328 // Remove the NBSP if it wasn't an autocorrection 1329 if ( nInsPos != 0 && NeedsHardspaceAutocorr( rTxt[ nInsPos - 1 ] ) && 1330 cChar != ' ' && cChar != '\t' && cChar != 
cNonBreakingSpace ) 1331 { 1332 // Look for the last HARD_SPACE 1333 sal_Int32 nPos = nInsPos - 1; 1334 bool bContinue = true; 1335 while ( bContinue ) 1336 { 1337 const sal_Unicode cTmpChar = rTxt[ nPos ]; 1338 if ( cTmpChar == cNonBreakingSpace ) 1339 { 1340 rDoc.Delete( nPos, nPos + 1 ); 1341 bContinue = false; 1342 } 1343 else if ( !NeedsHardspaceAutocorr( cTmpChar ) || nPos == 0 ) 1344 bContinue = false; 1345 nPos--; 1346 } 1347 } 1348 } 1349 } 1350 } 1351 1352 if( !nInsPos ) 1353 break; 1354 1355 sal_Int32 nPos = nInsPos - 1; 1356 1357 if( IsWordDelim( rTxt[ nPos ])) 1358 break; 1359 1360 // Set bold or underline automatically? 1361 if (('*' == cChar || '_' == cChar || '/' == cChar || '-' == cChar) && (nPos+1 < rTxt.getLength())) 1362 { 1363 if( IsAutoCorrFlag( ACFlags::ChgWeightUnderl ) ) 1364 { 1365 FnChgWeightUnderl( rDoc, rTxt, nPos+1 ); 1366 } 1367 break; 1368 } 1369 1370 while( nPos && !IsWordDelim( rTxt[ --nPos ])) 1371 ; 1372 1373 // Found a Paragraph-start or a Blank, search for the word shortcut in 1374 // auto. 1375 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character 1376 if( !nPos && !IsWordDelim( rTxt[ 0 ])) 1377 --nCapLttrPos; // begin of paragraph and no blank 1378 1379 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); 1380 CharClass& rCC = GetCharClass( eLang ); 1381 1382 // no symbol characters 1383 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nInsPos )) 1384 break; 1385 1386 if( IsAutoCorrFlag( ACFlags::Autocorrect ) ) 1387 { 1388 // WARNING ATTENTION: rTxt is an alias of the text node's OUString 1389 // and becomes INVALID if ChgAutoCorrWord returns true! 1390 // => use aPara/pPara to create a valid copy of the string! 1391 OUString aPara; 1392 OUString* pPara = IsAutoCorrFlag(ACFlags::CapitalStartSentence) ? 
&aPara : nullptr; 1393 1394 bool bChgWord = rDoc.ChgAutoCorrWord( nCapLttrPos, nInsPos, 1395 *this, pPara ); 1396 if( !bChgWord ) 1397 { 1398 sal_Int32 nCapLttrPos1 = nCapLttrPos, nInsPos1 = nInsPos; 1399 while( nCapLttrPos1 < nInsPos && 1400 lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos1 ] ) 1401 ) 1402 ++nCapLttrPos1; 1403 while( nCapLttrPos1 < nInsPos1 && nInsPos1 && 1404 lcl_IsInAsciiArr( sImplEndSkipChars, rTxt[ nInsPos1-1 ] ) 1405 ) 1406 --nInsPos1; 1407 1408 if( (nCapLttrPos1 != nCapLttrPos || nInsPos1 != nInsPos ) && 1409 nCapLttrPos1 < nInsPos1 && 1410 rDoc.ChgAutoCorrWord( nCapLttrPos1, nInsPos1, *this, pPara )) 1411 { 1412 bChgWord = true; 1413 nCapLttrPos = nCapLttrPos1; 1414 } 1415 } 1416 1417 if( bChgWord ) 1418 { 1419 if( !aPara.isEmpty() ) 1420 { 1421 sal_Int32 nEnd = nCapLttrPos; 1422 while( nEnd < aPara.getLength() && 1423 !IsWordDelim( aPara[ nEnd ])) 1424 ++nEnd; 1425 1426 // Capital letter at beginning of paragraph? 1427 if( IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) 1428 { 1429 FnCapitalStartSentence( rDoc, aPara, false, 1430 nCapLttrPos, nEnd, eLang ); 1431 } 1432 1433 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) 1434 { 1435 FnChgToEnEmDash( rDoc, aPara, nCapLttrPos, nEnd, eLang ); 1436 } 1437 } 1438 break; 1439 } 1440 } 1441 1442 if( ( IsAutoCorrFlag( ACFlags::ChgOrdinalNumber ) && 1443 (nInsPos >= 2 ) && // fdo#69762 avoid autocorrect for 2e-3 1444 ( '-' != cChar || 'E' != rtl::toAsciiUpperCase(rTxt[nInsPos-1]) || '0' > rTxt[nInsPos-2] || '9' < rTxt[nInsPos-2] ) && 1445 FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) || 1446 ( IsAutoCorrFlag( ACFlags::SetINetAttr ) && 1447 ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) && 1448 FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ) 1449 ; 1450 else 1451 { 1452 bool bLockKeyOn = pFrameWin && (pFrameWin->GetIndicatorState() & KeyIndicatorState::CAPSLOCK); 1453 bool bUnsupported = lcl_IsUnsupportedUnicodeChar( rCC, rTxt, nCapLttrPos, nInsPos 
); 1454 1455 if ( bLockKeyOn && IsAutoCorrFlag( ACFlags::CorrectCapsLock ) && 1456 FnCorrectCapsLock( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) 1457 { 1458 // Correct accidental use of cAPS LOCK key (do this only when 1459 // the caps or shift lock key is pressed). Turn off the caps 1460 // lock afterwards. 1461 pFrameWin->SimulateKeyPress( KEY_CAPSLOCK ); 1462 } 1463 1464 // Capital letter at beginning of paragraph ? 1465 if( !bUnsupported && 1466 IsAutoCorrFlag( ACFlags::CapitalStartSentence ) ) 1467 { 1468 FnCapitalStartSentence( rDoc, rTxt, true, nCapLttrPos, nInsPos, eLang ); 1469 } 1470 1471 // Two capital letters at beginning of word ?? 1472 if( !bUnsupported && 1473 IsAutoCorrFlag( ACFlags::CapitalStartWord ) ) 1474 { 1475 FnCapitalStartWord( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); 1476 } 1477 1478 if( IsAutoCorrFlag( ACFlags::ChgToEnEmDash ) ) 1479 { 1480 FnChgToEnEmDash( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ); 1481 } 1482 } 1483 1484 } while( false ); 1485 } 1486 1487 SvxAutoCorrectLanguageLists& SvxAutoCorrect::GetLanguageList_( 1488 LanguageType eLang ) 1489 { 1490 LanguageTag aLanguageTag( eLang); 1491 if (m_aLangTable.find(aLanguageTag) == m_aLangTable.end()) 1492 (void)CreateLanguageFile(aLanguageTag); 1493 return *(m_aLangTable.find(aLanguageTag)->second); 1494 } 1495 1496 void SvxAutoCorrect::SaveCplSttExceptList( LanguageType eLang ) 1497 { 1498 auto const iter = m_aLangTable.find(LanguageTag(eLang)); 1499 if (iter != m_aLangTable.end() && iter->second) 1500 iter->second->SaveCplSttExceptList(); 1501 else 1502 { 1503 SAL_WARN("editeng", "Save an empty list? "); 1504 } 1505 } 1506 1507 void SvxAutoCorrect::SaveWrdSttExceptList(LanguageType eLang) 1508 { 1509 auto const iter = m_aLangTable.find(LanguageTag(eLang)); 1510 if (iter != m_aLangTable.end() && iter->second) 1511 iter->second->SaveWrdSttExceptList(); 1512 else 1513 { 1514 SAL_WARN("editeng", "Save an empty list? "); 1515 } 1516 } 1517 1518 // Adds a single word. 
The list will immediately be written to the file! 1519 bool SvxAutoCorrect::AddCplSttException( const OUString& rNew, 1520 LanguageType eLang ) 1521 { 1522 SvxAutoCorrectLanguageLists* pLists = nullptr; 1523 // either the right language is present or it will be this in the general list 1524 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1525 if (iter != m_aLangTable.end()) 1526 pLists = iter->second.get(); 1527 else 1528 { 1529 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1530 iter = m_aLangTable.find(aLangTagUndetermined); 1531 if (iter != m_aLangTable.end()) 1532 pLists = iter->second.get(); 1533 else if(CreateLanguageFile(aLangTagUndetermined)) 1534 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1535 } 1536 OSL_ENSURE(pLists, "No auto correction data"); 1537 return pLists && pLists->AddToCplSttExceptList(rNew); 1538 } 1539 1540 // Adds a single word. The list will immediately be written to the file! 1541 bool SvxAutoCorrect::AddWrtSttException( const OUString& rNew, 1542 LanguageType eLang ) 1543 { 1544 SvxAutoCorrectLanguageLists* pLists = nullptr; 1545 //either the right language is present or it is set in the general list 1546 auto iter = m_aLangTable.find(LanguageTag(eLang)); 1547 if (iter != m_aLangTable.end()) 1548 pLists = iter->second.get(); 1549 else 1550 { 1551 LanguageTag aLangTagUndetermined( LANGUAGE_UNDETERMINED); 1552 iter = m_aLangTable.find(aLangTagUndetermined); 1553 if (iter != m_aLangTable.end()) 1554 pLists = iter->second.get(); 1555 else if(CreateLanguageFile(aLangTagUndetermined)) 1556 pLists = m_aLangTable.find(aLangTagUndetermined)->second.get(); 1557 } 1558 OSL_ENSURE(pLists, "No auto correction file!"); 1559 return pLists && pLists->AddToWrdSttExceptList(rNew); 1560 } 1561 1562 OUString SvxAutoCorrect::GetPrevAutoCorrWord(SvxAutoCorrDoc const& rDoc, const OUString& rTxt, 1563 sal_Int32 nPos) 1564 { 1565 OUString sRet; 1566 if( !nPos ) 1567 return sRet; 1568 1569 sal_Int32 nEnd = nPos; 1570 1571 // it 
must be followed by a blank or tab! 1572 if( ( nPos < rTxt.getLength() && 1573 !IsWordDelim( rTxt[ nPos ])) || 1574 IsWordDelim( rTxt[ --nPos ])) 1575 return sRet; 1576 1577 while( nPos && !IsWordDelim( rTxt[ --nPos ])) 1578 ; 1579 1580 // Found a Paragraph-start or a Blank, search for the word shortcut in 1581 // auto. 1582 sal_Int32 nCapLttrPos = nPos+1; // on the 1st Character 1583 if( !nPos && !IsWordDelim( rTxt[ 0 ])) 1584 --nCapLttrPos; // Beginning of pargraph and no Blank! 1585 1586 while( lcl_IsInAsciiArr( sImplSttSkipChars, rTxt[ nCapLttrPos ]) ) 1587 if( ++nCapLttrPos >= nEnd ) 1588 return sRet; 1589 1590 if( 3 > nEnd - nCapLttrPos ) 1591 return sRet; 1592 1593 const LanguageType eLang = GetDocLanguage( rDoc, nCapLttrPos ); 1594 1595 CharClass& rCC = GetCharClass(eLang); 1596 1597 if( lcl_IsSymbolChar( rCC, rTxt, nCapLttrPos, nEnd )) 1598 return sRet; 1599 1600 sRet = rTxt.copy( nCapLttrPos, nEnd - nCapLttrPos ); 1601 return sRet; 1602 } 1603 1604 // static 1605 std::vector<OUString> SvxAutoCorrect::GetChunkForAutoText(const OUString& rTxt, 1606 const sal_Int32 nPos) 1607 { 1608 constexpr sal_Int32 nMinLen = 3; 1609 constexpr sal_Int32 nMaxLen = 9; 1610 std::vector<OUString> aRes; 1611 if (nPos >= nMinLen) 1612 { 1613 sal_Int32 nBegin = std::max<sal_Int32>(nPos - nMaxLen, 0); 1614 // TODO: better detect word boundaries (not only whitespaces, but also e.g. 
punctuation) 1615 if (nBegin > 0 && !IsWordDelim(rTxt[nBegin-1])) 1616 { 1617 while (nBegin + nMinLen <= nPos && !IsWordDelim(rTxt[nBegin])) 1618 ++nBegin; 1619 } 1620 if (nBegin + nMinLen <= nPos) 1621 { 1622 OUString sRes = rTxt.copy(nBegin, nPos - nBegin); 1623 aRes.push_back(sRes); 1624 bool bLastStartedWithDelim = IsWordDelim(sRes[0]); 1625 for (sal_Int32 i = 1; i <= sRes.getLength() - nMinLen; ++i) 1626 { 1627 bool bAdd = bLastStartedWithDelim; 1628 bLastStartedWithDelim = IsWordDelim(sRes[i]); 1629 bAdd = bAdd || bLastStartedWithDelim; 1630 if (bAdd) 1631 aRes.push_back(sRes.copy(i)); 1632 } 1633 } 1634 } 1635 return aRes; 1636 } 1637 1638 bool SvxAutoCorrect::CreateLanguageFile( const LanguageTag& rLanguageTag, bool bNewFile ) 1639 { 1640 OSL_ENSURE(m_aLangTable.find(rLanguageTag) == m_aLangTable.end(), "Language already exists "); 1641 1642 OUString sUserDirFile( GetAutoCorrFileName( rLanguageTag, true )); 1643 OUString sShareDirFile( sUserDirFile ); 1644 1645 SvxAutoCorrectLanguageLists* pLists = nullptr; 1646 1647 tools::Time nMinTime( 0, 2 ), nAktTime( tools::Time::SYSTEM ), nLastCheckTime( tools::Time::EMPTY ); 1648 1649 auto nFndPos = aLastFileTable.find(rLanguageTag); 1650 if(nFndPos != aLastFileTable.end() && 1651 (nLastCheckTime.SetTime(nFndPos->second), nLastCheckTime < nAktTime) && 1652 nAktTime - nLastCheckTime < nMinTime) 1653 { 1654 // no need to test the file, because the last check is not older then 1655 // 2 minutes. 
1656 if( bNewFile ) 1657 { 1658 sShareDirFile = sUserDirFile; 1659 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1660 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1661 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1662 aLastFileTable.erase(nFndPos); 1663 } 1664 } 1665 else if( 1666 ( FStatHelper::IsDocument( sUserDirFile ) || 1667 FStatHelper::IsDocument( sShareDirFile = 1668 GetAutoCorrFileName( rLanguageTag ) ) || 1669 FStatHelper::IsDocument( sShareDirFile = 1670 GetAutoCorrFileName( rLanguageTag, false, false, true) ) 1671 ) || 1672 ( sShareDirFile = sUserDirFile, bNewFile ) 1673 ) 1674 { 1675 pLists = new SvxAutoCorrectLanguageLists( *this, sShareDirFile, sUserDirFile ); 1676 LanguageTag aTmp(rLanguageTag); // this insert() needs a non-const reference 1677 m_aLangTable.insert(std::make_pair(aTmp, std::unique_ptr<SvxAutoCorrectLanguageLists>(pLists))); 1678 if (nFndPos != aLastFileTable.end()) 1679 aLastFileTable.erase(nFndPos); 1680 } 1681 else if( !bNewFile ) 1682 { 1683 aLastFileTable[rLanguageTag] = nAktTime.GetTime(); 1684 } 1685 return pLists != nullptr; 1686 } 1687 1688 bool SvxAutoCorrect::PutText( const OUString& rShort, const OUString& rLong, 1689 LanguageType eLang ) 1690 { 1691 LanguageTag aLanguageTag( eLang); 1692 auto const iter = m_aLangTable.find(aLanguageTag); 1693 if (iter != m_aLangTable.end()) 1694 return iter->second->PutText(rShort, rLong); 1695 if(CreateLanguageFile(aLanguageTag)) 1696 return m_aLangTable.find(aLanguageTag)->second->PutText(rShort, rLong); 1697 return false; 1698 } 1699 1700 void SvxAutoCorrect::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, 1701 std::vector<SvxAutocorrWord>& aDeleteEntries, 1702 LanguageType eLang ) 1703 { 1704 LanguageTag aLanguageTag( eLang); 1705 auto const iter = m_aLangTable.find(aLanguageTag); 1706 if (iter != m_aLangTable.end()) 1707 { 1708 
iter->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1709 } 1710 else if(CreateLanguageFile( aLanguageTag )) 1711 { 1712 m_aLangTable.find( aLanguageTag )->second->MakeCombinedChanges( aNewEntries, aDeleteEntries ); 1713 } 1714 } 1715 1716 // - return the replacement text (only for SWG-Format, all other 1717 // can be taken from the word list!) 1718 bool SvxAutoCorrect::GetLongText( const OUString&, OUString& ) 1719 { 1720 return false; 1721 } 1722 1723 void SvxAutoCorrect::refreshBlockList( const uno::Reference< embed::XStorage >& ) 1724 { 1725 } 1726 1727 // Text with attribution (only the SWG - SWG format!) 1728 bool SvxAutoCorrect::PutText( const css::uno::Reference < css::embed::XStorage >&, 1729 const OUString&, const OUString&, SfxObjectShell&, OUString& ) 1730 { 1731 return false; 1732 } 1733 1734 OUString EncryptBlockName_Imp(const OUString& rName) 1735 { 1736 OUStringBuffer aName; 1737 aName.append('#').append(rName); 1738 for (sal_Int32 nLen = rName.getLength(), nPos = 1; nPos < nLen; ++nPos) 1739 { 1740 if (lcl_IsInAsciiArr( "!/:.\\", aName[nPos])) 1741 aName[nPos] &= 0x0f; 1742 } 1743 return aName.makeStringAndClear(); 1744 } 1745 1746 /* This code is copied from SwXMLTextBlocks::GeneratePackageName */ 1747 static void GeneratePackageName ( const OUString& rShort, OUString& rPackageName ) 1748 { 1749 OString sByte(OUStringToOString(rShort, RTL_TEXTENCODING_UTF7)); 1750 OUStringBuffer aBuf(OStringToOUString(sByte, RTL_TEXTENCODING_ASCII_US)); 1751 1752 for (sal_Int32 nPos = 0; nPos < aBuf.getLength(); ++nPos) 1753 { 1754 switch (aBuf[nPos]) 1755 { 1756 case '!': 1757 case '/': 1758 case ':': 1759 case '.': 1760 case '\\': 1761 aBuf[nPos] = '_'; 1762 break; 1763 default: 1764 break; 1765 } 1766 } 1767 1768 rPackageName = aBuf.makeStringAndClear(); 1769 } 1770 1771 static const SvxAutocorrWord* lcl_SearchWordsInList( 1772 SvxAutoCorrectLanguageLists* pList, const OUString& rTxt, 1773 sal_Int32& rStt, sal_Int32 nEndPos) 1774 { 1775 const 
SvxAutocorrWordList* pAutoCorrWordList = pList->GetAutocorrWordList(); 1776 return pAutoCorrWordList->SearchWordsInList( rTxt, rStt, nEndPos ); 1777 } 1778 1779 // the search for the words in the substitution table 1780 const SvxAutocorrWord* SvxAutoCorrect::SearchWordsInList( 1781 const OUString& rTxt, sal_Int32& rStt, sal_Int32 nEndPos, 1782 SvxAutoCorrDoc&, LanguageTag& rLang ) 1783 { 1784 const SvxAutocorrWord* pRet = nullptr; 1785 LanguageTag aLanguageTag( rLang); 1786 if( aLanguageTag.isSystemLocale() ) 1787 aLanguageTag.reset( MsLangId::getSystemLanguage()); 1788 1789 /* TODO-BCP47: this is so ugly, should all maybe be a proper fallback 1790 * list instead? */ 1791 1792 // First search for eLang, then US-English -> English 1793 // and last in LANGUAGE_UNDETERMINED 1794 if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false)) 1795 { 1796 //the language is available - so bring it on 1797 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1798 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1799 if( pRet ) 1800 { 1801 rLang = aLanguageTag; 1802 return pRet; 1803 } 1804 } 1805 1806 // If it still could not be found here, then keep on searching 1807 LanguageType eLang = aLanguageTag.getLanguageType(); 1808 // the primary language for example EN 1809 aLanguageTag.reset(aLanguageTag.getLanguage()); 1810 LanguageType nTmpKey = aLanguageTag.getLanguageType(false); 1811 if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED && 1812 (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || 1813 CreateLanguageFile(aLanguageTag, false))) 1814 { 1815 //the language is available - so bring it on 1816 std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second; 1817 pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos ); 1818 if( pRet ) 1819 { 1820 rLang = aLanguageTag; 1821 return pRet; 1822 } 1823 } 1824 1825 
if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        std::unique_ptr<SvxAutoCorrectLanguageLists> const& pList = m_aLangTable.find(aLanguageTag)->second;
        pRet = lcl_SearchWordsInList( pList.get(), rTxt, rStt, nEndPos );
        if( pRet )
        {
            rLang = aLanguageTag;
            return pRet;
        }
    }
    return nullptr;
}

/** Looks up sWord in the word-start exception lists.

    Three language tables are consulted in order, returning on the first hit:
    the table for eLang, the table for the primary language of eLang, and the
    table for LANGUAGE_UNDETERMINED. A table that is not yet in m_aLangTable
    is requested through CreateLanguageFile().

    @param eLang  language to start the search with
    @param sWord  word to look for
    @return true if one of the consulted lists contains sWord
*/
bool SvxAutoCorrect::FindInWrdSttExceptList( LanguageType eLang,
                                             const OUString& sWord )
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: again horrible ugliness */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        auto const& pList = m_aLangTable.find(aLanguageTag)->second;
        if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
            return true;
    }

    // If it still could not be found here, then keep on searching
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    // skip this step when the primary language is eLang itself or undetermined
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
                (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
                 CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        auto const& pList = m_aLangTable.find(aLanguageTag)->second;
        if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
            return true;
    }

    // last resort: the language independent table
    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        auto const& pList = m_aLangTable.find(aLanguageTag)->second;
        if(pList->GetWrdSttExceptList()->find(sWord) != pList->GetWrdSttExceptList()->end() )
            return true;
    }
    return false;
}

/** Checks whether sWord ends with one of the "~" entries of pList.

    Scanning starts at the position of the entry "~" and continues while the
    entries begin with '~'. For each such entry longer than two characters
    (and not longer than the word plus the tilde), the characters after the
    tilde are compared, ASCII case-insensitively and from the end backwards,
    with the end of sWord.

    @return true as soon as one entry matches the tail of sWord
*/
static bool lcl_FindAbbreviation(const SvStringsISortDtor* pList, const OUString& sWord)
{
    OUString sAbk('~');
    SvStringsISortDtor::const_iterator it = pList->find( sAbk );
    SvStringsISortDtor::size_type nPos = it - pList->begin();
    if( nPos < pList->size() )
    {
        OUString sLowerWord(sWord.toAsciiLowerCase());
        OUString sAbr;
        for( SvStringsISortDtor::size_type n = nPos; n < pList->size(); ++n )
        {
            sAbr = (*pList)[ n ];
            if (sAbr[0] != '~')
                break;
            // ~ and ~. are not allowed!
            if( 2 < sAbr.getLength() && sAbr.getLength() - 1 <= sWord.getLength() )
            {
                OUString sLowerAbk(sAbr.toAsciiLowerCase());
                // walk both strings backwards; i reaching 0 means that every
                // character behind the leading '~' matched
                for (sal_Int32 i = sLowerAbk.getLength(), ii = sLowerWord.getLength(); i;)
                {
                    if( !--i ) // agrees
                        return true;

                    if( sLowerAbk[i] != sLowerWord[--ii])
                        break;
                }
            }
        }
    }
    // the entry in front of the first scanned position must not start with '~'
    OSL_ENSURE( !(nPos && '~' == (*pList)[ --nPos ][ 0 ] ),
            "Wrongly sorted exception list?" );
    return false;
}

/** Looks up sWord in the sentence-start (capital start) exception lists.

    The same three language tables as in FindInWrdSttExceptList() are
    consulted in the same order.

    @param eLang          language to start the search with
    @param sWord          word to look for
    @param bAbbreviation  if true the "~" entries are matched against the end
                          of sWord via lcl_FindAbbreviation(), otherwise sWord
                          is searched as a whole entry
    @return true if one of the consulted lists matches
*/
bool SvxAutoCorrect::FindInCplSttExceptList(LanguageType eLang,
                                const OUString& sWord, bool bAbbreviation)
{
    LanguageTag aLanguageTag( eLang);

    /* TODO-BCP47: did I mention terrible horrible ugliness? */

    // First search for eLang, then primary language of eLang
    // and last in LANGUAGE_UNDETERMINED

    if (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() || CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }

    // If it still could not be found here, then keep on searching
    // the primary language for example EN
    aLanguageTag.reset(aLanguageTag.getLanguage());
    LanguageType nTmpKey = aLanguageTag.getLanguageType(false);
    // skip this step when the primary language is eLang itself or undetermined
    if (nTmpKey != eLang && nTmpKey != LANGUAGE_UNDETERMINED &&
                (m_aLangTable.find(aLanguageTag) != m_aLangTable.end() ||
                 CreateLanguageFile(aLanguageTag, false)))
    {
        //the language is available - so bring it on
        const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
        if(bAbbreviation ? lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() )
            return true;
    }

    // last resort: the language independent table
    if (m_aLangTable.find(aLanguageTag.reset(LANGUAGE_UNDETERMINED)) != m_aLangTable.end() ||
            CreateLanguageFile(aLanguageTag, false))
    {
        //the language is available - so bring it on
        const SvStringsISortDtor* pList = m_aLangTable.find(aLanguageTag)->second->GetCplSttExceptList();
        if(bAbbreviation ?
lcl_FindAbbreviation(pList, sWord) : pList->find(sWord) != pList->end() ) 1955 return true; 1956 } 1957 return false; 1958 } 1959 1960 OUString SvxAutoCorrect::GetAutoCorrFileName( const LanguageTag& rLanguageTag, 1961 bool bNewFile, bool bTst, bool bUnlocalized ) const 1962 { 1963 OUString sRet, sExt( rLanguageTag.getBcp47() ); 1964 if (bUnlocalized) 1965 { 1966 // we don't want variant, so we'll take "fr" instead of "fr-CA" for example 1967 std::vector< OUString > vecFallBackStrings = rLanguageTag.getFallbackStrings(false); 1968 if (!vecFallBackStrings.empty()) 1969 sExt = vecFallBackStrings[0]; 1970 } 1971 1972 sExt = "_" + sExt + ".dat"; 1973 if( bNewFile ) 1974 sRet = sUserAutoCorrFile + sExt; 1975 else if( !bTst ) 1976 sRet = sShareAutoCorrFile + sExt; 1977 else 1978 { 1979 // test first in the user directory - if not exist, then 1980 sRet = sUserAutoCorrFile + sExt; 1981 if( !FStatHelper::IsDocument( sRet )) 1982 sRet = sShareAutoCorrFile + sExt; 1983 } 1984 return sRet; 1985 } 1986 1987 SvxAutoCorrectLanguageLists::SvxAutoCorrectLanguageLists( 1988 SvxAutoCorrect& rParent, 1989 const OUString& rShareAutoCorrectFile, 1990 const OUString& rUserAutoCorrectFile) 1991 : sShareAutoCorrFile( rShareAutoCorrectFile ), 1992 sUserAutoCorrFile( rUserAutoCorrectFile ), 1993 aModifiedDate( Date::EMPTY ), 1994 aModifiedTime( tools::Time::EMPTY ), 1995 aLastCheckTime( tools::Time::EMPTY ), 1996 rAutoCorrect(rParent), 1997 nFlags(ACFlags::NONE) 1998 { 1999 } 2000 2001 SvxAutoCorrectLanguageLists::~SvxAutoCorrectLanguageLists() 2002 { 2003 } 2004 2005 bool SvxAutoCorrectLanguageLists::IsFileChanged_Imp() 2006 { 2007 // Access the file system only every 2 minutes to check the date stamp 2008 bool bRet = false; 2009 2010 tools::Time nMinTime( 0, 2 ); 2011 tools::Time nAktTime( tools::Time::SYSTEM ); 2012 if( aLastCheckTime <= nAktTime) // overflow? 
2013 return false; 2014 nAktTime -= aLastCheckTime; 2015 if( nAktTime > nMinTime ) // min time past 2016 { 2017 Date aTstDate( Date::EMPTY ); tools::Time aTstTime( tools::Time::EMPTY ); 2018 if( FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile, 2019 &aTstDate, &aTstTime ) && 2020 ( aModifiedDate != aTstDate || aModifiedTime != aTstTime )) 2021 { 2022 bRet = true; 2023 // then remove all the lists fast! 2024 if( (ACFlags::CplSttLstLoad & nFlags) && pCplStt_ExcptLst ) 2025 { 2026 pCplStt_ExcptLst.reset(); 2027 } 2028 if( (ACFlags::WrdSttLstLoad & nFlags) && pWrdStt_ExcptLst ) 2029 { 2030 pWrdStt_ExcptLst.reset(); 2031 } 2032 if( (ACFlags::ChgWordLstLoad & nFlags) && pAutocorr_List ) 2033 { 2034 pAutocorr_List.reset(); 2035 } 2036 nFlags &= ~ACFlags(ACFlags::CplSttLstLoad | ACFlags::WrdSttLstLoad | ACFlags::ChgWordLstLoad ); 2037 } 2038 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2039 } 2040 return bRet; 2041 } 2042 2043 void SvxAutoCorrectLanguageLists::LoadXMLExceptList_Imp( 2044 std::unique_ptr<SvStringsISortDtor>& rpLst, 2045 const char* pStrmName, 2046 tools::SvRef<SotStorage>& rStg) 2047 { 2048 if( rpLst ) 2049 rpLst->clear(); 2050 else 2051 rpLst.reset( new SvStringsISortDtor ); 2052 2053 { 2054 const OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 ); 2055 2056 if( rStg.is() && rStg->IsStream( sStrmName ) ) 2057 { 2058 tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName, 2059 ( StreamMode::READ | StreamMode::SHARE_DENYWRITE | StreamMode::NOCREATE ) ); 2060 if( ERRCODE_NONE != xStrm->GetError()) 2061 { 2062 xStrm.clear(); 2063 rStg.clear(); 2064 RemoveStream_Imp( sStrmName ); 2065 } 2066 else 2067 { 2068 uno::Reference< uno::XComponentContext > xContext = 2069 comphelper::getProcessComponentContext(); 2070 2071 xml::sax::InputSource aParserInput; 2072 aParserInput.sSystemId = sStrmName; 2073 2074 xStrm->Seek( 0 ); 2075 xStrm->SetBufferSize( 8 * 1024 ); 2076 aParserInput.aInputStream = new 
utl::OInputStreamWrapper( *xStrm );

                // get filter
                rtl::Reference< SvXMLExceptionListImport > xImport = new SvXMLExceptionListImport ( xContext, *rpLst );

                // connect parser and filter
                uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;
                xImport->setTokenHandler( xTokenHandler );

                // parse
                try
                {
                    xImport->parseStream( aParserInput );
                }
                catch( const xml::sax::SAXParseException& )
                {
                    // re throw ?
                }
                catch( const xml::sax::SAXException& )
                {
                    // re throw ?
                }
                catch( const io::IOException& )
                {
                    // re throw ?
                }
            }
        }

        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                        &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
    }

}

/** Writes an exception list as XML stream into a storage.

    Nothing happens without a storage. An empty list removes the stream and
    commits the storage. Otherwise the stream is truncated, written and
    committed; if that worked and bConvert is false the storage is committed
    too, and if committing the storage fails the stream is removed again.

    @param rLst       list to write
    @param pStrmName  name of the stream inside the storage
    @param rStg       storage to write to
    @param bConvert   if true the storage itself is not committed here
*/
void SvxAutoCorrectLanguageLists::SaveExceptList_Imp(
                            const SvStringsISortDtor& rLst,
                            const char* pStrmName,
                            tools::SvRef<SotStorage> const &rStg,
                            bool bConvert )
{
    if( !rStg.is() )
        return;

    OUString sStrmName( pStrmName, strlen(pStrmName), RTL_TEXTENCODING_MS_1252 );
    if( rLst.empty() )
    {
        rStg->Remove( sStrmName );
        rStg->Commit();
    }
    else
    {
        tools::SvRef<SotStorageStream> xStrm = rStg->OpenSotStream( sStrmName,
                ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) );
        if( xStrm.is() )
        {
            // start from an empty stream
            xStrm->SetSize( 0 );
            xStrm->SetBufferSize( 8192 );
            xStrm->SetProperty( "MediaType", Any(OUString( "text/xml" )) );


            uno::Reference< uno::XComponentContext > xContext =
                comphelper::getProcessComponentContext();

            uno::Reference < xml::sax::XWriter > xWriter  = xml::sax::Writer::create(xContext);
            uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *xStrm );
            xWriter->setOutputStream(xOut);

            uno::Reference < xml::sax::XDocumentHandler > xHandler(xWriter, UNO_QUERY_THROW);
            rtl::Reference< SvXMLExceptionListExport > xExp( new SvXMLExceptionListExport( xContext, rLst, sStrmName, xHandler ) );

            xExp->exportDoc( XML_BLOCK_LIST );

            xStrm->Commit();
            if( xStrm->GetError() == ERRCODE_NONE )
            {
                // release the stream before the storage is committed
                xStrm.clear();
                if (!bConvert)
                {
                    rStg->Commit();
                    if( ERRCODE_NONE != rStg->GetError() )
                    {
                        rStg->Remove( sStrmName );
                        rStg->Commit();
                    }
                }
            }
        }
    }
}

/** (Re)loads the replacement word list from sShareAutoCorrFile.

    An existing list object is emptied and reused, otherwise a new one is
    created. Any uno::Exception while opening or parsing is logged and leaves
    the list with what was read so far. The modification stamp and the check
    time are refreshed in any case.

    @return the list owned by this object
*/
SvxAutocorrWordList* SvxAutoCorrectLanguageLists::LoadAutocorrWordList()
{
    if( pAutocorr_List )
        pAutocorr_List->DeleteAndDestroyAll();
    else
        pAutocorr_List.reset( new SvxAutocorrWordList() );

    try
    {
        uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sShareAutoCorrFile, embed::ElementModes::READ );
        uno::Reference < io::XStream > xStrm = xStg->openStreamElement( pXMLImplAutocorr_ListStr, embed::ElementModes::READ );
        uno::Reference< uno::XComponentContext > xContext = comphelper::getProcessComponentContext();

        xml::sax::InputSource aParserInput;
        aParserInput.sSystemId = pXMLImplAutocorr_ListStr;
        aParserInput.aInputStream = xStrm->getInputStream();

        // get parser
        uno::Reference< xml::sax::XFastParser > xParser = xml::sax::FastParser::create(xContext);
        SAL_INFO("editeng", "AutoCorrect Import" );
        uno::Reference< xml::sax::XFastDocumentHandler > xFilter = new SvXMLAutoCorrectImport( xContext, pAutocorr_List.get(), rAutoCorrect, xStg );
        uno::Reference<xml::sax::XFastTokenHandler> xTokenHandler = new SvXMLAutoCorrectTokenHandler;

        // connect parser and filter
        xParser->setFastDocumentHandler( xFilter );
        xParser->registerNamespace( "http://openoffice.org/2001/block-list", SvXMLAutoCorrectToken::NAMESPACE );
        xParser->setTokenHandler(xTokenHandler);

        // parse
        xParser->parseStream( aParserInput );
    }
    catch ( const uno::Exception& )
    {
        TOOLS_WARN_EXCEPTION("editeng", "when loading " << sShareAutoCorrFile);
    }

    // Set time stamp
    FStatHelper::GetModifiedDateTimeOfFile( sShareAutoCorrFile,
                                    &aModifiedDate, &aModifiedTime );
    aLastCheckTime = tools::Time( tools::Time::SYSTEM );

    return pAutocorr_List.get();
}

/** Returns the replacement word list, loading it first if it has not been
    loaded yet or if IsFileChanged_Imp() reports a modified file.

    @return the list owned by this object, never nullptr
*/
const SvxAutocorrWordList* SvxAutoCorrectLanguageLists::GetAutocorrWordList()
{
    if( !( ACFlags::ChgWordLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadAutocorrWordList();
        if( !pAutocorr_List )
        {
            OSL_ENSURE( false, "No valid list" );
            pAutocorr_List.reset( new SvxAutocorrWordList() );
        }
        nFlags |= ACFlags::ChgWordLstLoad;
    }
    return pAutocorr_List.get();
}

/** Returns the sentence-start exception list, loading it first if it has not
    been loaded yet or if IsFileChanged_Imp() reports a modified file.

    @return the list owned by this object, never nullptr
*/
SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetCplSttExceptList()
{
    if( !( ACFlags::CplSttLstLoad & nFlags ) || IsFileChanged_Imp() )
    {
        LoadCplSttExceptList();
        if( !pCplStt_ExcptLst )
        {
            // loading did not create a list - fall back to an empty one
            OSL_ENSURE( false, "No valid list" );
            pCplStt_ExcptLst.reset( new SvStringsISortDtor );
        }
        nFlags |= ACFlags::CplSttLstLoad;
    }
    return pCplStt_ExcptLst.get();
}

/** Adds rNew to the sentence-start exception list and saves the list to the
    user file.

    @return true if rNew is not empty and was not in the list before
*/
bool SvxAutoCorrectLanguageLists::AddToCplSttExceptList(const OUString& rNew)
{
    bool bRet = false;
    if( !rNew.isEmpty() && GetCplSttExceptList()->insert( rNew ).second )
    {
        MakeUserStorage_Impl();
        tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE );

        SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg );

        xStg = nullptr;
        // Set time stamp
        FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile,
                                            &aModifiedDate, &aModifiedTime );
        aLastCheckTime = tools::Time( tools::Time::SYSTEM );
        bRet = true;
    }
    return bRet;
}

bool
SvxAutoCorrectLanguageLists::AddToWrdSttExceptList(const OUString& rNew) 2264 { 2265 bool bRet = false; 2266 SvStringsISortDtor* pExceptList = LoadWrdSttExceptList(); 2267 if( !rNew.isEmpty() && pExceptList && pExceptList->insert( rNew ).second ) 2268 { 2269 MakeUserStorage_Impl(); 2270 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2271 2272 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2273 2274 xStg = nullptr; 2275 // Set time stamp 2276 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2277 &aModifiedDate, &aModifiedTime ); 2278 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2279 bRet = true; 2280 } 2281 return bRet; 2282 } 2283 2284 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadCplSttExceptList() 2285 { 2286 try 2287 { 2288 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); 2289 OUString sTemp ( pXMLImplCplStt_ExcptLstStr ); 2290 if( xStg.is() && xStg->IsContained( sTemp ) ) 2291 LoadXMLExceptList_Imp( pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2292 } 2293 catch (const css::ucb::ContentCreationException&) 2294 { 2295 } 2296 return pCplStt_ExcptLst.get(); 2297 } 2298 2299 void SvxAutoCorrectLanguageLists::SaveCplSttExceptList() 2300 { 2301 MakeUserStorage_Impl(); 2302 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2303 2304 SaveExceptList_Imp( *pCplStt_ExcptLst, pXMLImplCplStt_ExcptLstStr, xStg ); 2305 2306 xStg = nullptr; 2307 2308 // Set time stamp 2309 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2310 &aModifiedDate, &aModifiedTime ); 2311 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2312 } 2313 2314 SvStringsISortDtor* SvxAutoCorrectLanguageLists::LoadWrdSttExceptList() 2315 { 2316 try 2317 { 2318 tools::SvRef<SotStorage> xStg = new SotStorage( sShareAutoCorrFile, StreamMode::READ | StreamMode::SHARE_DENYNONE ); 2319 
OUString sTemp ( pXMLImplWrdStt_ExcptLstStr ); 2320 if( xStg.is() && xStg->IsContained( sTemp ) ) 2321 LoadXMLExceptList_Imp( pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2322 } 2323 catch (const css::ucb::ContentCreationException &) 2324 { 2325 TOOLS_WARN_EXCEPTION("editeng", "SvxAutoCorrectLanguageLists::LoadWrdSttExceptList"); 2326 } 2327 return pWrdStt_ExcptLst.get(); 2328 } 2329 2330 void SvxAutoCorrectLanguageLists::SaveWrdSttExceptList() 2331 { 2332 MakeUserStorage_Impl(); 2333 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2334 2335 SaveExceptList_Imp( *pWrdStt_ExcptLst, pXMLImplWrdStt_ExcptLstStr, xStg ); 2336 2337 xStg = nullptr; 2338 // Set time stamp 2339 FStatHelper::GetModifiedDateTimeOfFile( sUserAutoCorrFile, 2340 &aModifiedDate, &aModifiedTime ); 2341 aLastCheckTime = tools::Time( tools::Time::SYSTEM ); 2342 } 2343 2344 SvStringsISortDtor* SvxAutoCorrectLanguageLists::GetWrdSttExceptList() 2345 { 2346 if( !( ACFlags::WrdSttLstLoad & nFlags ) || IsFileChanged_Imp() ) 2347 { 2348 LoadWrdSttExceptList(); 2349 if( !pWrdStt_ExcptLst ) 2350 { 2351 OSL_ENSURE( false, "No valid list" ); 2352 pWrdStt_ExcptLst.reset( new SvStringsISortDtor ); 2353 } 2354 nFlags |= ACFlags::WrdSttLstLoad; 2355 } 2356 return pWrdStt_ExcptLst.get(); 2357 } 2358 2359 void SvxAutoCorrectLanguageLists::RemoveStream_Imp( const OUString& rName ) 2360 { 2361 if( sShareAutoCorrFile != sUserAutoCorrFile ) 2362 { 2363 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2364 if( xStg.is() && ERRCODE_NONE == xStg->GetError() && 2365 xStg->IsStream( rName ) ) 2366 { 2367 xStg->Remove( rName ); 2368 xStg->Commit(); 2369 2370 xStg = nullptr; 2371 } 2372 } 2373 } 2374 2375 void SvxAutoCorrectLanguageLists::MakeUserStorage_Impl() 2376 { 2377 // The conversion needs to happen if the file is already in the user 2378 // directory and is in the old format. 
Additionally it needs to 2379 // happen when the file is being copied from share to user. 2380 2381 bool bError = false, bConvert = false, bCopy = false; 2382 INetURLObject aDest; 2383 INetURLObject aSource; 2384 2385 if (sUserAutoCorrFile != sShareAutoCorrFile ) 2386 { 2387 aSource = INetURLObject ( sShareAutoCorrFile ); 2388 aDest = INetURLObject ( sUserAutoCorrFile ); 2389 if ( SotStorage::IsOLEStorage ( sShareAutoCorrFile ) ) 2390 { 2391 aDest.SetExtension ( "bak" ); 2392 bConvert = true; 2393 } 2394 bCopy = true; 2395 } 2396 else if ( SotStorage::IsOLEStorage ( sUserAutoCorrFile ) ) 2397 { 2398 aSource = INetURLObject ( sUserAutoCorrFile ); 2399 aDest = INetURLObject ( sUserAutoCorrFile ); 2400 aDest.SetExtension ( "bak" ); 2401 bCopy = bConvert = true; 2402 } 2403 if (bCopy) 2404 { 2405 try 2406 { 2407 OUString sMain(aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri )); 2408 sal_Int32 nSlashPos = sMain.lastIndexOf('/'); 2409 sMain = sMain.copy(0, nSlashPos); 2410 ::ucbhelper::Content aNewContent( sMain, uno::Reference< XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2411 TransferInfo aInfo; 2412 aInfo.NameClash = NameClash::OVERWRITE; 2413 aInfo.NewTitle = aDest.GetLastName(); 2414 aInfo.SourceURL = aSource.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ); 2415 aInfo.MoveData = false; 2416 aNewContent.executeCommand( "transfer", Any(aInfo)); 2417 } 2418 catch (...) 
2419 { 2420 bError = true; 2421 } 2422 } 2423 if (bConvert && !bError) 2424 { 2425 tools::SvRef<SotStorage> xSrcStg = new SotStorage( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), StreamMode::READ ); 2426 tools::SvRef<SotStorage> xDstStg = new SotStorage( sUserAutoCorrFile, StreamMode::WRITE ); 2427 2428 if( xSrcStg.is() && xDstStg.is() ) 2429 { 2430 OUString sXMLWord ( pXMLImplWrdStt_ExcptLstStr ); 2431 OUString sXMLSentence ( pXMLImplCplStt_ExcptLstStr ); 2432 std::unique_ptr<SvStringsISortDtor> pTmpWordList; 2433 2434 if (xSrcStg->IsContained( sXMLWord ) ) 2435 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xSrcStg ); 2436 2437 if (pTmpWordList) 2438 { 2439 SaveExceptList_Imp( *pTmpWordList, pXMLImplWrdStt_ExcptLstStr, xDstStg, true ); 2440 pTmpWordList.reset(); 2441 } 2442 2443 2444 if (xSrcStg->IsContained( sXMLSentence ) ) 2445 LoadXMLExceptList_Imp( pTmpWordList, pXMLImplCplStt_ExcptLstStr, xSrcStg ); 2446 2447 if (pTmpWordList) 2448 { 2449 SaveExceptList_Imp( *pTmpWordList, pXMLImplCplStt_ExcptLstStr, xDstStg, true ); 2450 pTmpWordList->clear(); 2451 } 2452 2453 GetAutocorrWordList(); 2454 MakeBlocklist_Imp( *xDstStg ); 2455 sShareAutoCorrFile = sUserAutoCorrFile; 2456 xDstStg = nullptr; 2457 try 2458 { 2459 ::ucbhelper::Content aContent ( aDest.GetMainURL( INetURLObject::DecodeMechanism::ToIUri ), uno::Reference < XCommandEnvironment >(), comphelper::getProcessComponentContext() ); 2460 aContent.executeCommand ( "delete", makeAny ( true ) ); 2461 } 2462 catch (...) 
2463 { 2464 } 2465 } 2466 } 2467 else if( bCopy && !bError ) 2468 sShareAutoCorrFile = sUserAutoCorrFile; 2469 } 2470 2471 bool SvxAutoCorrectLanguageLists::MakeBlocklist_Imp( SotStorage& rStg ) 2472 { 2473 bool bRet = true, bRemove = !pAutocorr_List || pAutocorr_List->empty(); 2474 if( !bRemove ) 2475 { 2476 tools::SvRef<SotStorageStream> refList = rStg.OpenSotStream( pXMLImplAutocorr_ListStr, 2477 ( StreamMode::READ | StreamMode::WRITE | StreamMode::SHARE_DENYWRITE ) ); 2478 if( refList.is() ) 2479 { 2480 refList->SetSize( 0 ); 2481 refList->SetBufferSize( 8192 ); 2482 refList->SetProperty( "MediaType", Any(OUString( "text/xml" )) ); 2483 2484 uno::Reference< uno::XComponentContext > xContext = 2485 comphelper::getProcessComponentContext(); 2486 2487 uno::Reference < xml::sax::XWriter > xWriter = xml::sax::Writer::create(xContext); 2488 uno::Reference < io::XOutputStream> xOut = new utl::OOutputStreamWrapper( *refList ); 2489 xWriter->setOutputStream(xOut); 2490 2491 rtl::Reference< SvXMLAutoCorrectExport > xExp( new SvXMLAutoCorrectExport( xContext, pAutocorr_List.get(), pXMLImplAutocorr_ListStr, xWriter ) ); 2492 2493 xExp->exportDoc( XML_BLOCK_LIST ); 2494 2495 refList->Commit(); 2496 bRet = ERRCODE_NONE == refList->GetError(); 2497 if( bRet ) 2498 { 2499 refList.clear(); 2500 rStg.Commit(); 2501 if( ERRCODE_NONE != rStg.GetError() ) 2502 { 2503 bRemove = true; 2504 bRet = false; 2505 } 2506 } 2507 } 2508 else 2509 bRet = false; 2510 } 2511 2512 if( bRemove ) 2513 { 2514 rStg.Remove( pXMLImplAutocorr_ListStr ); 2515 rStg.Commit(); 2516 } 2517 2518 return bRet; 2519 } 2520 2521 bool SvxAutoCorrectLanguageLists::MakeCombinedChanges( std::vector<SvxAutocorrWord>& aNewEntries, std::vector<SvxAutocorrWord>& aDeleteEntries ) 2522 { 2523 // First get the current list! 
2524 GetAutocorrWordList(); 2525 2526 MakeUserStorage_Impl(); 2527 tools::SvRef<SotStorage> xStorage = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2528 2529 bool bRet = xStorage.is() && ERRCODE_NONE == xStorage->GetError(); 2530 2531 if( bRet ) 2532 { 2533 for (SvxAutocorrWord & aWordToDelete : aDeleteEntries) 2534 { 2535 std::optional<SvxAutocorrWord> xFoundEntry = pAutocorr_List->FindAndRemove( &aWordToDelete ); 2536 if( xFoundEntry ) 2537 { 2538 if( !xFoundEntry->IsTextOnly() ) 2539 { 2540 OUString aName( aWordToDelete.GetShort() ); 2541 if (xStorage->IsOLEStorage()) 2542 aName = EncryptBlockName_Imp(aName); 2543 else 2544 GeneratePackageName ( aWordToDelete.GetShort(), aName ); 2545 2546 if( xStorage->IsContained( aName ) ) 2547 { 2548 xStorage->Remove( aName ); 2549 bRet = xStorage->Commit(); 2550 } 2551 } 2552 } 2553 } 2554 2555 for (const SvxAutocorrWord & aNewEntrie : aNewEntries) 2556 { 2557 SvxAutocorrWord aWordToAdd(aNewEntrie.GetShort(), aNewEntrie.GetLong(), true ); 2558 std::optional<SvxAutocorrWord> xRemoved = pAutocorr_List->FindAndRemove( &aWordToAdd ); 2559 if( xRemoved ) 2560 { 2561 if( !xRemoved->IsTextOnly() ) 2562 { 2563 // Still have to remove the Storage 2564 OUString sStorageName( aWordToAdd.GetShort() ); 2565 if (xStorage->IsOLEStorage()) 2566 sStorageName = EncryptBlockName_Imp(sStorageName); 2567 else 2568 GeneratePackageName ( aWordToAdd.GetShort(), sStorageName); 2569 2570 if( xStorage->IsContained( sStorageName ) ) 2571 xStorage->Remove( sStorageName ); 2572 } 2573 } 2574 bRet = pAutocorr_List->Insert( std::move(aWordToAdd) ); 2575 2576 if ( !bRet ) 2577 { 2578 break; 2579 } 2580 } 2581 2582 if ( bRet ) 2583 { 2584 bRet = MakeBlocklist_Imp( *xStorage ); 2585 } 2586 } 2587 return bRet; 2588 } 2589 2590 bool SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, const OUString& rLong ) 2591 { 2592 // First get the current list! 
2593 GetAutocorrWordList(); 2594 2595 MakeUserStorage_Impl(); 2596 tools::SvRef<SotStorage> xStg = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2597 2598 bool bRet = xStg.is() && ERRCODE_NONE == xStg->GetError(); 2599 2600 // Update the word list 2601 if( bRet ) 2602 { 2603 SvxAutocorrWord aNew(rShort, rLong, true ); 2604 std::optional<SvxAutocorrWord> xRemove = pAutocorr_List->FindAndRemove( &aNew ); 2605 if( xRemove ) 2606 { 2607 if( !xRemove->IsTextOnly() ) 2608 { 2609 // Still have to remove the Storage 2610 OUString sStgNm( rShort ); 2611 if (xStg->IsOLEStorage()) 2612 sStgNm = EncryptBlockName_Imp(sStgNm); 2613 else 2614 GeneratePackageName ( rShort, sStgNm); 2615 2616 if( xStg->IsContained( sStgNm ) ) 2617 xStg->Remove( sStgNm ); 2618 } 2619 } 2620 2621 if( pAutocorr_List->Insert( std::move(aNew) ) ) 2622 { 2623 bRet = MakeBlocklist_Imp( *xStg ); 2624 xStg = nullptr; 2625 } 2626 else 2627 { 2628 bRet = false; 2629 } 2630 } 2631 return bRet; 2632 } 2633 2634 void SvxAutoCorrectLanguageLists::PutText( const OUString& rShort, 2635 SfxObjectShell& rShell ) 2636 { 2637 // First get the current list! 2638 GetAutocorrWordList(); 2639 2640 MakeUserStorage_Impl(); 2641 2642 OUString sLong; 2643 try 2644 { 2645 uno::Reference < embed::XStorage > xStg = comphelper::OStorageHelper::GetStorageFromURL( sUserAutoCorrFile, embed::ElementModes::READWRITE ); 2646 bool bRet = rAutoCorrect.PutText( xStg, sUserAutoCorrFile, rShort, rShell, sLong ); 2647 xStg = nullptr; 2648 2649 // Update the word list 2650 if( bRet ) 2651 { 2652 if( pAutocorr_List->Insert( SvxAutocorrWord(rShort, sLong, false) ) ) 2653 { 2654 tools::SvRef<SotStorage> xStor = new SotStorage( sUserAutoCorrFile, StreamMode::READWRITE ); 2655 MakeBlocklist_Imp( *xStor ); 2656 } 2657 } 2658 } 2659 catch ( const uno::Exception& ) 2660 { 2661 } 2662 } 2663 2664 // Keep the list sorted ... 
struct SvxAutocorrWordList::CompareSvxAutocorrWordList
{
    // strict weak ordering of the entries by their short form, using the
    // collator returned by GetCollatorWrapper()
    bool operator()( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs ) const
    {
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        return rCmp.compareString( lhs.GetShort(), rhs.GetShort() ) < 0;
    }
};

namespace {

typedef std::unordered_map<OUString, SvxAutocorrWord> AutocorrWordHashType;

}

struct SvxAutocorrWordList::Impl
{

    // only one of these contains the data
    // maSortedVector is manually sorted so we can optimise data movement
    mutable AutocorrWordSetType maSortedVector;
    mutable AutocorrWordHashType maHash; // key is 'Short'

    // empties both containers
    void DeleteAndDestroyAll()
    {
        maHash.clear();
        maSortedVector.clear();
    }
};

SvxAutocorrWordList::SvxAutocorrWordList() : mpImpl(new Impl) {}

SvxAutocorrWordList::~SvxAutocorrWordList()
{
}

// removes all entries
void SvxAutocorrWordList::DeleteAndDestroyAll()
{
    mpImpl->DeleteAndDestroyAll();
}

// Inserts aWord unless an entry with the same short form exists already.
// Returns a pointer to the inserted entry, or nullptr if nothing was inserted.
// As long as the sorted vector is empty the entry goes into the hash,
// afterwards it is inserted at its sorted position in the vector.
const SvxAutocorrWord* SvxAutocorrWordList::Insert(SvxAutocorrWord aWord) const
{
    if ( mpImpl->maSortedVector.empty() ) // use the hash
    {
        // copy the key first, aWord is moved from in the same call
        OUString aShort = aWord.GetShort();
        auto [it,inserted] = mpImpl->maHash.emplace( std::move(aShort), std::move(aWord) );
        if (inserted)
            return &(it->second);
        return nullptr;
    }
    else
    {
        auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), aWord, CompareSvxAutocorrWordList());
        CollatorWrapper& rCmp = ::GetCollatorWrapper();
        // it points to the first entry not less than aWord; only insert if
        // that entry does not compare equal
        if (it == mpImpl->maSortedVector.end() || rCmp.compareString( aWord.GetShort(), it->GetShort() ) != 0)
        {
            it = mpImpl->maSortedVector.insert(it, std::move(aWord));
            return &*it;
        }
        return nullptr;
    }
}

// Inserts an entry built from the given values; whether it was really
// inserted is not reported.
void SvxAutocorrWordList::LoadEntry(const OUString& sWrong, const OUString& sRight, bool bOnlyTxt)
{
    (void)Insert(SvxAutocorrWord( sWrong, sRight, bOnlyTxt ));
}

// true if neither container holds an entry
bool SvxAutocorrWordList::empty() const
{
    return mpImpl->maHash.empty() && mpImpl->maSortedVector.empty();
}

// Removes the entry with the same short form as *pWord and returns it;
// returns an empty optional if there is no such entry.
std::optional<SvxAutocorrWord> SvxAutocorrWordList::FindAndRemove(const SvxAutocorrWord *pWord)
{

    if ( mpImpl->maSortedVector.empty() ) // use the hash
    {
        AutocorrWordHashType::iterator it = mpImpl->maHash.find( pWord->GetShort() );
        if( it != mpImpl->maHash.end() )
        {
            SvxAutocorrWord pMatch = std::move(it->second);
            mpImpl->maHash.erase (it);
            return pMatch;
        }
    }
    else
    {
        auto it = std::lower_bound(mpImpl->maSortedVector.begin(), mpImpl->maSortedVector.end(), *pWord, CompareSvxAutocorrWordList());
        // *it is not less than *pWord; if *pWord is not less than *it either,
        // both are equivalent
        if (it != mpImpl->maSortedVector.end() && !CompareSvxAutocorrWordList()(*pWord, *it))
        {
            SvxAutocorrWord pMatch = std::move(*it);
            mpImpl->maSortedVector.erase (it);
            return pMatch;
        }
    }
    return std::optional<SvxAutocorrWord>();
}

// return the sorted contents - defer sorting until we have to.
const SvxAutocorrWordList::AutocorrWordSetType& SvxAutocorrWordList::getSortedContent() const
{
    // convert from hash to set permanently
    if ( mpImpl->maSortedVector.empty() )
    {
        std::vector<SvxAutocorrWord> tmp;
        tmp.reserve(mpImpl->maHash.size());
        for (auto & rPair : mpImpl->maHash)
            tmp.emplace_back(std::move(rPair.second));
        mpImpl->maHash.clear();
        // sort twice - this gets the list into mostly-sorted order, which
        // reduces the number of times we need to invoke the expensive ICU collate fn.
        std::sort(tmp.begin(), tmp.end(),
            [] ( SvxAutocorrWord const & lhs, SvxAutocorrWord const & rhs )
            {
                return lhs.GetShort() < rhs.GetShort();
            });
        // This beast has some O(N log(N)) in a terribly slow ICU collate fn.
        // stable_sort is twice as fast as sort in this situation because it does
        // fewer comparison operations.
        std::stable_sort(tmp.begin(), tmp.end(), CompareSvxAutocorrWordList());
        mpImpl->maSortedVector = std::move(tmp);
    }
    return mpImpl->maSortedVector;
}

const SvxAutocorrWord* SvxAutocorrWordList::WordMatches(const SvxAutocorrWord *pFnd,
                                      const OUString &rTxt,
                                      sal_Int32 &rStt,
                                      sal_Int32 nEndPos) const
{
    const OUString& rChk = pFnd->GetShort();

    sal_Int32 left_wildcard = rChk.startsWith( ".*" ) ? 2 : 0; // ".*word" pattern?
    sal_Int32 right_wildcard = rChk.endsWith( ".*" ) ? 2 : 0; // "word.*" pattern?
    sal_Int32 nSttWdPos = nEndPos;

    // direct replacement of keywords surrounded by colons (for example, ":name:")
    bool bColonNameColon = rTxt.getLength() > nEndPos &&
        rTxt[nEndPos] == ':' && rChk[0] == ':' && rChk.endsWith(":");
    if ( nEndPos + (bColonNameColon ? 1 : 0) >= rChk.getLength() - left_wildcard - right_wildcard )
    {

        bool bWasWordDelim = false;
        sal_Int32 nCalcStt = nEndPos - rChk.getLength() + left_wildcard;
        if (bColonNameColon)
            nCalcStt++;
        if( !right_wildcard && ( !nCalcStt || nCalcStt == rStt || left_wildcard || bColonNameColon ||
            ( nCalcStt < rStt &&
                IsWordDelim( rTxt[ nCalcStt - 1 ] ))) )
        {
            TransliterationWrapper& rCmp = GetIgnoreTranslWrapper();
            OUString sWord = rTxt.copy(nCalcStt, rChk.getLength() - left_wildcard);
            if( (!left_wildcard && rCmp.isEqual( rChk, sWord )) || (left_wildcard && rCmp.isEqual( rChk.copy(left_wildcard), sWord) ))
            {
                rStt = nCalcStt;
                if (!left_wildcard)
                {
                    // fdo#33899 avoid "1/2", "1/3".. to be replaced by fractions in dates, eg.
1/2/14 2826 if (rTxt.getLength() > nEndPos && rTxt[nEndPos] == '/' && rChk.indexOf('/') != -1) 2827 return nullptr; 2828 return pFnd; 2829 } 2830 // get the first word delimiter position before the matching ".*word" pattern 2831 while( rStt && !(bWasWordDelim = IsWordDelim( rTxt[ --rStt ]))) 2832 ; 2833 if (bWasWordDelim) rStt++; 2834 OUString left_pattern = rTxt.copy(rStt, nEndPos - rStt - rChk.getLength() + left_wildcard); 2835 // avoid double spaces before simple "word" replacement 2836 left_pattern += (left_pattern.getLength() == 0 && pFnd->GetLong()[0] == 0x20) ? pFnd->GetLong().copy(1) : pFnd->GetLong(); 2837 if( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(rTxt.copy(rStt, nEndPos - rStt), left_pattern) ) ) 2838 return pNew; 2839 } 2840 } else 2841 // match "word.*" or ".*word.*" patterns, eg. "i18n.*", ".*---.*", TODO: add transliteration support 2842 if ( right_wildcard ) 2843 { 2844 2845 OUString sTmp( rChk.copy( left_wildcard, rChk.getLength() - left_wildcard - right_wildcard ) ); 2846 // Get the last word delimiter position 2847 bool not_suffix; 2848 2849 while( nSttWdPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nSttWdPos ]))) 2850 ; 2851 // search the first occurrence (with a left word delimitation, if needed) 2852 sal_Int32 nFndPos = -1; 2853 do { 2854 nFndPos = rTxt.indexOf( sTmp, nFndPos + 1); 2855 if (nFndPos == -1) 2856 break; 2857 not_suffix = bWasWordDelim && (nSttWdPos >= (nFndPos + sTmp.getLength())); 2858 } while ( (!left_wildcard && nFndPos && !IsWordDelim( rTxt[ nFndPos - 1 ])) || not_suffix ); 2859 2860 if ( nFndPos != -1 ) 2861 { 2862 sal_Int32 extra_repl = nFndPos + sTmp.getLength() > nEndPos ? 1: 0; // for patterns with terminating characters, eg. 
"a:" 2863 2864 if ( left_wildcard ) 2865 { 2866 // get the first word delimiter position before the matching ".*word.*" pattern 2867 while( nFndPos && !(bWasWordDelim = IsWordDelim( rTxt[ --nFndPos ]))) 2868 ; 2869 if (bWasWordDelim) nFndPos++; 2870 } 2871 if (nEndPos + extra_repl <= nFndPos) 2872 { 2873 return nullptr; 2874 } 2875 // store matching pattern and its replacement as a new list item, eg. "i18ns" -> "internationalizations" 2876 OUString aShort = rTxt.copy(nFndPos, nEndPos - nFndPos + extra_repl); 2877 2878 OUString aLong; 2879 rStt = nFndPos; 2880 if ( !left_wildcard ) 2881 { 2882 sal_Int32 siz = nEndPos - nFndPos - sTmp.getLength(); 2883 aLong = pFnd->GetLong() + (siz > 0 ? rTxt.copy(nFndPos + sTmp.getLength(), siz) : ""); 2884 } else { 2885 OUStringBuffer buf; 2886 do { 2887 nSttWdPos = rTxt.indexOf( sTmp, nFndPos); 2888 if (nSttWdPos != -1) 2889 { 2890 sal_Int32 nTmp(nFndPos); 2891 while (nTmp < nSttWdPos && !IsWordDelim(rTxt[nTmp])) 2892 nTmp++; 2893 if (nTmp < nSttWdPos) 2894 break; // word delimiter found 2895 buf.append(std::u16string_view(rTxt).substr(nFndPos, nSttWdPos - nFndPos)).append(pFnd->GetLong()); 2896 nFndPos = nSttWdPos + sTmp.getLength(); 2897 } 2898 } while (nSttWdPos != -1); 2899 if (nEndPos - nFndPos > extra_repl) 2900 buf.append(std::u16string_view(rTxt).substr(nFndPos, nEndPos - nFndPos)); 2901 aLong = buf.makeStringAndClear(); 2902 } 2903 if ( const SvxAutocorrWord* pNew = Insert( SvxAutocorrWord(aShort, aLong) ) ) 2904 { 2905 if ( (rTxt.getLength() > nEndPos && IsWordDelim(rTxt[nEndPos])) || rTxt.getLength() == nEndPos ) 2906 return pNew; 2907 } 2908 } 2909 } 2910 } 2911 return nullptr; 2912 } 2913 2914 const SvxAutocorrWord* SvxAutocorrWordList::SearchWordsInList(const OUString& rTxt, sal_Int32& rStt, 2915 sal_Int32 nEndPos) const 2916 { 2917 for (auto const& elem : mpImpl->maHash) 2918 { 2919 if( const SvxAutocorrWord *pTmp = WordMatches( &elem.second, rTxt, rStt, nEndPos ) ) 2920 return pTmp; 2921 } 2922 2923 for (auto 
const& elem : mpImpl->maSortedVector) 2924 { 2925 if( const SvxAutocorrWord *pTmp = WordMatches( &elem, rTxt, rStt, nEndPos ) ) 2926 return pTmp; 2927 } 2928 return nullptr; 2929 } 2930 2931 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 2932
