1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <comphelper/processfactory.hxx> 21 #include <i18nlangtag/languagetag.hxx> 22 #include <i18nutil/unicode.hxx> 23 #include <sot/formats.hxx> 24 #include <sfx2/mieclip.hxx> 25 #include <com/sun/star/i18n/CalendarFieldIndex.hpp> 26 #include <sal/log.hxx> 27 #include <unotools/charclass.hxx> 28 #include <osl/module.hxx> 29 #include <o3tl/string_view.hxx> 30 31 #include <global.hxx> 32 #include <docsh.hxx> 33 #include <undoblk.hxx> 34 #include <rangenam.hxx> 35 #include <tabvwsh.hxx> 36 #include <filter.hxx> 37 #include <asciiopt.hxx> 38 #include <formulacell.hxx> 39 #include <cellform.hxx> 40 #include <progress.hxx> 41 #include <scitems.hxx> 42 #include <editable.hxx> 43 #include <compiler.hxx> 44 #include <warnbox.hxx> 45 #include <clipparam.hxx> 46 #include <impex.hxx> 47 #include <editutil.hxx> 48 #include <patattr.hxx> 49 #include <docpool.hxx> 50 #include <stringutil.hxx> 51 #include <cellvalue.hxx> 52 #include <tokenarray.hxx> 53 #include <documentimport.hxx> 54 #include <refundo.hxx> 55 #include <mtvelements.hxx> 56 57 #include <globstr.hrc> 58 #include <scresid.hxx> 59 #include <o3tl/safeint.hxx> 60 #include <tools/svlibrary.h> 61 #include <unotools/configmgr.hxx> 62 #include <vcl/svapp.hxx> 63 #include <vcl/weld.hxx> 64 #include <editeng/editobj.hxx> 65 #include <svl/numformat.hxx> 66 #include <rtl/character.hxx> 67 #include <rtl/math.hxx> 68 #include <sax/tools/converter.hxx> 69 70 #include <memory> 71 #include <string_view> 72 73 #include <unicode/uchar.h> 74 75 #include <osl/endian.h> 76 77 // We don't want to end up with 2GB read in one line just because of malformed 78 // multiline fields, so chop it _somewhere_, which is twice supported columns 79 // times arbitrary maximum cell content length, 2*1024*64K=128M, and because 80 // it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of 81 // luck anyway. 82 constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16; 83 constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit; 84 85 namespace 86 { 87 const char SYLK_LF[] = "\x1b :"; 88 } 89 90 namespace { 91 92 enum class SylkVersion 93 { 94 SCALC3, // Wrote wrongly quoted strings and unescaped semicolons. 95 OOO32, // Correct strings, plus multiline content. 96 OWN, // Place our new versions, if any, before this value. 97 OTHER // Assume that aliens wrote correct strings. 98 }; 99 100 } 101 102 // Whole document without Undo 103 ScImportExport::ScImportExport( ScDocument& r ) 104 : pDocSh( r.GetDocumentShell() ), rDoc( r ), 105 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), 106 cSep( '\t' ), cStr( '"' ), 107 bFormulas( false ), bIncludeFiltered( true ), 108 bAll( true ), bSingle( true ), bUndo( false ), 109 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 110 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) 111 { 112 pUndoDoc = nullptr; 113 pExtOptions = nullptr; 114 } 115 116 // Insert am current cell without range(es) 117 ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt ) 118 : pDocSh( r.GetDocumentShell() ), rDoc( r ), 119 aRange( rPt ), 120 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), 121 cSep( '\t' ), cStr( '"' ), 122 bFormulas( false ), bIncludeFiltered( true ), 123 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), 124 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 125 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) 126 { 127 pUndoDoc = nullptr; 128 pExtOptions = nullptr; 129 } 130 131 // ctor with a range is only used for export 132 //! ctor with a string (and bSingle=true) is also used for DdeSetData 133 ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange ) 134 : pDocSh( r.GetDocumentShell() ), rDoc( r ), 135 aRange( rRange ), 136 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), 137 cSep( '\t' ), cStr( '"' ), 138 bFormulas( false ), bIncludeFiltered( true ), 139 bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ), 140 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 141 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) 142 { 143 pUndoDoc = nullptr; 144 pExtOptions = nullptr; 145 // Only one sheet (table) supported 146 aRange.aEnd.SetTab( aRange.aStart.Tab() ); 147 } 148 149 // Evaluate input string - either range, cell or the whole document (when error) 150 // If a View exists, the TabNo of the view will be used. 151 ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos ) 152 : pDocSh( r.GetDocumentShell() ), rDoc( r ), 153 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K), 154 cSep( '\t' ), cStr( '"' ), 155 bFormulas( false ), bIncludeFiltered( true ), 156 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), 157 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 158 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false) 159 { 160 pUndoDoc = nullptr; 161 pExtOptions = nullptr; 162 163 SCTAB nTab = ScDocShell::GetCurTab(); 164 aRange.aStart.SetTab( nTab ); 165 OUString aPos( rPos ); 166 // Named range? 167 ScRangeName* pRange = rDoc.GetRangeName(); 168 if (pRange) 169 { 170 const ScRangeData* pData = pRange->findByUpperName(ScGlobal::getCharClass().uppercase(aPos)); 171 if (pData) 172 { 173 if( pData->HasType( ScRangeData::Type::RefArea ) 174 || pData->HasType( ScRangeData::Type::AbsArea ) 175 || pData->HasType( ScRangeData::Type::AbsPos ) ) 176 { 177 aPos = pData->GetSymbol(); 178 } 179 } 180 } 181 formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention(); 182 // Range? 183 if (aRange.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID) 184 bSingle = false; 185 // Cell? 186 else if (aRange.aStart.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID) 187 aRange.aEnd = aRange.aStart; 188 else 189 bAll = true; 190 } 191 192 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE 193 { 194 pUndoDoc.reset(); 195 pExtOptions.reset(); 196 } 197 198 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt ) 199 { 200 if ( pExtOptions ) 201 *pExtOptions = rOpt; 202 else 203 pExtOptions.reset(new ScAsciiOptions( rOpt )); 204 205 // "normal" Options 206 207 cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false); 208 cStr = rOpt.GetTextSep(); 209 } 210 211 void ScImportExport::SetFilterOptions(const OUString& rFilterOptions) 212 { 213 maFilterOptions = rFilterOptions; 214 } 215 216 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat ) 217 { 218 return nFormat == SotClipboardFormatId::STRING 219 || nFormat == SotClipboardFormatId::STRING_TSVC 220 || nFormat == SotClipboardFormatId::SYLK 221 || nFormat == SotClipboardFormatId::LINK 222 || nFormat == SotClipboardFormatId::HTML 223 || nFormat == SotClipboardFormatId::HTML_SIMPLE 224 || nFormat == SotClipboardFormatId::DIF; 225 } 226 227 // Prepare for Undo 228 bool ScImportExport::StartPaste() 229 { 230 if ( !bAll ) 231 { 232 ScEditableTester aTester( rDoc, aRange ); 233 if ( !aTester.IsEditable() ) 234 { 235 std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(), 236 VclMessageType::Info, VclButtonsType::Ok, 237 ScResId(aTester.GetMessageId()))); 238 xInfoBox->run(); 239 return false; 240 } 241 } 242 if( bUndo && pDocSh && rDoc.IsUndoEnabled()) 243 { 244 pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO )); 245 pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() ); 246 rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc); 247 } 248 return true; 249 } 250 251 // Create Undo/Redo actions, Invalidate/Repaint 252 void ScImportExport::EndPaste(bool bAutoRowHeight) 253 { 254 bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight( 255 aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() ); 256 257 if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh ) 258 { 259 ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO )); 260 pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() ); 261 rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc); 262 ScMarkData aDestMark(pRedoDoc->GetSheetLimits()); 263 aDestMark.SetMarkArea(aRange); 264 pDocSh->GetUndoManager()->AddUndoAction( 265 std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr)); 266 } 267 pUndoDoc.reset(); 268 if( pDocSh ) 269 { 270 if (!bHeight) 271 pDocSh->PostPaint( aRange, PaintPartFlags::Grid ); 272 pDocSh->SetDocumentModified(); 273 } 274 ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell(); 275 if ( pViewSh ) 276 pViewSh->UpdateInputHandler(); 277 278 } 279 280 bool ScImportExport::ExportData( std::u16string_view rMimeType, 281 css::uno::Any & rValue ) 282 { 283 SvMemoryStream aStrm; 284 SotClipboardFormatId fmtId = SotExchange::GetFormatIdFromMimeType(rMimeType); 285 if (fmtId == SotClipboardFormatId::STRING) 286 aStrm.SetStreamCharSet(RTL_TEXTENCODING_UNICODE); 287 // mba: no BaseURL for data exchange 288 if (ExportStream(aStrm, OUString(), fmtId)) 289 { 290 if (fmtId == SotClipboardFormatId::STRING) 291 { 292 assert(aStrm.TellEnd() % sizeof(sal_Unicode) == 0); 293 rValue <<= OUString(static_cast<const sal_Unicode*>(aStrm.GetData()), 294 aStrm.TellEnd() / sizeof(sal_Unicode)); 295 } 296 else 297 { 298 aStrm.WriteUChar(0); 299 rValue <<= css::uno::Sequence<sal_Int8>(static_cast<sal_Int8 const*>(aStrm.GetData()), 300 aStrm.TellEnd()); 301 } 302 return true; 303 } 304 return false; 305 } 306 307 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt ) 308 { 309 switch ( nFmt ) 310 { 311 // formats supporting unicode 312 case SotClipboardFormatId::STRING : 313 case SotClipboardFormatId::STRING_TSVC : 314 { 315 ScImportStringStream aStrm( rText); 316 return ImportStream( aStrm, OUString(), nFmt ); 317 // ImportStream must handle RTL_TEXTENCODING_UNICODE 318 } 319 default: 320 { 321 rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); 322 OString aTmp( rText.getStr(), rText.getLength(), eEnc ); 323 SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ ); 324 aStrm.SetStreamCharSet( eEnc ); 325 SetNoEndianSwap( aStrm ); //! no swapping in memory 326 return ImportStream( aStrm, OUString(), nFmt ); 327 } 328 } 329 } 330 331 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt ) 332 { 333 if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC ) 334 { 335 SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]"); 336 rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); 337 OString aTmp; 338 bool bOk = ExportByteString( aTmp, eEnc, nFmt ); 339 rText = OStringToOUString( aTmp, eEnc ); 340 return bOk; 341 } 342 // nSizeLimit not needed for OUString 343 344 SvMemoryStream aStrm; 345 aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE ); 346 SetNoEndianSwap( aStrm ); //! no swapping in memory 347 // mba: no BaseURL for data exc 348 if( ExportStream( aStrm, OUString(), nFmt ) ) 349 { 350 aStrm.WriteUInt16( 0 ); 351 rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) ); 352 return true; 353 } 354 rText.clear(); 355 return false; 356 357 // ExportStream must handle RTL_TEXTENCODING_UNICODE 358 } 359 360 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt ) 361 { 362 OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" ); 363 if ( eEnc == RTL_TEXTENCODING_UNICODE ) 364 eEnc = osl_getThreadTextEncoding(); 365 366 if (!nSizeLimit) 367 nSizeLimit = SAL_MAX_UINT16; 368 369 SvMemoryStream aStrm; 370 aStrm.SetStreamCharSet( eEnc ); 371 SetNoEndianSwap( aStrm ); //! no swapping in memory 372 // mba: no BaseURL for data exchange 373 if( ExportStream( aStrm, OUString(), nFmt ) ) 374 { 375 aStrm.WriteChar( 0 ); 376 if( aStrm.TellEnd() <= nSizeLimit ) 377 { 378 rText = static_cast<const char*>(aStrm.GetData()); 379 return true; 380 } 381 } 382 rText.clear(); 383 return false; 384 } 385 386 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt ) 387 { 388 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC ) 389 { 390 if( ExtText2Doc( rStrm ) ) // evaluate pExtOptions 391 return true; 392 } 393 if( nFmt == SotClipboardFormatId::SYLK ) 394 { 395 if( Sylk2Doc( rStrm ) ) 396 return true; 397 } 398 if( nFmt == SotClipboardFormatId::DIF ) 399 { 400 if( Dif2Doc( rStrm ) ) 401 return true; 402 } 403 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT ) 404 { 405 if( RTF2Doc( rStrm, rBaseURL ) ) 406 return true; 407 } 408 if( nFmt == SotClipboardFormatId::LINK ) 409 return true; // Link-Import? 410 if ( nFmt == SotClipboardFormatId::HTML ) 411 { 412 if( HTML2Doc( rStrm, rBaseURL ) ) 413 return true; 414 } 415 if ( nFmt == SotClipboardFormatId::HTML_SIMPLE ) 416 { 417 MSE40HTMLClipFormatObj aMSE40ClpObj; // needed to skip the header data 418 SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm ); 419 if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) ) 420 return true; 421 } 422 423 return false; 424 } 425 426 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt ) 427 { 428 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC ) 429 { 430 if( Doc2Text( rStrm ) ) 431 return true; 432 } 433 if( nFmt == SotClipboardFormatId::SYLK ) 434 { 435 if( Doc2Sylk( rStrm ) ) 436 return true; 437 } 438 if( nFmt == SotClipboardFormatId::DIF ) 439 { 440 if( Doc2Dif( rStrm ) ) 441 return true; 442 } 443 if( nFmt == SotClipboardFormatId::LINK && !bAll ) 444 { 445 OUString aDocName; 446 if ( rDoc.IsClipboard() ) 447 aDocName = ScGlobal::GetClipDocName(); 448 else 449 { 450 ScDocShell* pShell = rDoc.GetDocumentShell(); 451 if (pShell) 452 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME ); 453 } 454 455 OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" ); 456 if( !aDocName.isEmpty() ) 457 { 458 // Always use Calc A1 syntax for paste link. 459 OUString aRefName; 460 ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D; 461 if( bSingle ) 462 aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO); 463 else 464 { 465 if( aRange.aStart.Tab() != aRange.aEnd.Tab() ) 466 nFlags |= ScRefFlags::TAB2_3D; 467 aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO); 468 } 469 OUString aAppName = Application::GetAppName(); 470 471 // extra bits are used to tell the client to prefer external 472 // reference link. 473 474 rStrm.WriteUnicodeOrByteText(aAppName, true); 475 rStrm.WriteUnicodeOrByteText(aDocName, true); 476 rStrm.WriteUnicodeOrByteText(aRefName, true); 477 rStrm.WriteUnicodeOrByteText(u"calc:extref", true); 478 return rStrm.WriteUnicodeOrByteText(u"", true); // One more trailing zero 479 } 480 } 481 if( nFmt == SotClipboardFormatId::HTML ) 482 { 483 if( Doc2HTML( rStrm, rBaseURL ) ) 484 return true; 485 } 486 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT ) 487 { 488 if( Doc2RTF( rStrm ) ) 489 return true; 490 } 491 492 return false; 493 } 494 495 // tdf#104927 496 // http://www.unicode.org/reports/tr11/ 497 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth) 498 { 499 sal_Int32 nWidth = 0; 500 while(nIdx < rStr.getLength() && nWidth < nMaxWidth) 501 { 502 sal_uInt32 nCode = rStr.iterateCodePoints(&nIdx); 503 504 auto nEaWidth = u_getIntPropertyValue(nCode, UCHAR_EAST_ASIAN_WIDTH); 505 if (nEaWidth == U_EA_FULLWIDTH || nEaWidth == U_EA_WIDE) 506 nWidth += 2; 507 else if (!u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) 508 nWidth += 1; 509 } 510 511 if (nIdx < rStr.getLength()) 512 { 513 sal_Int32 nTmpIdx = nIdx; 514 sal_uInt32 nCode = rStr.iterateCodePoints(&nTmpIdx); 515 516 if (u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) 517 nIdx = nTmpIdx; 518 } 519 return nWidth; 520 } 521 522 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr) 523 { 524 sal_Int32 nIdx = 0; 525 return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32); 526 } 527 528 void ScImportExport::SetNoEndianSwap( SvStream& rStrm ) 529 { 530 #ifdef OSL_BIGENDIAN 531 rStrm.SetEndian( SvStreamEndian::BIG ); 532 #else 533 rStrm.SetEndian( SvStreamEndian::LITTLE ); 534 #endif 535 } 536 537 static inline bool lcl_isFieldEnd( sal_Unicode c, const sal_Unicode* pSeps ) 538 { 539 return !c || ScGlobal::UnicodeStrChr( pSeps, c); 540 } 541 542 namespace { 543 544 enum QuoteType 545 { 546 FIELDSTART_QUOTE, 547 FIRST_QUOTE, 548 SECOND_QUOTE, 549 FIELDEND_QUOTE, 550 DONTKNOW_QUOTE 551 }; 552 553 } 554 555 /** Determine if *p is a quote that ends a quoted field. 556 557 Precondition: we are parsing a quoted field already and *p is a quote. 558 559 @return 560 FIELDEND_QUOTE if end of field quote 561 DONTKNOW_QUOTE anything else 562 */ 563 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep ) 564 { 565 // Due to broken CSV generators that don't double embedded quotes check if 566 // a field separator immediately or with trailing spaces follows the quote, 567 // only then end the field, or at end of string. 568 constexpr sal_Unicode cBlank = ' '; 569 if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank)) 570 return FIELDEND_QUOTE; 571 // Detect a possible blank separator if it's not already in the list (which 572 // was checked right above for p[1]==cBlank). 573 const bool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank); 574 while (p[1] == cBlank) 575 ++p; 576 if (lcl_isFieldEnd( p[1], pSeps)) 577 return FIELDEND_QUOTE; 578 // Extended separator detection after a closing quote (with or without 579 // blanks). Note that nQuotes is incremented *after* the call so is not yet 580 // even here, and that with separator detection we reach here only if 581 // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or 582 // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have 583 // to be checked. 584 if (!rcDetectSep) 585 { 586 constexpr sal_Unicode vSep[] = { ',', '\t', ';' }; 587 for (const sal_Unicode c : vSep) 588 { 589 if (p[1] == c) 590 { 591 rcDetectSep = c; 592 return FIELDEND_QUOTE; 593 } 594 } 595 } 596 // Blank separator is least significant, after others. 597 if (bBlankSep) 598 { 599 rcDetectSep = cBlank; 600 return FIELDEND_QUOTE; 601 } 602 return DONTKNOW_QUOTE; 603 } 604 605 /** Determine if *p is a quote that is escaped by being doubled or ends a 606 quoted field. 607 608 Precondition: *p is a quote. 609 610 @param nQuotes 611 Quote characters encountered so far. 612 Odd (after opening quote) means either no embedded quotes or only quote 613 pairs so far. 614 Even means either not in a quoted field or already one quote 615 encountered, the first of a pair. 616 617 @return 618 FIELDSTART_QUOTE if first quote in a field, either starting content or 619 embedded so caller should check beforehand. 620 FIRST_QUOTE if first of a doubled quote 621 SECOND_QUOTE if second of a doubled quote 622 FIELDEND_QUOTE if end of field quote 623 DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field, 624 do not increment nQuotes in caller then! 625 */ 626 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p, 627 const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep ) 628 { 629 if ((nQuotes & 1) == 0) 630 { 631 if (p[-1] == cStr) 632 return SECOND_QUOTE; 633 else 634 { 635 SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?"); 636 return FIELDSTART_QUOTE; 637 } 638 } 639 if (p[1] == cStr) 640 return FIRST_QUOTE; 641 return lcl_isFieldEndQuote( p, pSeps, rcDetectSep); 642 } 643 644 /** Append characters of [p1,p2) to rField. 645 646 @returns TRUE if ok; FALSE if data overflow, truncated 647 */ 648 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 ) 649 { 650 if (rField.getLength() + (p2 - p1) <= nArbitraryCellLengthLimit) 651 { 652 rField += std::u16string_view( p1, p2 - p1 ); 653 return true; 654 } 655 else 656 { 657 SAL_WARN( "sc", "lcl_appendLineData: data overflow"); 658 rField += std::u16string_view( p1, nArbitraryCellLengthLimit - rField.getLength() ); 659 return false; 660 } 661 } 662 663 namespace { 664 665 enum class DoubledQuoteMode 666 { 667 KEEP_ALL, // both are taken, additionally start and end quote are included in string 668 ESCAPE, // escaped quote, one is taken, one ignored 669 }; 670 671 } 672 673 /** Scan for a quoted string. 674 675 Precondition: initial current position *p is a cStr quote. 676 677 For DoubledQuoteMode::ESCAPE, if after the closing quote there is a field 678 end (with or without trailing blanks and as determined by 679 lcl_isFieldEndQuote()), then the content is appended to rField with quotes 680 processed and removed. Else if no field end after the quoted string was 681 detected, nothing is appended and processing continues and is repeated 682 until the next quote. If no closing quote at a field end was found at all, 683 nothing is appended and the initial position is returned and caller has to 684 decide, usually just taking all as literal data. 685 686 For DoubledQuoteMode::KEEP_ALL, the string up to and including the closing 687 quote is appended to rField and the next position returned, regardless 688 whether there is a field separator following or not. 689 690 */ 691 static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rField, 692 const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell ) 693 { 694 OUString aString; 695 bool bClosingQuote = (eMode == DoubledQuoteMode::KEEP_ALL); 696 const sal_Unicode* const pStart = p; 697 if (eMode != DoubledQuoteMode::KEEP_ALL) 698 p++; //! jump over opening quote 699 bool bCont; 700 do 701 { 702 bCont = false; 703 const sal_Unicode* p0 = p; 704 for( ;; ) 705 { 706 if (!*p) 707 { 708 // Encountering end of data after an opening quote is not a 709 // quoted string, ReadCsvLine() concatenated lines with '\n' 710 // for a properly quoted embedded linefeed. 711 if (eMode == DoubledQuoteMode::KEEP_ALL) 712 // Caller would append that data anyway, so we can do it 713 // already here. 714 break; 715 716 return pStart; 717 } 718 719 if( *p == cStr ) 720 { 721 if ( *++p != cStr ) 722 { 723 // break or continue for loop 724 if (eMode == DoubledQuoteMode::ESCAPE) 725 { 726 sal_Unicode cDetectSep = 0xffff; // No separator detection here. 727 if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE) 728 { 729 bClosingQuote = true; 730 break; 731 } 732 else 733 continue; 734 } 735 else 736 break; 737 } 738 // doubled quote char 739 switch ( eMode ) 740 { 741 case DoubledQuoteMode::KEEP_ALL : 742 p++; // both for us (not breaking for-loop) 743 break; 744 case DoubledQuoteMode::ESCAPE : 745 p++; // one for us (breaking for-loop) 746 bCont = true; // and more 747 break; 748 } 749 if ( eMode == DoubledQuoteMode::ESCAPE ) 750 break; 751 } 752 else 753 p++; 754 } 755 if ( p0 < p ) 756 { 757 if (!lcl_appendLineData( aString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p))) 758 rbOverflowCell = true; 759 } 760 } while ( bCont ); 761 762 if (!bClosingQuote) 763 return pStart; 764 765 if (!aString.isEmpty()) 766 rField += aString; 767 768 return p; 769 } 770 771 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion ) 772 { 773 // Older versions didn't escape the semicolon. 774 // Older versions quoted the string and doubled embedded quotes, but not 775 // the semicolons, which was plain wrong. 776 if (eVersion >= SylkVersion::OOO32) 777 rString = rString.replaceAll(";;", ";"); 778 else 779 rString = rString.replaceAll("\"\"", "\""); 780 781 rString = rString.replaceAll(SYLK_LF, "\n"); 782 } 783 784 static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p, 785 OUString& rString, SylkVersion eVersion ) 786 { 787 const sal_Unicode* pStartQuote = p; 788 const sal_Unicode* pEndQuote = nullptr; 789 while( *(++p) ) 790 { 791 if( *p == '"' ) 792 { 793 pEndQuote = p; 794 if (eVersion >= SylkVersion::OOO32) 795 { 796 if (*(p+1) == ';') 797 { 798 if (*(p+2) == ';') 799 { 800 p += 2; // escaped ';' 801 pEndQuote = nullptr; 802 } 803 else 804 break; // end field 805 } 806 } 807 else 808 { 809 if (*(p+1) == '"') 810 { 811 ++p; // escaped '"' 812 pEndQuote = nullptr; 813 } 814 else if (*(p+1) == ';') 815 break; // end field 816 } 817 } 818 } 819 if (!pEndQuote) 820 pEndQuote = p; // Take all data as string. 821 rString += std::u16string_view(pStartQuote + 1, pEndQuote - pStartQuote - 1 ); 822 lcl_UnescapeSylk( rString, eVersion); 823 return p; 824 } 825 826 static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p, 827 OUString& rString, SylkVersion eVersion ) 828 { 829 const sal_Unicode* pStart = p; 830 if (eVersion >= SylkVersion::OOO32) 831 { 832 while (*p) 833 { 834 if (*p == ';') 835 { 836 if (*(p+1) == ';') 837 ++p; // escaped ';' 838 else 839 break; // end field 840 } 841 ++p; 842 } 843 rString += std::u16string_view( pStart, p - pStart); 844 lcl_UnescapeSylk( rString, eVersion); 845 } 846 else 847 { 848 // Nasty. If in old versions the formula contained a semicolon, it was 849 // quoted and embedded quotes were doubled, but semicolons were not. If 850 // there was no semicolon, it could still contain quotes and doubled 851 // embedded quotes if it was something like ="a""b", which was saved as 852 // E"a""b" as is and has to be preserved, even if older versions 853 // couldn't even load it correctly. However, theoretically another 854 // field might follow and thus the line contain a semicolon again, such 855 // as ...;E"a""b";... 856 bool bQuoted = false; 857 if (*p == '"') 858 { 859 // May be a quoted expression or just a string constant expression 860 // with quotes. 861 while (*(++p)) 862 { 863 if (*p == '"') 864 { 865 if (*(p+1) == '"') 866 ++p; // escaped '"' 867 else 868 break; // closing '"', had no ';' yet 869 } 870 else if (*p == ';') 871 { 872 bQuoted = true; // ';' within quoted expression 873 break; 874 } 875 } 876 p = pStart; 877 } 878 if (bQuoted) 879 p = lcl_ScanSylkString( p, rString, eVersion); 880 else 881 { 882 while (*p && *p != ';') 883 ++p; 884 rString += std::u16string_view( pStart, p - pStart); 885 } 886 } 887 return p; 888 } 889 890 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc ) 891 { 892 if (cEsc) 893 { 894 // the goal is to replace cStr by cStr+cStr 895 OUString strFrom(cEsc); 896 OUString strTo = strFrom + strFrom; 897 rString = rString.replaceAll(strFrom, strTo); 898 } 899 900 if (cQuote) 901 { 902 rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote); 903 } 904 905 rStrm.WriteUnicodeOrByteText(rString); 906 } 907 908 bool ScImportExport::Text2Doc( SvStream& rStrm ) 909 { 910 bool bOk = true; 911 912 sal_Unicode pSeps[2]; 913 pSeps[0] = cSep; 914 pSeps[1] = 0; 915 916 ScSetStringParam aSetStringParam; 917 aSetStringParam.mbCheckLinkFormula = true; 918 919 SCCOL nStartCol = aRange.aStart.Col(); 920 SCROW nStartRow = aRange.aStart.Row(); 921 SCCOL nEndCol = aRange.aEnd.Col(); 922 SCROW nEndRow = aRange.aEnd.Row(); 923 sal_uInt64 nOldPos = rStrm.Tell(); 924 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() ); 925 bool bData = !bSingle; 926 if( !bSingle) 927 bOk = StartPaste(); 928 929 while( bOk ) 930 { 931 OUString aLine; 932 OUString aCell; 933 SCROW nRow = nStartRow; 934 rStrm.Seek( nOldPos ); 935 for( ;; ) 936 { 937 rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit ); 938 // tdf#125440 When inserting tab separated string, consider quotes as field markers 939 DoubledQuoteMode mode = aLine.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL; 940 if( rStrm.eof() ) 941 break; 942 SCCOL nCol = nStartCol; 943 const sal_Unicode* p = aLine.getStr(); 944 while( *p ) 945 { 946 aCell.clear(); 947 const sal_Unicode* q = p; 948 if (*p == cStr) 949 { 950 // Look for a pairing quote. 951 q = p = lcl_ScanString( p, aCell, pSeps, cStr, mode, bOverflowCell ); 952 } 953 // All until next separator. 954 while (*p && *p != cSep) 955 ++p; 956 if (!lcl_appendLineData( aCell, q, p)) 957 bOverflowCell = true; // display warning on import 958 if (*p) 959 ++p; 960 if (rDoc.ValidCol(nCol) && rDoc.ValidRow(nRow) ) 961 { 962 if( bSingle ) 963 { 964 if (nCol>nEndCol) nEndCol = nCol; 965 if (nRow>nEndRow) nEndRow = nRow; 966 } 967 if( bData && nCol <= nEndCol && nRow <= nEndRow ) 968 rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam ); 969 } 970 else // too many columns/rows 971 { 972 if (!rDoc.ValidRow(nRow)) 973 bOverflowRow = true; // display warning on import 974 if (!rDoc.ValidCol(nCol)) 975 bOverflowCol = true; // display warning on import 976 } 977 ++nCol; 978 } 979 ++nRow; 980 } 981 982 if( !bData ) 983 { 984 aRange.aEnd.SetCol( nEndCol ); 985 aRange.aEnd.SetRow( nEndRow ); 986 bOk = StartPaste(); 987 bData = true; 988 } 989 else 990 break; 991 } 992 993 EndPaste(); 994 if (bOk && mbImportBroadcast) 995 { 996 rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged); 997 pDocSh->PostDataChanged(); 998 } 999 1000 return bOk; 1001 } 1002 1003 // Extended Ascii-Import 1004 1005 static bool lcl_PutString( 1006 ScDocumentImport& rDocImport, bool bUseDocImport, 1007 SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat, 1008 SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bDetectSciNumFormat, bool bEvaluateFormulas, bool bSkipEmptyCells, 1009 const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar, 1010 const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar ) 1011 { 1012 ScDocument& rDoc = rDocImport.getDoc(); 1013 bool bMultiLine = false; 1014 if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) ) 1015 return bMultiLine; 1016 if ( rStr.isEmpty() ) 1017 { 1018 if ( !bSkipEmptyCells ) 1019 { // delete destination cell 1020 if ( bUseDocImport ) 1021 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr ); 1022 else 1023 rDoc.SetString( nCol, nRow, nTab, rStr ); 1024 } 1025 return false; 1026 } 1027 1028 const bool bForceFormulaText = (!bEvaluateFormulas && rStr[0] == '='); 1029 if (nColFormat == SC_COL_TEXT || bForceFormulaText) 1030 { 1031 if ( bUseDocImport ) 1032 { 1033 double fDummy; 1034 sal_uInt32 nIndex = 0; 1035 if (bForceFormulaText || rDoc.GetFormatTable()->IsNumberFormat(rStr, nIndex, fDummy)) 1036 { 1037 // Set the format of this cell to Text. 1038 // This is only necessary for ScDocumentImport, 1039 // ScDocument::SetTextCell() forces it by ScSetStringParam. 1040 sal_uInt32 nFormat = rDoc.GetFormatTable()->GetStandardFormat(SvNumFormatType::TEXT); 1041 ScPatternAttr aNewAttrs(rDoc.GetPool()); 1042 SfxItemSet& rSet = aNewAttrs.GetItemSet(); 1043 rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) ); 1044 rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs); 1045 } 1046 if (ScStringUtil::isMultiline(rStr)) 1047 { 1048 ScFieldEditEngine& rEngine = rDoc.GetEditEngine(); 1049 rEngine.SetTextCurrentDefaults(rStr); 1050 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject()); 1051 return true; 1052 } 1053 else 1054 { 1055 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr); 1056 return false; 1057 } 1058 } 1059 else 1060 { 1061 rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr); 1062 return bMultiLine; 1063 } 1064 } 1065 1066 if ( nColFormat == SC_COL_ENGLISH ) 1067 { 1068 //! SetString with Extra-Flag ??? 1069 1070 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); 1071 sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US); 1072 double fVal; 1073 if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) ) 1074 { 1075 // Numberformat will not be set to English 1076 if ( bUseDocImport ) 1077 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal ); 1078 else 1079 rDoc.SetValue( nCol, nRow, nTab, fVal ); 1080 return bMultiLine; 1081 } 1082 // else, continue with SetString 1083 } 1084 else if ( nColFormat != SC_COL_STANDARD ) // Datumformats 1085 { 1086 const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t 1087 const sal_Int32 nLen = rStr.getLength(); 1088 sal_Int32 nStart[nMaxNumberParts]; 1089 sal_Int32 nEnd[nMaxNumberParts]; 1090 1091 bool bIso; 1092 sal_uInt16 nDP, nMP, nYP; 1093 switch ( nColFormat ) 1094 { 1095 case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; bIso = true; break; 1096 case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; bIso = false; break; 1097 case SC_COL_DMY: 1098 default: nDP = 0; nMP = 1; nYP = 2; bIso = false; break; 1099 } 1100 1101 sal_uInt16 nFound = 0; 1102 bool bInNum = false; 1103 for (sal_Int32 nPos = 0; nPos < nLen && (bInNum || nFound < nMaxNumberParts); ++nPos) 1104 { 1105 bool bLetter = false; 1106 if (rtl::isAsciiDigit(rStr[nPos]) || 1107 (((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1)) 1108 && (bLetter = ScGlobal::getCharClass().isLetterNumeric( rStr, nPos)))) 1109 { 1110 if (!bInNum) 1111 { 1112 bInNum = true; 1113 nStart[nFound] = nPos; 1114 ++nFound; 1115 } 1116 nEnd[nFound-1] = nPos; 1117 if (bIso && (bLetter || (2 <= nFound && nFound <= 6 && nPos > nStart[nFound-1] + 1))) 1118 // Each M,D,h,m,s at most 2 digits. 1119 bIso = false; 1120 } 1121 else 1122 { 1123 bInNum = false; 1124 if (bIso) 1125 { 1126 // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ) 1127 // XXX NOTE: timezone is accepted here, but number 1128 // formatter parser will not, so the end result will be 1129 // type Text to preserve timezone information. 1130 switch (rStr[nPos]) 1131 { 1132 case '+': 1133 if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) 1134 // Accept timezone offset. 1135 ; 1136 else if (nPos > 0) 1137 // Accept one leading sign. 1138 bIso = false; 1139 break; 1140 case '-': 1141 if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) 1142 // Accept timezone offset. 1143 ; 1144 else if (nFound == 0 && nPos > 0) 1145 // Accept one leading sign. 1146 bIso = false; 1147 else if (nFound < 1 || 2 < nFound || nPos != nEnd[nFound-1] + 1) 1148 // Not immediately after 1 or 1-2 1149 bIso = false; 1150 break; 1151 case 'T': 1152 case ' ': 1153 if (nFound != 3 || nPos != nEnd[nFound-1] + 1) 1154 // Not immediately after 1-2-3 1155 bIso = false; 1156 break; 1157 case ':': 1158 if (nFound < 4 || 5 < nFound || nPos != nEnd[nFound-1] + 1) 1159 // Not at 1-2-3T4:5: 1160 bIso = false; 1161 break; 1162 case '.': 1163 case ',': 1164 if (nFound != 6 || nPos != nEnd[nFound-1] + 1) 1165 // Not at 1-2-3T4:5:6. 1166 bIso = false; 1167 break; 1168 case 'Z': 1169 if (nFound >= 5 && nPos == nEnd[nFound-1] + 1) 1170 // Accept Zero timezone. 1171 ; 1172 else 1173 bIso = false; 1174 break; 1175 default: 1176 bIso = false; 1177 } 1178 } 1179 } 1180 } 1181 1182 if (nFound < 3) 1183 bIso = false; 1184 1185 if (bIso) 1186 { 1187 // Leave conversion and detection of various possible number 1188 // formats to the number formatter. ISO is recognized in any locale 1189 // so we can directly use the document's formatter. 1190 sal_uInt32 nFormat = 0; 1191 double fVal = 0.0; 1192 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); 1193 if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal)) 1194 { 1195 if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE) 1196 { 1197 ScAddress aPos(nCol,nRow,nTab); 1198 if (bUseDocImport) 1199 rDocImport.setNumericCell(aPos, fVal); 1200 else 1201 rDoc.SetValue(aPos, fVal); 1202 rDoc.SetNumberFormat(aPos, nFormat); 1203 1204 return bMultiLine; // success 1205 } 1206 } 1207 // If we reach here it is type Text (e.g. timezone or trailing 1208 // characters). Handled below. 1209 } 1210 1211 if ( nFound == 1 ) 1212 { 1213 // try to break one number (without separators) into date fields 1214 1215 sal_Int32 nDateStart = nStart[0]; 1216 sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart; 1217 1218 if ( nDateLen >= 5 && nDateLen <= 8 && 1219 ScGlobal::getCharClass().isNumeric( rStr.copy( nDateStart, nDateLen ) ) ) 1220 { 1221 // 6 digits: 2 each for day, month, year 1222 // 8 digits: 4 for year, 2 each for day and month 1223 // 5 or 7 digits: first field is shortened by 1 1224 1225 bool bLongYear = ( nDateLen >= 7 ); 1226 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 ); 1227 1228 sal_uInt16 nFieldStart = nDateStart; 1229 for (sal_uInt16 nPos=0; nPos<3; nPos++) 1230 { 1231 sal_uInt16 nFieldEnd = nFieldStart + 1; // default: 2 digits 1232 if ( bLongYear && nPos == nYP ) 1233 nFieldEnd += 2; // 2 extra digits for long year 1234 if ( bShortFirst && nPos == 0 ) 1235 --nFieldEnd; // first field shortened? 1236 1237 nStart[nPos] = nFieldStart; 1238 nEnd[nPos] = nFieldEnd; 1239 nFieldStart = nFieldEnd + 1; 1240 } 1241 nFound = 3; 1242 } 1243 } 1244 1245 if (!bIso && nFound >= 3) 1246 { 1247 using namespace ::com::sun::star; 1248 bool bSecondCal = false; 1249 sal_uInt16 nDay = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nDP], nEnd[nDP]+1-nStart[nDP] ))); 1250 sal_uInt16 nYear = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nYP], nEnd[nYP]+1-nStart[nYP] ))); 1251 OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] ); 1252 sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32()); 1253 if (!nMonth) 1254 { 1255 static constexpr OUString aSepShortened = u"SEP"_ustr; 1256 uno::Sequence< i18n::CalendarItem2 > xMonths; 1257 sal_Int32 i, nMonthCount; 1258 // first test all month names from local international 1259 xMonths = rCalendar.getMonths(); 1260 nMonthCount = xMonths.getLength(); 1261 for (i=0; i<nMonthCount && !nMonth; i++) 1262 { 1263 if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) || 1264 rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) ) 1265 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1266 else if ( i == 8 && rTransliteration.isEqual( "SEPT", 1267 xMonths[i].AbbrevName ) && 1268 rTransliteration.isEqual( aMStr, aSepShortened ) ) 1269 { // correct English abbreviation is SEPT, 1270 // but data mostly contains SEP only 1271 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1272 } 1273 } 1274 // if none found, then test english month names 1275 if ( !nMonth && pSecondCalendar && pSecondTransliteration ) 1276 { 1277 xMonths = pSecondCalendar->getMonths(); 1278 nMonthCount = xMonths.getLength(); 1279 for (i=0; i<nMonthCount && !nMonth; i++) 1280 { 1281 if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) || 1282 pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) ) 1283 { 1284 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1285 bSecondCal = true; 1286 } 1287 else if ( i == 8 && pSecondTransliteration->isEqual( 1288 aMStr, aSepShortened ) ) 1289 { // correct English abbreviation is SEPT, 1290 // but data mostly contains SEP only 1291 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1292 bSecondCal = true; 1293 } 1294 } 1295 } 1296 } 1297 1298 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); 1299 if ( nYear < 100 ) 1300 nYear = pDocFormatter->ExpandTwoDigitYear( nYear ); 1301 1302 CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar); 1303 sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear(); 1304 if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths ) 1305 { 1306 --nMonth; 1307 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay ); 1308 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth ); 1309 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear ); 1310 sal_Int16 nHour, nMinute, nSecond; 1311 // #i14974# The imported value should have no fractional value, so set the 1312 // time fields to zero (ICU calendar instance defaults to current date/time) 1313 nHour = nMinute = nSecond = 0; 1314 if (nFound > 3) 1315 nHour = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[3], nEnd[3]+1-nStart[3]))); 1316 if (nFound > 4) 1317 nMinute = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[4], nEnd[4]+1-nStart[4]))); 1318 if (nFound > 5) 1319 nSecond = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[5], nEnd[5]+1-nStart[5]))); 1320 // do not use calendar's milliseconds, to avoid fractional part truncation 1321 double fFrac = 0.0; 1322 if (nFound > 6) 1323 { 1324 sal_Unicode cDec = '.'; 1325 OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]); 1326 rtl_math_ConversionStatus eStatus; 1327 double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus ); 1328 if (eStatus == rtl_math_ConversionStatus_Ok) 1329 fFrac = fV / 86400.0; 1330 } 1331 sal_Int32 nPos; 1332 if (nFound > 3 && 1 <= nHour && nHour <= 12 // nHour 0 and >=13 can't be AM/PM 1333 && (nPos = nEnd[nFound-1] + 1) < nLen) 1334 { 1335 // Dreaded AM/PM may be following. 1336 while (nPos < nLen && rStr[nPos] == ' ') 1337 ++nPos; 1338 if (nPos < nLen) 1339 { 1340 sal_Int32 nStop = nPos; 1341 while (nStop < nLen && rStr[nStop] != ' ') 1342 ++nStop; 1343 OUString aAmPm = rStr.copy( nPos, nStop - nPos); 1344 // For AM only 12 needs to be treated, whereas for PM 1345 // it must not. Check both, locale and second/English 1346 // strings. 1347 if (nHour == 12 && 1348 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) || 1349 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "AM")))) 1350 { 1351 nHour = 0; 1352 } 1353 else if (nHour < 12 && 1354 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) || 1355 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "PM")))) 1356 { 1357 nHour += 12; 1358 } 1359 } 1360 } 1361 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour ); 1362 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute ); 1363 pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond ); 1364 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 ); 1365 if ( pCalendar->isValid() ) 1366 { 1367 // Whole days diff. 1368 double fDiff = DateTime::Sub( DateTime(pDocFormatter->GetNullDate()), 1369 pCalendar->getEpochStart()); 1370 // #i14974# must use getLocalDateTime to get the same 1371 // date values as set above 1372 double fDays = pCalendar->getLocalDateTime() + fFrac; 1373 fDays -= fDiff; 1374 1375 LanguageType eLatin, eCjk, eCtl; 1376 rDoc.GetLanguage( eLatin, eCjk, eCtl ); 1377 LanguageType eDocLang = eLatin; //! which language for date formats? 1378 1379 SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE); 1380 sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang ); 1381 // maybe there is a special format including seconds or milliseconds 1382 if (nFound > 5) 1383 nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang); 1384 1385 ScAddress aPos(nCol,nRow,nTab); 1386 if ( bUseDocImport ) 1387 rDocImport.setNumericCell(aPos, fDays); 1388 else 1389 rDoc.SetValue( aPos, fDays ); 1390 rDoc.SetNumberFormat(aPos, nFormat); 1391 1392 return bMultiLine; // success 1393 } 1394 } 1395 } 1396 } 1397 1398 // Standard or date not determined -> SetString / EditCell 1399 if( rStr.indexOf( '\n' ) == -1 ) 1400 { 1401 if (!bDetectNumFormat && nColFormat == SC_COL_STANDARD) 1402 { 1403 // Import a strict ISO 8601 date(+time) string even without 1404 // "Detect special numbers" or "Date (YMD)". 1405 do 1406 { 1407 // Simple pre-check before calling more expensive parser. 1408 // ([+-])(Y)YYYY-MM-DD 1409 if (rStr.getLength() < 10) 1410 break; 1411 const sal_Int32 n1 = rStr.indexOf('-', 1); 1412 if (n1 < 4) 1413 break; 1414 const sal_Int32 n2 = rStr.indexOf('-', n1 + 1); 1415 if (n2 < 7 || n1 + 3 < n2) 1416 break; 1417 1418 css::util::DateTime aDateTime; 1419 if (!sax::Converter::parseDateTime( aDateTime, rStr)) 1420 break; 1421 1422 sal_uInt32 nFormat = 0; 1423 double fVal = 0.0; 1424 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable(); 1425 if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal)) 1426 { 1427 if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE) 1428 { 1429 ScAddress aPos(nCol,nRow,nTab); 1430 if (bUseDocImport) 1431 rDocImport.setNumericCell(aPos, fVal); 1432 else 1433 rDoc.SetValue(aPos, fVal); 1434 rDoc.SetNumberFormat(aPos, nFormat); 1435 1436 return bMultiLine; // success 1437 } 1438 } 1439 } 1440 while(false); 1441 } 1442 1443 ScSetStringParam aParam; 1444 aParam.mpNumFormatter = pFormatter; 1445 aParam.mbDetectNumberFormat = bDetectNumFormat; 1446 aParam.mbDetectScientificNumberFormat = bDetectSciNumFormat; 1447 aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly; 1448 aParam.mbHandleApostrophe = false; 1449 aParam.mbCheckLinkFormula = true; 1450 if ( bUseDocImport ) 1451 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam); 1452 else 1453 rDoc.SetString( nCol, nRow, nTab, rStr, &aParam ); 1454 } 1455 else 1456 { 1457 bMultiLine = true; 1458 ScFieldEditEngine& rEngine = rDoc.GetEditEngine(); 1459 rEngine.SetTextCurrentDefaults(rStr); 1460 if ( bUseDocImport ) 1461 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject()); 1462 else 1463 rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() ); 1464 } 1465 return bMultiLine; 1466 } 1467 1468 static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext, 1469 bool& rbIsQuoted, bool& rbOverflowCell ) 1470 { 1471 sal_Int32 nLen = rLine.getLength(); 1472 if (nNext > nLen) 1473 nNext = nLen; 1474 if ( nNext <= nStart ) 1475 return OUString(); 1476 1477 const sal_Unicode* pStr = rLine.getStr(); 1478 1479 sal_Int32 nSpace = nNext; 1480 while ( nSpace > nStart && pStr[nSpace-1] == ' ' ) 1481 --nSpace; 1482 1483 rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"'); 1484 if (rbIsQuoted) 1485 { 1486 bool bFits = (nSpace - nStart - 3 <= nArbitraryCellLengthLimit); 1487 if (bFits) 1488 return rLine.copy(nStart+1, std::max< sal_Int32 >(0, nSpace-nStart-2)); 1489 else 1490 { 1491 SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data"); 1492 rbOverflowCell = true; 1493 return rLine.copy(nStart+1, nArbitraryCellLengthLimit); 1494 } 1495 } 1496 else 1497 { 1498 bool bFits = (nSpace - nStart <= nArbitraryCellLengthLimit); 1499 if (bFits) 1500 return rLine.copy(nStart, nSpace-nStart); 1501 else 1502 { 1503 SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data"); 1504 rbOverflowCell = true; 1505 return rLine.copy(nStart, nArbitraryCellLengthLimit); 1506 } 1507 } 1508 } 1509 1510 bool ScImportExport::ExtText2Doc( SvStream& rStrm ) 1511 { 1512 if (!pExtOptions) 1513 return Text2Doc( rStrm ); 1514 1515 sal_uInt64 const nOldPos = rStrm.Tell(); 1516 sal_uInt64 const nRemaining = rStrm.remainingSize(); 1517 std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh, 1518 ScResId( STR_LOAD_DOC ), nRemaining, true )); 1519 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() ); 1520 // tdf#82254 - check whether to include a byte-order-mark in the output 1521 if (nOldPos != rStrm.Tell()) 1522 mbIncludeBOM = true; 1523 1524 SCCOL nStartCol = aRange.aStart.Col(); 1525 SCCOL nEndCol = aRange.aEnd.Col(); 1526 SCROW nStartRow = aRange.aStart.Row(); 1527 const SCTAB nTab = aRange.aStart.Tab(); 1528 1529 bool bFixed = pExtOptions->IsFixedLen(); 1530 OUString aSeps = pExtOptions->GetFieldSeps(); // Need non-const for ReadCsvLine(), 1531 const sal_Unicode* pSeps = aSeps.getStr(); // but it will be const anyway (asserted below). 1532 bool bMerge = pExtOptions->IsMergeSeps(); 1533 bool bRemoveSpace = pExtOptions->IsRemoveSpace(); 1534 sal_uInt16 nInfoCount = pExtOptions->GetInfoCount(); 1535 const sal_Int32* pColStart = pExtOptions->GetColStart(); 1536 const sal_uInt8* pColFormat = pExtOptions->GetColFormat(); 1537 tools::Long nSkipLines = pExtOptions->GetStartRow(); 1538 1539 LanguageType eDocLang = pExtOptions->GetLanguage(); 1540 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang); 1541 bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber(); 1542 bool bDetectSciNumFormat = pExtOptions->IsDetectScientificNumber(); 1543 bool bEvaluateFormulas = pExtOptions->IsEvaluateFormulas(); 1544 bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells(); 1545 1546 // For date recognition 1547 ::utl::TransliterationWrapper aTransliteration( 1548 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ); 1549 aTransliteration.loadModuleIfNeeded( eDocLang ); 1550 CalendarWrapper aCalendar( comphelper::getProcessComponentContext() ); 1551 aCalendar.loadDefaultCalendar( 1552 LanguageTag::convertToLocale( eDocLang ) ); 1553 std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration; 1554 std::unique_ptr< CalendarWrapper > pEnglishCalendar; 1555 if ( eDocLang != LANGUAGE_ENGLISH_US ) 1556 { 1557 pEnglishTransliteration.reset(new ::utl::TransliterationWrapper ( 1558 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE )); 1559 aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US ); 1560 pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() )); 1561 pEnglishCalendar->loadDefaultCalendar( 1562 LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) ); 1563 } 1564 1565 OUString aLine; 1566 OUString aCell; 1567 sal_uInt16 i; 1568 SCROW nRow = nStartRow; 1569 sal_Unicode cDetectSep = 0xffff; // No separator detection here. 1570 1571 while(--nSkipLines>0) 1572 { 1573 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored 1574 if ( rStrm.eof() ) 1575 break; 1576 } 1577 1578 // Determine range for Undo. 1579 // We don't need this during import of a file to a new sheet or document... 1580 bool bDetermineRange = bUndo; 1581 bool bColumnsAreDetermined = false; 1582 1583 // Row heights don't need to be adjusted on the fly if EndPaste() is called 1584 // afterwards, which happens only if bDetermineRange. This variable also 1585 // survives the toggle of bDetermineRange down at the end of the do{} loop. 1586 bool bRangeIsDetermined = bDetermineRange; 1587 1588 bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText(); 1589 1590 sal_uInt64 nOriginalStreamPos = rStrm.Tell(); 1591 1592 SCROW nFirstUpdateRowHeight = SCROW_MAX; 1593 SCROW nLastUpdateRowHeight = -1; 1594 1595 ScDocumentImport aDocImport(rDoc); 1596 do 1597 { 1598 for( ;; ) 1599 { 1600 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); 1601 if ( rStrm.eof() && aLine.isEmpty() ) 1602 break; 1603 1604 assert(pSeps == aSeps.getStr()); 1605 1606 if ( nRow > rDoc.MaxRow() ) 1607 { 1608 bOverflowRow = true; // display warning on import 1609 break; // for 1610 } 1611 1612 if (!bDetermineRange) 1613 EmbeddedNullTreatment( aLine); 1614 1615 sal_Int32 nLineLen = aLine.getLength(); 1616 SCCOL nCol = nStartCol; 1617 bool bMultiLine = false; 1618 if ( bFixed ) // Fixed line length 1619 { 1620 if (bDetermineRange) 1621 { 1622 if (!bColumnsAreDetermined) 1623 { 1624 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it 1625 // is only an overflow if there is really data following to 1626 // be put behind the last column, which doesn't happen if 1627 // info is SC_COL_SKIP. 1628 for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i) 1629 { 1630 const sal_uInt8 nFmt = pColFormat[i]; 1631 if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either 1632 { 1633 if (nCol > rDoc.MaxCol()) 1634 bOverflowCol = true; // display warning on import 1635 ++nCol; 1636 } 1637 } 1638 bColumnsAreDetermined = true; 1639 } 1640 } 1641 else 1642 { 1643 sal_Int32 nStartIdx = 0; 1644 // Same maxcol+1 check reason as above. 1645 for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i) 1646 { 1647 sal_Int32 nNextIdx = nStartIdx; 1648 if (i + 1 < nInfoCount) 1649 CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] ); 1650 else 1651 nNextIdx = nLineLen; 1652 sal_uInt8 nFmt = pColFormat[i]; 1653 if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either 1654 { 1655 if (nCol > rDoc.MaxCol()) 1656 bOverflowCol = true; // display warning on import 1657 else 1658 { 1659 bool bIsQuoted = false; 1660 aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell ); 1661 if (bIsQuoted && bQuotedAsText) 1662 nFmt = SC_COL_TEXT; 1663 1664 bMultiLine |= lcl_PutString( 1665 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt, 1666 &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells, 1667 aTransliteration, aCalendar, 1668 pEnglishTransliteration.get(), pEnglishCalendar.get()); 1669 } 1670 ++nCol; 1671 } 1672 nStartIdx = nNextIdx; 1673 } 1674 } 1675 } 1676 else // Search for the separator 1677 { 1678 SCCOL nSourceCol = 0; 1679 sal_uInt16 nInfoStart = 0; 1680 const sal_Unicode* p = aLine.getStr(); 1681 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an 1682 // overflow if there is really data following to be put behind 1683 // the last column, which doesn't happen if info is 1684 // SC_COL_SKIP. 1685 while (*p && nCol <= rDoc.MaxCol()+1) 1686 { 1687 bool bIsQuoted = false; 1688 p = ScImportExport::ScanNextFieldFromString( p, aCell, 1689 cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace ); 1690 1691 sal_uInt8 nFmt = SC_COL_STANDARD; 1692 for ( i=nInfoStart; i<nInfoCount; i++ ) 1693 { 1694 if ( pColStart[i] == nSourceCol + 1 ) // pColStart is 1-based 1695 { 1696 nFmt = pColFormat[i]; 1697 nInfoStart = i + 1; // ColInfos are in succession 1698 break; // for 1699 } 1700 } 1701 if ( nFmt != SC_COL_SKIP ) 1702 { 1703 if (nCol > rDoc.MaxCol()) 1704 bOverflowCol = true; // display warning on import 1705 else if (!bDetermineRange) 1706 { 1707 if (bIsQuoted && bQuotedAsText) 1708 nFmt = SC_COL_TEXT; 1709 1710 bMultiLine |= lcl_PutString( 1711 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt, 1712 &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells, 1713 aTransliteration, aCalendar, 1714 pEnglishTransliteration.get(), pEnglishCalendar.get()); 1715 } 1716 ++nCol; 1717 } 1718 1719 ++nSourceCol; 1720 } 1721 } 1722 if (nEndCol < nCol) 1723 nEndCol = nCol; //! points to the next free or even rDoc.MaxCol()+2 1724 1725 if (!bDetermineRange) 1726 { 1727 if (bMultiLine && !bRangeIsDetermined && pDocSh) 1728 { // Adjust just once at the end for a whole range. 1729 nFirstUpdateRowHeight = std::min( nFirstUpdateRowHeight, nRow ); 1730 nLastUpdateRowHeight = std::max( nLastUpdateRowHeight, nRow ); 1731 } 1732 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos ); 1733 } 1734 ++nRow; 1735 } 1736 // so far nRow/nEndCol pointed to the next free 1737 if (nRow > nStartRow) 1738 --nRow; 1739 if (nEndCol > nStartCol) 1740 nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol()); 1741 1742 if (bDetermineRange) 1743 { 1744 aRange.aEnd.SetCol( nEndCol ); 1745 aRange.aEnd.SetRow( nRow ); 1746 1747 if ( !mbApi && nStartCol != nEndCol && 1748 !rDoc.IsBlockEmpty( nStartCol + 1, nStartRow, nEndCol, nRow, nTab ) ) 1749 { 1750 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent()); 1751 if (aBox.run() != RET_YES) 1752 { 1753 return false; 1754 } 1755 } 1756 1757 rStrm.Seek( nOriginalStreamPos ); 1758 nRow = nStartRow; 1759 if (!StartPaste()) 1760 { 1761 EndPaste(false); 1762 return false; 1763 } 1764 } 1765 1766 bDetermineRange = !bDetermineRange; // toggle 1767 } while (!bDetermineRange); 1768 1769 if ( !mbOverwriting ) 1770 aDocImport.finalize(); 1771 1772 xProgress.reset(); // make room for AdjustRowHeight progress 1773 1774 if( nFirstUpdateRowHeight < nLastUpdateRowHeight && pDocSh ) 1775 pDocSh->AdjustRowHeight( nFirstUpdateRowHeight, nLastUpdateRowHeight, nTab); 1776 1777 if (bRangeIsDetermined) 1778 EndPaste(false); 1779 1780 if (mbImportBroadcast && !mbOverwriting) 1781 { 1782 rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged); 1783 pDocSh->PostDataChanged(); 1784 } 1785 return true; 1786 } 1787 1788 void ScImportExport::EmbeddedNullTreatment( OUString & rStr ) 1789 { 1790 // A nasty workaround for data with embedded NULL characters. As long as we 1791 // can't handle them properly as cell content (things assume 0-terminated 1792 // strings at too many places) simply strip all NULL characters from raw 1793 // data. Excel does the same. See fdo#57841 for sample data. 1794 1795 // The normal case is no embedded NULL, check first before de-/allocating 1796 // ustring stuff. 1797 sal_Unicode cNull = 0; 1798 if (sal_Int32 pos = rStr.indexOf(cNull); pos >= 0) 1799 { 1800 rStr = rStr.replaceAll(std::u16string_view(&cNull, 1), u"", pos); 1801 } 1802 } 1803 1804 const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p, 1805 OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted, 1806 bool& rbOverflowCell, bool bRemoveSpace ) 1807 { 1808 rbIsQuoted = false; 1809 rField.clear(); 1810 const sal_Unicode cBlank = ' '; 1811 if (cStr && !ScGlobal::UnicodeStrChr(pSeps, cBlank)) 1812 { 1813 // Cope with broken generators that put leading blanks before a quoted 1814 // field, like "field1", "field2", "..." 1815 // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180 1816 const sal_Unicode* pb = p; 1817 while (*pb == cBlank) 1818 ++pb; 1819 if (*pb == cStr) 1820 p = pb; 1821 } 1822 if (cStr && *p == cStr) // String in quotes 1823 { 1824 rbIsQuoted = true; 1825 const sal_Unicode* p1; 1826 p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell ); 1827 while (!lcl_isFieldEnd( *p, pSeps)) 1828 p++; 1829 // Append remaining unquoted and undelimited data (dirty, dirty) to 1830 // this field. 1831 if (p > p1) 1832 { 1833 const sal_Unicode* ptrim_f = p; 1834 if ( bRemoveSpace ) 1835 { 1836 while ( ptrim_f > p1 && ( *(ptrim_f - 1) == cBlank ) ) 1837 --ptrim_f; 1838 } 1839 if (!lcl_appendLineData( rField, p1, ptrim_f)) 1840 rbOverflowCell = true; 1841 } 1842 if( *p ) 1843 p++; 1844 } 1845 else // up to delimiter 1846 { 1847 const sal_Unicode* p0 = p; 1848 while (!lcl_isFieldEnd( *p, pSeps)) 1849 p++; 1850 const sal_Unicode* ptrim_i = p0; 1851 const sal_Unicode* ptrim_f = p; // [ptrim_i,ptrim_f) is cell data after trimming 1852 if ( bRemoveSpace ) 1853 { 1854 while ( ptrim_i < ptrim_f && *ptrim_i == cBlank ) 1855 ++ptrim_i; 1856 while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) ) 1857 --ptrim_f; 1858 } 1859 if (!lcl_appendLineData( rField, ptrim_i, ptrim_f)) 1860 rbOverflowCell = true; 1861 if( *p ) 1862 p++; 1863 } 1864 if ( bMergeSeps ) // skip following delimiters 1865 { 1866 while (*p && ScGlobal::UnicodeStrChr( pSeps, *p)) 1867 p++; 1868 } 1869 return p; 1870 } 1871 1872 namespace { 1873 1874 /** 1875 * Check if a given string has any line break characters or separators. 1876 * 1877 * @param rStr string to inspect. 1878 * @param cSep separator character. 1879 */ 1880 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep ) 1881 { 1882 const sal_Unicode* p = rStr.getStr(); 1883 for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p) 1884 { 1885 sal_Unicode c = *p; 1886 if (c == cSep) 1887 // separator found. 1888 return true; 1889 1890 switch (c) 1891 { 1892 case '\n': 1893 case '\r': 1894 // line break found. 1895 return true; 1896 default: 1897 ; 1898 } 1899 } 1900 return false; 1901 } 1902 1903 } 1904 1905 bool ScImportExport::Doc2Text( SvStream& rStrm ) 1906 { 1907 SCCOL nCol; 1908 SCROW nRow; 1909 SCCOL nStartCol = aRange.aStart.Col(); 1910 SCROW nStartRow = aRange.aStart.Row(); 1911 SCTAB nStartTab = aRange.aStart.Tab(); 1912 SCCOL nEndCol = aRange.aEnd.Col(); 1913 SCROW nEndRow = aRange.aEnd.Row(); 1914 SCTAB nEndTab = aRange.aEnd.Tab(); 1915 1916 if (!rDoc.GetClipParam().isMultiRange() && nStartTab == nEndTab) 1917 if (!rDoc.ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow )) 1918 return false; 1919 1920 OUString aCellStr; 1921 1922 bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF); 1923 1924 // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab. 1925 std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 ); 1926 for( SCCOL i = nStartCol; i <= nEndCol; ++i ) 1927 rDoc.InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i ); 1928 for (nRow = nStartRow; nRow <= nEndRow; nRow++) 1929 { 1930 if (bIncludeFiltered || !rDoc.RowFiltered( nRow, nStartTab )) 1931 { 1932 for (nCol = nStartCol; nCol <= nEndCol; nCol++) 1933 { 1934 ScAddress aPos(nCol, nRow, nStartTab); 1935 sal_uInt32 nNumFmt = rDoc.GetNumberFormat(aPos); 1936 SvNumberFormatter* pFormatter = rDoc.GetFormatTable(); 1937 1938 ScRefCellValue aCell(rDoc, aPos, blockPos[ nCol - nStartCol ]); 1939 switch (aCell.getType()) 1940 { 1941 case CELLTYPE_FORMULA: 1942 { 1943 if (bFormulas) 1944 { 1945 aCellStr = aCell.getFormula()->GetFormula(); 1946 if( aCellStr.indexOf( cSep ) != -1 ) 1947 lcl_WriteString( rStrm, aCellStr, cStr, cStr ); 1948 else 1949 rStrm.WriteUnicodeOrByteText(aCellStr); 1950 } 1951 else 1952 { 1953 const Color* pColor; 1954 aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc); 1955 1956 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 ); 1957 if( bMultiLineText ) 1958 { 1959 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace ) 1960 aCellStr = aCellStr.replaceAll( "\n", " " ); 1961 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF ) 1962 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd()); 1963 } 1964 1965 if( mExportTextOptions.mcSeparatorConvertTo && cSep ) 1966 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) ); 1967 1968 if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) ) 1969 lcl_WriteString( rStrm, aCellStr, cStr, cStr ); 1970 else 1971 rStrm.WriteUnicodeOrByteText(aCellStr); 1972 } 1973 } 1974 break; 1975 case CELLTYPE_VALUE: 1976 { 1977 const Color* pColor; 1978 aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc); 1979 rStrm.WriteUnicodeOrByteText(aCellStr); 1980 } 1981 break; 1982 case CELLTYPE_NONE: 1983 break; 1984 default: 1985 { 1986 const Color* pColor; 1987 aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc); 1988 1989 bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 ); 1990 if( bMultiLineText ) 1991 { 1992 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace ) 1993 aCellStr = aCellStr.replaceAll( "\n", " " ); 1994 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF ) 1995 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd()); 1996 } 1997 1998 if( mExportTextOptions.mcSeparatorConvertTo && cSep ) 1999 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) ); 2000 2001 if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) ) 2002 lcl_WriteString( rStrm, aCellStr, cStr, cStr ); 2003 else 2004 rStrm.WriteUnicodeOrByteText(aCellStr); 2005 } 2006 } 2007 if( nCol < nEndCol ) 2008 rStrm.WriteUnicodeOrByteText(rtl::OUStringChar(cSep)); 2009 } 2010 // Do not append a line feed for one single cell. 2011 // NOTE: this Doc2Text() is only called for clipboard via 2012 // ScImportExport::ExportStream(). 2013 if (nStartRow != nEndRow || nStartCol != nEndCol) 2014 endlub(rStrm); 2015 if( rStrm.GetError() != ERRCODE_NONE ) 2016 break; 2017 if( nSizeLimit && rStrm.Tell() > nSizeLimit ) 2018 break; 2019 } 2020 } 2021 2022 return rStrm.GetError() == ERRCODE_NONE; 2023 } 2024 2025 bool ScImportExport::Sylk2Doc( SvStream& rStrm ) 2026 { 2027 bool bOk = true; 2028 bool bMyDoc = false; 2029 SylkVersion eVersion = SylkVersion::OTHER; 2030 2031 // US-English separators for StringToDouble 2032 sal_Unicode const cDecSep = '.'; 2033 sal_Unicode const cGrpSep = ','; 2034 2035 SCCOL nStartCol = aRange.aStart.Col(); 2036 SCROW nStartRow = aRange.aStart.Row(); 2037 SCCOL nEndCol = aRange.aEnd.Col(); 2038 SCROW nEndRow = aRange.aEnd.Row(); 2039 sal_uInt64 nOldPos = rStrm.Tell(); 2040 bool bData = !bSingle; 2041 ::std::vector< sal_uInt32 > aFormats; 2042 2043 if( !bSingle) 2044 bOk = StartPaste(); 2045 2046 while( bOk ) 2047 { 2048 OUString aLine; 2049 OUString aText; 2050 OStringBuffer aByteLine; 2051 SCCOL nCol = nStartCol; 2052 SCROW nRow = nStartRow; 2053 SCCOL nRefCol = nCol; 2054 SCROW nRefRow = nRow; 2055 rStrm.Seek( nOldPos ); 2056 for( ;; ) 2057 { 2058 //! allow unicode 2059 rStrm.ReadLine( aByteLine ); 2060 aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet()); 2061 if( rStrm.eof() ) 2062 break; 2063 bool bInvalidCol = false; 2064 bool bInvalidRow = false; 2065 const sal_Unicode* p = aLine.getStr(); 2066 sal_Unicode cTag = *p++; 2067 if( cTag == 'C' ) // Content 2068 { 2069 if( *p++ != ';' ) 2070 return false; 2071 2072 bool bInvalidRefCol = false; 2073 bool bInvalidRefRow = false; 2074 while( *p ) 2075 { 2076 sal_Unicode ch = *p++; 2077 ch = ScGlobal::ToUpperAlpha( ch ); 2078 switch( ch ) 2079 { 2080 case 'X': 2081 { 2082 bInvalidCol = false; 2083 bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol); 2084 if (bFail || nCol < 0 || rDoc.MaxCol() < nCol) 2085 { 2086 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol); 2087 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol()); 2088 bInvalidCol = bOverflowCol = true; 2089 } 2090 break; 2091 } 2092 case 'Y': 2093 { 2094 bInvalidRow = false; 2095 bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow); 2096 if (bFail || nRow < 0 || nMaxImportRow < nRow) 2097 { 2098 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow); 2099 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow); 2100 bInvalidRow = bOverflowRow = true; 2101 } 2102 break; 2103 } 2104 case 'C': 2105 { 2106 bInvalidRefCol = false; 2107 bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nRefCol); 2108 if (bFail || nRefCol < 0 || rDoc.MaxCol() < nRefCol) 2109 { 2110 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol); 2111 nRefCol = std::clamp<SCCOL>(nRefCol, 0, rDoc.MaxCol()); 2112 bInvalidRefCol = bOverflowCol = true; 2113 } 2114 break; 2115 } 2116 case 'R': 2117 { 2118 bInvalidRefRow = false; 2119 bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRefRow); 2120 if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow) 2121 { 2122 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow); 2123 nRefRow = std::clamp<SCROW>(nRefRow, 0, nMaxImportRow); 2124 bInvalidRefRow = bOverflowRow = true; 2125 } 2126 break; 2127 } 2128 case 'K': 2129 { 2130 if( !bSingle && 2131 ( nCol < nStartCol || nCol > nEndCol 2132 || nRow < nStartRow || nRow > nEndRow 2133 || nCol > rDoc.MaxCol() || nRow > nMaxImportRow 2134 || bInvalidCol || bInvalidRow ) ) 2135 break; 2136 if( !bData ) 2137 { 2138 if( nRow > nEndRow ) 2139 nEndRow = nRow; 2140 if( nCol > nEndCol ) 2141 nEndCol = nCol; 2142 break; 2143 } 2144 bool bText; 2145 if( *p == '"' ) 2146 { 2147 bText = true; 2148 aText.clear(); 2149 p = lcl_ScanSylkString( p, aText, eVersion); 2150 } 2151 else 2152 bText = false; 2153 const sal_Unicode* q = p; 2154 while( *q && *q != ';' ) 2155 q++; 2156 if ( (*q != ';' || *(q+1) != 'I') && !bInvalidCol && !bInvalidRow ) 2157 { // don't ignore value 2158 if( bText ) 2159 { 2160 rDoc.EnsureTable(aRange.aStart.Tab()); 2161 rDoc.SetTextCell( 2162 ScAddress(nCol, nRow, aRange.aStart.Tab()), aText); 2163 } 2164 else 2165 { 2166 double fVal = rtl_math_uStringToDouble( p, 2167 aLine.getStr() + aLine.getLength(), 2168 cDecSep, cGrpSep, nullptr, nullptr ); 2169 rDoc.SetValue( nCol, nRow, aRange.aStart.Tab(), fVal ); 2170 } 2171 } 2172 } 2173 break; 2174 case 'E': 2175 case 'M': 2176 { 2177 if ( ch == 'M' ) 2178 { 2179 if ( nRefCol < nCol ) 2180 nRefCol = nCol; 2181 if ( nRefRow < nRow ) 2182 nRefRow = nRow; 2183 if ( !bData ) 2184 { 2185 if( nRefRow > nEndRow ) 2186 nEndRow = nRefRow; 2187 if( nRefCol > nEndCol ) 2188 nEndCol = nRefCol; 2189 } 2190 } 2191 if( !bMyDoc || !bData ) 2192 break; 2193 aText = "="; 2194 p = lcl_ScanSylkFormula( p, aText, eVersion); 2195 2196 if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow))) 2197 break; 2198 2199 ScAddress aPos( nCol, nRow, aRange.aStart.Tab() ); 2200 /* FIXME: do we want GRAM_ODFF_A1 instead? At the 2201 * end it probably should be GRAM_ODFF_R1C1, since 2202 * R1C1 is what Excel writes in SYLK, or even 2203 * better GRAM_ENGLISH_XL_R1C1. */ 2204 const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1; 2205 ScCompiler aComp(rDoc, aPos, eGrammar); 2206 std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray 2207 rDoc.CheckLinkFormulaNeedingCheck(*xCode); 2208 if ( ch == 'M' ) 2209 { 2210 ScMarkData aMark(rDoc.GetSheetLimits()); 2211 aMark.SelectTable( aPos.Tab(), true ); 2212 rDoc.InsertMatrixFormula( nCol, nRow, nRefCol, 2213 nRefRow, aMark, OUString(), xCode.get() ); 2214 } 2215 else 2216 { 2217 ScFormulaCell* pFCell = new ScFormulaCell( 2218 rDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE); 2219 rDoc.SetFormulaCell(aPos, pFCell); 2220 } 2221 } 2222 break; 2223 } 2224 while( *p && *p != ';' ) 2225 p++; 2226 if( *p ) 2227 p++; 2228 } 2229 } 2230 else if( cTag == 'F' ) // Format 2231 { 2232 if( *p++ != ';' ) 2233 return false; 2234 sal_Int32 nFormat = -1; 2235 while( *p ) 2236 { 2237 sal_Unicode ch = *p++; 2238 ch = ScGlobal::ToUpperAlpha( ch ); 2239 switch( ch ) 2240 { 2241 case 'X': 2242 { 2243 bInvalidCol = false; 2244 bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol); 2245 if (bFail || nCol < 0 || rDoc.MaxCol() < nCol) 2246 { 2247 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol); 2248 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol()); 2249 bInvalidCol = bOverflowCol = true; 2250 } 2251 break; 2252 } 2253 case 'Y': 2254 { 2255 bInvalidRow = false; 2256 bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow); 2257 if (bFail || nRow < 0 || nMaxImportRow < nRow) 2258 { 2259 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow); 2260 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow); 2261 bInvalidRow = bOverflowRow = true; 2262 } 2263 break; 2264 } 2265 case 'P' : 2266 if ( bData ) 2267 { 2268 // F;P<n> sets format code of P;P<code> at 2269 // current position, or at ;X;Y if specified. 2270 // Note that ;X;Y may appear after ;P 2271 const sal_Unicode* p0 = p; 2272 while( *p && *p != ';' ) 2273 p++; 2274 OUString aNumber(p0, p - p0); 2275 nFormat = aNumber.toInt32(); 2276 } 2277 break; 2278 } 2279 while( *p && *p != ';' ) 2280 p++; 2281 if( *p ) 2282 p++; 2283 } 2284 if ( !bData ) 2285 { 2286 if( nRow > nEndRow ) 2287 nEndRow = nRow; 2288 if( nCol > nEndCol ) 2289 nEndCol = nCol; 2290 } 2291 if ( 0 <= nFormat && o3tl::make_unsigned(nFormat) < aFormats.size() && !bInvalidCol && !bInvalidRow ) 2292 { 2293 sal_uInt32 nKey = aFormats[nFormat]; 2294 rDoc.ApplyAttr( nCol, nRow, aRange.aStart.Tab(), 2295 SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) ); 2296 } 2297 } 2298 else if( cTag == 'P' ) 2299 { 2300 if ( bData && *p == ';' && *(p+1) == 'P' ) 2301 { 2302 OUString aCode( p+2 ); 2303 2304 sal_uInt32 nKey; 2305 sal_Int32 nCheckPos; 2306 2307 if (aCode.getLength() > 2048 && utl::ConfigManager::IsFuzzing()) 2308 { 2309 // consider an excessive length as a failure when fuzzing 2310 nCheckPos = 1; 2311 } 2312 else 2313 { 2314 // unescape doubled semicolons 2315 aCode = aCode.replaceAll(";;", ";"); 2316 // get rid of Xcl escape characters 2317 aCode = aCode.replaceAll("\x1b", ""); 2318 SvNumFormatType nType; 2319 rDoc.GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey, 2320 LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false); 2321 } 2322 2323 if ( nCheckPos ) 2324 nKey = 0; 2325 2326 aFormats.push_back( nKey ); 2327 } 2328 } 2329 else if (cTag == 'I' && *p == 'D' && aLine.getLength() > 4) 2330 { 2331 aLine = aLine.copy(4); 2332 if (aLine == "CALCOOO32") 2333 eVersion = SylkVersion::OOO32; 2334 else if (aLine == "SCALC3") 2335 eVersion = SylkVersion::SCALC3; 2336 bMyDoc = (eVersion <= SylkVersion::OWN); 2337 } 2338 else if( cTag == 'E' ) // End 2339 break; 2340 } 2341 if( !bData ) 2342 { 2343 aRange.aEnd.SetCol( nEndCol ); 2344 aRange.aEnd.SetRow( nEndRow ); 2345 bOk = StartPaste(); 2346 bData = true; 2347 } 2348 else 2349 break; 2350 } 2351 2352 EndPaste(); 2353 return bOk; 2354 } 2355 2356 bool ScImportExport::Doc2Sylk( SvStream& rStrm ) 2357 { 2358 SCCOL nCol; 2359 SCROW nRow; 2360 SCCOL nStartCol = aRange.aStart.Col(); 2361 SCROW nStartRow = aRange.aStart.Row(); 2362 SCCOL nEndCol = aRange.aEnd.Col(); 2363 SCROW nEndRow = aRange.aEnd.Row(); 2364 OUString aCellStr; 2365 OUString aValStr; 2366 rStrm.WriteUnicodeOrByteText(u"ID;PCALCOOO32"); 2367 endlub(rStrm); 2368 2369 for (nRow = nStartRow; nRow <= nEndRow; nRow++) 2370 { 2371 for (nCol = nStartCol; nCol <= nEndCol; nCol++) 2372 { 2373 OUString aBufStr; 2374 double nVal; 2375 bool bForm = false; 2376 SCROW r = nRow - nStartRow + 1; 2377 SCCOL c = nCol - nStartCol + 1; 2378 ScRefCellValue aCell(rDoc, ScAddress(nCol, nRow, aRange.aStart.Tab())); 2379 CellType eType = aCell.getType(); 2380 switch( eType ) 2381 { 2382 case CELLTYPE_FORMULA: 2383 bForm = bFormulas; 2384 if( rDoc.HasValueData( nCol, nRow, aRange.aStart.Tab()) ) 2385 goto hasvalue; 2386 else 2387 goto hasstring; 2388 2389 case CELLTYPE_VALUE: 2390 hasvalue: 2391 nVal = rDoc.GetValue( nCol, nRow, aRange.aStart.Tab() ); 2392 2393 aValStr = ::rtl::math::doubleToUString( nVal, 2394 rtl_math_StringFormat_Automatic, 2395 rtl_math_DecimalPlaces_Max, '.', true ); 2396 2397 aBufStr = "C;X" 2398 + OUString::number( c ) 2399 + ";Y" 2400 + OUString::number( r ) 2401 + ";K" 2402 + aValStr; 2403 rStrm.WriteUnicodeOrByteText(aBufStr); 2404 goto checkformula; 2405 2406 case CELLTYPE_STRING: 2407 case CELLTYPE_EDIT: 2408 hasstring: 2409 aCellStr = rDoc.GetString(nCol, nRow, aRange.aStart.Tab()); 2410 aCellStr = aCellStr.replaceAll("\n", SYLK_LF); 2411 2412 aBufStr = "C;X" 2413 + OUString::number( c ) 2414 + ";Y" 2415 + OUString::number( r ) 2416 + ";K"; 2417 rStrm.WriteUnicodeOrByteText(aBufStr); 2418 lcl_WriteString( rStrm, aCellStr, '"', ';' ); 2419 2420 checkformula: 2421 if( bForm ) 2422 { 2423 const ScFormulaCell* pFCell = aCell.getFormula(); 2424 switch ( pFCell->GetMatrixFlag() ) 2425 { 2426 case ScMatrixMode::Reference : 2427 aCellStr.clear(); 2428 break; 2429 default: 2430 aCellStr = pFCell->GetFormula( formula::FormulaGrammar::GRAM_PODF_A1); 2431 /* FIXME: do we want GRAM_ODFF_A1 instead? At 2432 * the end it probably should be 2433 * GRAM_ODFF_R1C1, since R1C1 is what Excel 2434 * writes in SYLK, or even better 2435 * GRAM_ENGLISH_XL_R1C1. */ 2436 } 2437 if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE && 2438 aCellStr.startsWith("{") && 2439 aCellStr.endsWith("}") ) 2440 { // cut off matrix {} characters 2441 aCellStr = aCellStr.copy(1, aCellStr.getLength()-2); 2442 } 2443 if ( aCellStr[0] == '=' ) 2444 aCellStr = aCellStr.copy(1); 2445 OUString aPrefix; 2446 switch ( pFCell->GetMatrixFlag() ) 2447 { 2448 case ScMatrixMode::Formula : 2449 { // diff expression with 'M' M$-extension 2450 SCCOL nC; 2451 SCROW nR; 2452 pFCell->GetMatColsRows( nC, nR ); 2453 nC += c - 1; 2454 nR += r - 1; 2455 aPrefix = ";R" 2456 + OUString::number( nR ) 2457 + ";C" 2458 + OUString::number( nC ) 2459 + ";M"; 2460 } 2461 break; 2462 case ScMatrixMode::Reference : 2463 { // diff expression with 'I' M$-extension 2464 ScAddress aPos; 2465 (void)pFCell->GetMatrixOrigin( rDoc, aPos ); 2466 aPrefix = ";I;R" 2467 + OUString::number( aPos.Row() - nStartRow + 1 ) 2468 + ";C" 2469 + OUString::number( aPos.Col() - nStartCol + 1 ); 2470 } 2471 break; 2472 default: 2473 // formula Expression 2474 aPrefix = ";E"; 2475 } 2476 rStrm.WriteUnicodeOrByteText(aPrefix); 2477 if ( !aCellStr.isEmpty() ) 2478 lcl_WriteString( rStrm, aCellStr, 0, ';' ); 2479 } 2480 endlub(rStrm); 2481 break; 2482 2483 default: 2484 { 2485 // added to avoid warnings 2486 } 2487 } 2488 } 2489 } 2490 rStrm.WriteUnicodeOrByteText(u"E"); 2491 endlub(rStrm); 2492 return rStrm.GetError() == ERRCODE_NONE; 2493 } 2494 2495 bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL ) 2496 { 2497 // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options 2498 ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll, 2499 aStreamPath, aNonConvertibleChars, maFilterOptions ); 2500 return rStrm.GetError() == ERRCODE_NONE; 2501 } 2502 2503 bool ScImportExport::Doc2RTF( SvStream& rStrm ) 2504 { 2505 // rtl_TextEncoding is ignored in ScExportRTF 2506 ScFormatFilter::Get().ScExportRTF( rStrm, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW ); 2507 return rStrm.GetError() == ERRCODE_NONE; 2508 } 2509 2510 bool ScImportExport::Doc2Dif( SvStream& rStrm ) 2511 { 2512 // for DIF in the clipboard, IBM_850 is always used 2513 ScFormatFilter::Get().ScExportDif( rStrm, &rDoc, aRange, RTL_TEXTENCODING_IBM_850 ); 2514 return true; 2515 } 2516 2517 bool ScImportExport::Dif2Doc( SvStream& rStrm ) 2518 { 2519 SCTAB nTab = aRange.aStart.Tab(); 2520 ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) ); 2521 pImportDoc->InitUndo( rDoc, nTab, nTab ); 2522 2523 // for DIF in the clipboard, IBM_850 is always used 2524 ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 ); 2525 2526 SCCOL nEndCol; 2527 SCROW nEndRow; 2528 pImportDoc->GetCellArea( nTab, nEndCol, nEndRow ); 2529 // if there are no cells in the imported content, nEndCol/nEndRow may be before the start 2530 if ( nEndCol < aRange.aStart.Col() ) 2531 nEndCol = aRange.aStart.Col(); 2532 if ( nEndRow < aRange.aStart.Row() ) 2533 nEndRow = aRange.aStart.Row(); 2534 aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab ); 2535 2536 bool bOk = StartPaste(); 2537 if (bOk) 2538 { 2539 InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; 2540 rDoc.DeleteAreaTab( aRange, nFlags ); 2541 pImportDoc->CopyToDocument(aRange, nFlags, false, rDoc); 2542 EndPaste(); 2543 } 2544 2545 return bOk; 2546 } 2547 2548 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL ) 2549 { 2550 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( &rDoc, aRange ); 2551 if (!pImp) 2552 return false; 2553 pImp->Read( rStrm, rBaseURL ); 2554 aRange = pImp->GetRange(); 2555 2556 bool bOk = StartPaste(); 2557 if (bOk) 2558 { 2559 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; 2560 rDoc.DeleteAreaTab( aRange, nFlags ); 2561 pImp->WriteToDocument(); 2562 EndPaste(); 2563 } 2564 return bOk; 2565 } 2566 2567 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL ) 2568 { 2569 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( &rDoc, rBaseURL, aRange); 2570 if (!pImp) 2571 return false; 2572 pImp->Read( rStrm, rBaseURL ); 2573 aRange = pImp->GetRange(); 2574 2575 bool bOk = StartPaste(); 2576 if (bOk) 2577 { 2578 // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in 2579 // a Draw Layer but no Draw View -> create Draw Layer and View here 2580 if (pDocSh) 2581 pDocSh->MakeDrawLayer(); 2582 2583 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; 2584 rDoc.DeleteAreaTab( aRange, nFlags ); 2585 2586 if (pExtOptions) 2587 { 2588 // Pick up import options if available. 2589 LanguageType eLang = pExtOptions->GetLanguage(); 2590 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang); 2591 bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber(); 2592 bool bScientificNumber = pExtOptions->IsDetectScientificNumber(); 2593 pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber, bScientificNumber); 2594 } 2595 else 2596 // Regular import, with no options. 2597 pImp->WriteToDocument(); 2598 2599 EndPaste(); 2600 } 2601 return bOk; 2602 } 2603 2604 #ifndef DISABLE_DYNLOADING 2605 2606 extern "C" { static void thisModule() {} } 2607 2608 #else 2609 2610 extern "C" { 2611 ScFormatFilterPlugin* ScFilterCreate(); 2612 } 2613 2614 #endif 2615 2616 typedef ScFormatFilterPlugin * (*FilterFn)(); 2617 ScFormatFilterPlugin &ScFormatFilter::Get() 2618 { 2619 static ScFormatFilterPlugin *plugin = []() 2620 { 2621 #ifndef DISABLE_DYNLOADING 2622 OUString sFilterLib(SVLIBRARY("scfilt")); 2623 static ::osl::Module aModule; 2624 bool bLoaded = aModule.is(); 2625 if (!bLoaded) 2626 bLoaded = aModule.loadRelative(&thisModule, sFilterLib); 2627 if (!bLoaded) 2628 bLoaded = aModule.load(sFilterLib); 2629 if (bLoaded) 2630 { 2631 oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" ); 2632 if (fn != nullptr) 2633 return reinterpret_cast<FilterFn>(fn)(); 2634 } 2635 assert(false); 2636 return static_cast<ScFormatFilterPlugin*>(nullptr); 2637 #else 2638 return ScFilterCreate(); 2639 #endif 2640 }(); 2641 2642 return *plugin; 2643 } 2644 2645 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated 2646 // array. 2647 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr, 2648 sal_Unicode c ) 2649 { 2650 while (*pStr) 2651 { 2652 if (*pStr == c) 2653 return pStr; 2654 ++pStr; 2655 } 2656 return nullptr; 2657 } 2658 2659 ScImportStringStream::ScImportStringStream( const OUString& rStr ) 2660 : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()), 2661 rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ) 2662 { 2663 SetStreamCharSet( RTL_TEXTENCODING_UNICODE ); 2664 #ifdef OSL_BIGENDIAN 2665 SetEndian(SvStreamEndian::BIG); 2666 #else 2667 SetEndian(SvStreamEndian::LITTLE); 2668 #endif 2669 } 2670 2671 OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak, 2672 OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, sal_uInt32 nMaxSourceLines ) 2673 { 2674 enum RetryState 2675 { 2676 FORBID, 2677 ALLOW, 2678 RETRY, 2679 RETRIED 2680 } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID); 2681 2682 sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0); 2683 2684 Label_RetryWithNewSep: 2685 2686 if (eRetryState == RetryState::RETRY) 2687 { 2688 eRetryState = RetryState::RETRIED; 2689 rStream.Seek( nStreamPos); 2690 } 2691 2692 OUString aStr; 2693 rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit); 2694 2695 if (bEmbeddedLineBreak) 2696 { 2697 sal_Int32 nFirstLineLength = aStr.getLength(); 2698 sal_uInt64 nFirstLineStreamPos = rStream.Tell(); 2699 sal_uInt32 nLine = 0; 2700 2701 const sal_Unicode* pSeps = rFieldSeparators.getStr(); 2702 2703 QuoteType eQuoteState = FIELDEND_QUOTE; 2704 bool bFieldStart = true; 2705 2706 sal_Int32 nLastOffset = 0; 2707 sal_Int32 nQuotes = 0; 2708 while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit) 2709 { 2710 const sal_Unicode * p = aStr.getStr() + nLastOffset; 2711 const sal_Unicode * const pStop = aStr.getStr() + aStr.getLength(); 2712 while (p < pStop) 2713 { 2714 if (!*p) 2715 { 2716 // Skip embedded null-characters. They don't change 2717 // anything and are handled at a higher level. 2718 ++p; 2719 continue; 2720 } 2721 2722 if (nQuotes) 2723 { 2724 if (*p == cFieldQuote) 2725 { 2726 if (bFieldStart) 2727 { 2728 ++nQuotes; 2729 bFieldStart = false; 2730 eQuoteState = FIELDSTART_QUOTE; 2731 nFirstLineLength = aStr.getLength(); 2732 nFirstLineStreamPos = rStream.Tell(); 2733 } 2734 // Do not detect a FIELDSTART_QUOTE if not in 2735 // bFieldStart mode, in which case for unquoted content 2736 // we are in FIELDEND_QUOTE state. 2737 else if (eQuoteState != FIELDEND_QUOTE) 2738 { 2739 eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep); 2740 2741 if (eRetryState == RetryState::ALLOW && rcDetectSep) 2742 { 2743 eRetryState = RetryState::RETRY; 2744 rFieldSeparators += OUStringChar(rcDetectSep); 2745 pSeps = rFieldSeparators.getStr(); 2746 goto Label_RetryWithNewSep; 2747 } 2748 2749 // DONTKNOW_QUOTE is an embedded unescaped quote we 2750 // don't count for pairing. 2751 if (eQuoteState != DONTKNOW_QUOTE) 2752 ++nQuotes; 2753 } 2754 } 2755 else if (eQuoteState == FIELDEND_QUOTE) 2756 { 2757 if (bFieldStart) 2758 // If blank is a separator it starts a field, if it 2759 // is not and thus maybe leading before quote we 2760 // are still at start of field regarding quotes. 2761 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2762 else 2763 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2764 } 2765 } 2766 else 2767 { 2768 if (*p == cFieldQuote && bFieldStart) 2769 { 2770 nQuotes = 1; 2771 eQuoteState = FIELDSTART_QUOTE; 2772 bFieldStart = false; 2773 nFirstLineLength = aStr.getLength(); 2774 nFirstLineStreamPos = rStream.Tell(); 2775 } 2776 else if (eQuoteState == FIELDEND_QUOTE) 2777 { 2778 // This also skips leading blanks at beginning of line 2779 // if followed by a quote. It's debatable whether we 2780 // actually want that or not, but congruent with what 2781 // ScanNextFieldFromString() does. 2782 if (bFieldStart) 2783 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2784 else 2785 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2786 } 2787 } 2788 // A quote character inside a field content does not start 2789 // a quote. 2790 ++p; 2791 } 2792 2793 if ((nQuotes & 1) == 0) 2794 // We still have a (theoretical?) problem here if due to 2795 // nArbitraryLineLengthLimit (or nMaxSourceLines below) we 2796 // split a string right between a doubled quote pair. 2797 break; 2798 else if (eQuoteState == DONTKNOW_QUOTE) 2799 // A single unescaped quote somewhere in a quote started 2800 // field, most likely that was not meant to have embedded 2801 // linefeeds either. 2802 break; 2803 else if (++nLine >= nMaxSourceLines && nMaxSourceLines > 0) 2804 // Unconditionally increment nLine even if nMaxSourceLines==0 2805 // so it can be observed in debugger. 2806 break; 2807 else 2808 { 2809 nLastOffset = aStr.getLength(); 2810 OUString aNext; 2811 rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit); 2812 if (!rStream.eof()) 2813 aStr += "\n" + aNext; 2814 } 2815 } 2816 if (nQuotes & 1) 2817 { 2818 // No closing quote at all. A single quote at field start => no 2819 // embedded linefeeds for that field, take only first logical line. 2820 aStr = aStr.copy( 0, nFirstLineLength); 2821 rStream.Seek( nFirstLineStreamPos); 2822 } 2823 } 2824 return aStr; 2825 } 2826 2827 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 2828
