1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <comphelper/processfactory.hxx> 21 #include <i18nlangtag/languagetag.hxx> 22 #include <sot/formats.hxx> 23 #include <sfx2/mieclip.hxx> 24 #include <com/sun/star/i18n/CalendarFieldIndex.hpp> 25 #include <sal/log.hxx> 26 #include <unotools/charclass.hxx> 27 #include <osl/module.hxx> 28 29 #include <global.hxx> 30 #include <docsh.hxx> 31 #include <undoblk.hxx> 32 #include <rangenam.hxx> 33 #include <tabvwsh.hxx> 34 #include <filter.hxx> 35 #include <asciiopt.hxx> 36 #include <formulacell.hxx> 37 #include <cellform.hxx> 38 #include <progress.hxx> 39 #include <scitems.hxx> 40 #include <editable.hxx> 41 #include <compiler.hxx> 42 #include <warnbox.hxx> 43 #include <clipparam.hxx> 44 #include <impex.hxx> 45 #include <editutil.hxx> 46 #include <patattr.hxx> 47 #include <docpool.hxx> 48 #include <stringutil.hxx> 49 #include <cellvalue.hxx> 50 #include <tokenarray.hxx> 51 #include <documentimport.hxx> 52 #include <refundo.hxx> 53 #include <mtvelements.hxx> 54 55 #include <globstr.hrc> 56 #include <scresid.hxx> 57 #include <o3tl/safeint.hxx> 58 #include 
<tools/svlibrary.h> 59 #include <unotools/configmgr.hxx> 60 #include <vcl/svapp.hxx> 61 #include <vcl/weld.hxx> 62 #include <editeng/editobj.hxx> 63 64 #include <memory> 65 #include <osl/endian.h> 66 67 // We don't want to end up with 2GB read in one line just because of malformed 68 // multiline fields, so chop it _somewhere_, which is twice supported columns 69 // times maximum cell content length, 2*1024*64K=128M, and because it's 70 // sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of luck 71 // anyway. 72 static const sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * 65536; 73 74 namespace 75 { 76 const char SYLK_LF[] = "\x1b :"; 77 78 bool lcl_IsEndianSwap( const SvStream& rStrm ) 79 { 80 #ifdef OSL_BIGENDIAN 81 return rStrm.GetEndian() != SvStreamEndian::BIG; 82 #else 83 return rStrm.GetEndian() != SvStreamEndian::LITTLE; 84 #endif 85 } 86 } 87 88 enum class SylkVersion 89 { 90 SCALC3, // Wrote wrongly quoted strings and unescaped semicolons. 91 OOO32, // Correct strings, plus multiline content. 92 OWN, // Place our new versions, if any, before this value. 93 OTHER // Assume that aliens wrote correct strings. 94 }; 95 96 // Whole document without Undo 97 ScImportExport::ScImportExport( ScDocument* p ) 98 : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ), 99 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? 
MAXROW : SCROWS32K), 100 cSep( '\t' ), cStr( '"' ), 101 bFormulas( false ), bIncludeFiltered( true ), 102 bAll( true ), bSingle( true ), bUndo( false ), 103 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 104 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 105 mExportTextOptions() 106 { 107 pUndoDoc = nullptr; 108 pExtOptions = nullptr; 109 } 110 111 // Insert am current cell without range(es) 112 ScImportExport::ScImportExport( ScDocument* p, const ScAddress& rPt ) 113 : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ), 114 aRange( rPt ), 115 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? MAXROW : SCROWS32K), 116 cSep( '\t' ), cStr( '"' ), 117 bFormulas( false ), bIncludeFiltered( true ), 118 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), 119 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 120 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 121 mExportTextOptions() 122 { 123 pUndoDoc = nullptr; 124 pExtOptions = nullptr; 125 } 126 127 // ctor with a range is only used for export 128 //! ctor with a string (and bSingle=true) is also used for DdeSetData 129 ScImportExport::ScImportExport( ScDocument* p, const ScRange& r ) 130 : pDocSh( dynamic_cast<ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ), 131 aRange( r ), 132 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? 
MAXROW : SCROWS32K), 133 cSep( '\t' ), cStr( '"' ), 134 bFormulas( false ), bIncludeFiltered( true ), 135 bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ), 136 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 137 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 138 mExportTextOptions() 139 { 140 pUndoDoc = nullptr; 141 pExtOptions = nullptr; 142 // Only one sheet (table) supported 143 aRange.aEnd.SetTab( aRange.aStart.Tab() ); 144 } 145 146 // Evaluate input string - either range, cell or the whole document (when error) 147 // If a View exists, the TabNo of the view will be used. 148 ScImportExport::ScImportExport( ScDocument* p, const OUString& rPos ) 149 : pDocSh( dynamic_cast< ScDocShell* >(p->GetDocumentShell()) ), pDoc( p ), 150 nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? MAXROW : SCROWS32K), 151 cSep( '\t' ), cStr( '"' ), 152 bFormulas( false ), bIncludeFiltered( true ), 153 bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ), 154 bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ), 155 mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), 156 mExportTextOptions() 157 { 158 pUndoDoc = nullptr; 159 pExtOptions = nullptr; 160 161 SCTAB nTab = ScDocShell::GetCurTab(); 162 aRange.aStart.SetTab( nTab ); 163 OUString aPos( rPos ); 164 // Named range? 165 ScRangeName* pRange = pDoc->GetRangeName(); 166 if (pRange) 167 { 168 const ScRangeData* pData = pRange->findByUpperName(ScGlobal::pCharClass->uppercase(aPos)); 169 if (pData) 170 { 171 if( pData->HasType( ScRangeData::Type::RefArea ) 172 || pData->HasType( ScRangeData::Type::AbsArea ) 173 || pData->HasType( ScRangeData::Type::AbsPos ) ) 174 { 175 pData->GetSymbol(aPos); 176 } 177 } 178 } 179 formula::FormulaGrammar::AddressConvention eConv = pDoc->GetAddressConvention(); 180 // Range? 181 if (aRange.Parse(aPos, pDoc, eConv) & ScRefFlags::VALID) 182 bSingle = false; 183 // Cell? 
184 else if (aRange.aStart.Parse(aPos, pDoc, eConv) & ScRefFlags::VALID) 185 aRange.aEnd = aRange.aStart; 186 else 187 bAll = true; 188 } 189 190 ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE 191 { 192 pUndoDoc.reset(); 193 pExtOptions.reset(); 194 } 195 196 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt ) 197 { 198 if ( pExtOptions ) 199 *pExtOptions = rOpt; 200 else 201 pExtOptions.reset(new ScAsciiOptions( rOpt )); 202 203 // "normal" Options 204 205 cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false); 206 cStr = rOpt.GetTextSep(); 207 } 208 209 void ScImportExport::SetFilterOptions(const OUString& rFilterOptions) 210 { 211 maFilterOptions = rFilterOptions; 212 } 213 214 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat ) 215 { 216 return nFormat == SotClipboardFormatId::STRING 217 || nFormat == SotClipboardFormatId::STRING_TSVC 218 || nFormat == SotClipboardFormatId::SYLK 219 || nFormat == SotClipboardFormatId::LINK 220 || nFormat == SotClipboardFormatId::HTML 221 || nFormat == SotClipboardFormatId::HTML_SIMPLE 222 || nFormat == SotClipboardFormatId::DIF; 223 } 224 225 // Prepare for Undo 226 bool ScImportExport::StartPaste() 227 { 228 if ( !bAll ) 229 { 230 ScEditableTester aTester( pDoc, aRange ); 231 if ( !aTester.IsEditable() ) 232 { 233 vcl::Window* pWin = Application::GetDefDialogParent(); 234 std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(pWin ? 
pWin->GetFrameWeld() : nullptr, 235 VclMessageType::Info, VclButtonsType::Ok, 236 ScResId(aTester.GetMessageId()))); 237 xInfoBox->run(); 238 return false; 239 } 240 } 241 if( bUndo && pDocSh && pDoc->IsUndoEnabled()) 242 { 243 pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO )); 244 pUndoDoc->InitUndo( pDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() ); 245 pDoc->CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc); 246 } 247 return true; 248 } 249 250 // Create Undo/Redo actions, Invalidate/Repaint 251 void ScImportExport::EndPaste(bool bAutoRowHeight) 252 { 253 bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight( 254 aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() ); 255 256 if( pUndoDoc && pDoc->IsUndoEnabled() && pDocSh ) 257 { 258 ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO )); 259 pRedoDoc->InitUndo( pDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() ); 260 pDoc->CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc); 261 ScMarkData aDestMark; 262 aDestMark.SetMarkArea(aRange); 263 pDocSh->GetUndoManager()->AddUndoAction( 264 std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr)); 265 } 266 pUndoDoc.reset(); 267 if( pDocSh ) 268 { 269 if (!bHeight) 270 pDocSh->PostPaint( aRange, PaintPartFlags::Grid ); 271 pDocSh->SetDocumentModified(); 272 } 273 ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell(); 274 if ( pViewSh ) 275 pViewSh->UpdateInputHandler(); 276 277 } 278 279 bool ScImportExport::ExportData( const OUString& rMimeType, 280 css::uno::Any & rValue ) 281 { 282 SvMemoryStream aStrm; 283 // mba: no BaseURL for data exchange 284 if( ExportStream( aStrm, OUString(), 285 SotExchange::GetFormatIdFromMimeType( rMimeType ) )) 286 { 287 aStrm.WriteUChar( 0 ); 288 rValue <<= css::uno::Sequence< sal_Int8 >( 289 static_cast<sal_Int8 const *>(aStrm.GetData()), 290 
aStrm.TellEnd() ); 291 return true; 292 } 293 return false; 294 } 295 296 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt ) 297 { 298 switch ( nFmt ) 299 { 300 // formats supporting unicode 301 case SotClipboardFormatId::STRING : 302 case SotClipboardFormatId::STRING_TSVC : 303 { 304 ScImportStringStream aStrm( rText); 305 return ImportStream( aStrm, OUString(), nFmt ); 306 // ImportStream must handle RTL_TEXTENCODING_UNICODE 307 } 308 default: 309 { 310 rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); 311 OString aTmp( rText.getStr(), rText.getLength(), eEnc ); 312 SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(sal_Char), StreamMode::READ ); 313 aStrm.SetStreamCharSet( eEnc ); 314 SetNoEndianSwap( aStrm ); //! no swapping in memory 315 return ImportStream( aStrm, OUString(), nFmt ); 316 } 317 } 318 } 319 320 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt ) 321 { 322 if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC ) 323 { 324 SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]"); 325 rtl_TextEncoding eEnc = osl_getThreadTextEncoding(); 326 OString aTmp; 327 bool bOk = ExportByteString( aTmp, eEnc, nFmt ); 328 rText = OStringToOUString( aTmp, eEnc ); 329 return bOk; 330 } 331 // nSizeLimit not needed for OUString 332 333 SvMemoryStream aStrm; 334 aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE ); 335 SetNoEndianSwap( aStrm ); //! 
no swapping in memory 336 // mba: no BaseURL for data exc 337 if( ExportStream( aStrm, OUString(), nFmt ) ) 338 { 339 aStrm.WriteUInt16( 0 ); 340 rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) ); 341 return true; 342 } 343 rText.clear(); 344 return false; 345 346 // ExportStream must handle RTL_TEXTENCODING_UNICODE 347 } 348 349 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt ) 350 { 351 OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" ); 352 if ( eEnc == RTL_TEXTENCODING_UNICODE ) 353 eEnc = osl_getThreadTextEncoding(); 354 355 if (!nSizeLimit) 356 nSizeLimit = SAL_MAX_UINT16; 357 358 SvMemoryStream aStrm; 359 aStrm.SetStreamCharSet( eEnc ); 360 SetNoEndianSwap( aStrm ); //! no swapping in memory 361 // mba: no BaseURL for data exchange 362 if( ExportStream( aStrm, OUString(), nFmt ) ) 363 { 364 aStrm.WriteChar( 0 ); 365 if( aStrm.TellEnd() <= nSizeLimit ) 366 { 367 rText = static_cast<const sal_Char*>(aStrm.GetData()); 368 return true; 369 } 370 } 371 rText.clear(); 372 return false; 373 } 374 375 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt ) 376 { 377 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC ) 378 { 379 if( ExtText2Doc( rStrm ) ) // evaluate pExtOptions 380 return true; 381 } 382 if( nFmt == SotClipboardFormatId::SYLK ) 383 { 384 if( Sylk2Doc( rStrm ) ) 385 return true; 386 } 387 if( nFmt == SotClipboardFormatId::DIF ) 388 { 389 if( Dif2Doc( rStrm ) ) 390 return true; 391 } 392 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT ) 393 { 394 if( RTF2Doc( rStrm, rBaseURL ) ) 395 return true; 396 } 397 if( nFmt == SotClipboardFormatId::LINK ) 398 return true; // Link-Import? 
399 if ( nFmt == SotClipboardFormatId::HTML ) 400 { 401 if( HTML2Doc( rStrm, rBaseURL ) ) 402 return true; 403 } 404 if ( nFmt == SotClipboardFormatId::HTML_SIMPLE ) 405 { 406 MSE40HTMLClipFormatObj aMSE40ClpObj; // needed to skip the header data 407 SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm ); 408 if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) ) 409 return true; 410 } 411 412 return false; 413 } 414 415 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt ) 416 { 417 if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC ) 418 { 419 if( Doc2Text( rStrm ) ) 420 return true; 421 } 422 if( nFmt == SotClipboardFormatId::SYLK ) 423 { 424 if( Doc2Sylk( rStrm ) ) 425 return true; 426 } 427 if( nFmt == SotClipboardFormatId::DIF ) 428 { 429 if( Doc2Dif( rStrm ) ) 430 return true; 431 } 432 if( nFmt == SotClipboardFormatId::LINK && !bAll ) 433 { 434 OUString aDocName; 435 if ( pDoc->IsClipboard() ) 436 aDocName = ScGlobal::GetClipDocName(); 437 else 438 { 439 SfxObjectShell* pShell = pDoc->GetDocumentShell(); 440 if (pShell) 441 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME ); 442 } 443 444 OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" ); 445 if( !aDocName.isEmpty() ) 446 { 447 // Always use Calc A1 syntax for paste link. 448 OUString aRefName; 449 ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D; 450 if( bSingle ) 451 aRefName = aRange.aStart.Format(nFlags, pDoc, formula::FormulaGrammar::CONV_OOO); 452 else 453 { 454 if( aRange.aStart.Tab() != aRange.aEnd.Tab() ) 455 nFlags |= ScRefFlags::TAB2_3D; 456 aRefName = aRange.Format(nFlags, pDoc, formula::FormulaGrammar::CONV_OOO); 457 } 458 OUString aAppName = Application::GetAppName(); 459 460 // extra bits are used to tell the client to prefer external 461 // reference link. 
462 OUString const aExtraBits("calc:extref"); 463 464 WriteUnicodeOrByteString( rStrm, aAppName, true ); 465 WriteUnicodeOrByteString( rStrm, aDocName, true ); 466 WriteUnicodeOrByteString( rStrm, aRefName, true ); 467 WriteUnicodeOrByteString( rStrm, aExtraBits, true ); 468 if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE ) 469 rStrm.WriteUInt16( 0 ); 470 else 471 rStrm.WriteChar( 0 ); 472 return rStrm.GetError() == ERRCODE_NONE; 473 } 474 } 475 if( nFmt == SotClipboardFormatId::HTML ) 476 { 477 if( Doc2HTML( rStrm, rBaseURL ) ) 478 return true; 479 } 480 if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT ) 481 { 482 if( Doc2RTF( rStrm ) ) 483 return true; 484 } 485 486 return false; 487 } 488 489 void ScImportExport::WriteUnicodeOrByteString( SvStream& rStrm, const OUString& rString, bool bZero ) 490 { 491 rtl_TextEncoding eEnc = rStrm.GetStreamCharSet(); 492 if ( eEnc == RTL_TEXTENCODING_UNICODE ) 493 { 494 if ( !lcl_IsEndianSwap( rStrm ) ) 495 rStrm.WriteBytes(rString.getStr(), rString.getLength() * sizeof(sal_Unicode)); 496 else 497 { 498 const sal_Unicode* p = rString.getStr(); 499 const sal_Unicode* const pStop = p + rString.getLength(); 500 while ( p < pStop ) 501 { 502 rStrm.WriteUInt16( *p ); 503 } 504 } 505 if ( bZero ) 506 rStrm.WriteUInt16( 0 ); 507 } 508 else 509 { 510 OString aByteStr(OUStringToOString(rString, eEnc)); 511 rStrm.WriteCharPtr( aByteStr.getStr() ); 512 if ( bZero ) 513 rStrm.WriteChar( 0 ); 514 } 515 } 516 517 // This function could be replaced by endlub() 518 void ScImportExport::WriteUnicodeOrByteEndl( SvStream& rStrm ) 519 { 520 if ( rStrm.GetStreamCharSet() == RTL_TEXTENCODING_UNICODE ) 521 { // same as endl() but unicode 522 switch ( rStrm.GetLineDelimiter() ) 523 { 524 case LINEEND_CR : 525 rStrm.WriteUInt16( '\r' ); 526 break; 527 case LINEEND_LF : 528 rStrm.WriteUInt16( '\n' ); 529 break; 530 default: 531 rStrm.WriteUInt16( '\r' ).WriteUInt16( '\n' ); 532 } 533 } 534 else 535 endl( rStrm ); 
536 } 537 538 void ScImportExport::SetNoEndianSwap( SvStream& rStrm ) 539 { 540 #ifdef OSL_BIGENDIAN 541 rStrm.SetEndian( SvStreamEndian::BIG ); 542 #else 543 rStrm.SetEndian( SvStreamEndian::LITTLE ); 544 #endif 545 } 546 547 enum QuoteType 548 { 549 FIELDSTART_QUOTE, 550 FIRST_QUOTE, 551 SECOND_QUOTE, 552 FIELDEND_QUOTE, 553 DONTKNOW_QUOTE 554 }; 555 556 /** Determine if *p is a quote that ends a quoted field. 557 558 Precondition: we are parsing a quoted field already and *p is a quote. 559 560 @return 561 FIELDEND_QUOTE if end of field quote 562 DONTKNOW_QUOTE anything else 563 */ 564 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep ) 565 { 566 // Due to broken CSV generators that don't double embedded quotes check if 567 // a field separator immediately or with trailing spaces follows the quote, 568 // only then end the field, or at end of string. 569 const sal_Unicode cBlank = ' '; 570 if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank)) 571 return FIELDEND_QUOTE; 572 // Detect a possible blank separator if it's not already in the list (which 573 // was checked right above for p[1]==cBlank). 574 if (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank) 575 rcDetectSep = cBlank; 576 while (p[1] == cBlank) 577 ++p; 578 if (!p[1] || ScGlobal::UnicodeStrChr( pSeps, p[1])) 579 return FIELDEND_QUOTE; 580 return DONTKNOW_QUOTE; 581 } 582 583 /** Determine if *p is a quote that is escaped by being doubled or ends a 584 quoted field. 585 586 Precondition: *p is a quote. 587 588 @param nQuotes 589 Quote characters encountered so far. 590 Odd (after opening quote) means either no embedded quotes or only quote 591 pairs so far. 592 Even means either not in a quoted field or already one quote 593 encountered, the first of a pair. 594 595 @return 596 FIELDSTART_QUOTE if first quote in a field, either starting content or 597 embedded so caller should check beforehand. 
598 FIRST_QUOTE if first of a doubled quote 599 SECOND_QUOTE if second of a doubled quote 600 FIELDEND_QUOTE if end of field quote 601 DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field, 602 do not increment nQuotes in caller then! 603 */ 604 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p, 605 const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep ) 606 { 607 if ((nQuotes % 2) == 0) 608 { 609 if (p[-1] == cStr) 610 return SECOND_QUOTE; 611 else 612 { 613 SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?"); 614 return FIELDSTART_QUOTE; 615 } 616 } 617 if (p[1] == cStr) 618 return FIRST_QUOTE; 619 return lcl_isFieldEndQuote( p, pSeps, rcDetectSep); 620 } 621 622 /** Append characters of [p1,p2) to rField. 623 624 @returns TRUE if ok; FALSE if data overflow, truncated 625 */ 626 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 ) 627 { 628 OSL_ENSURE( rField.getLength() + (p2 - p1) <= SAL_MAX_UINT16, "lcl_appendLineData: data overflow"); 629 if (rField.getLength() + (p2 - p1) <= SAL_MAX_UINT16) 630 { 631 rField += OUString( p1, sal::static_int_cast<sal_Int32>( p2 - p1 ) ); 632 return true; 633 } 634 else 635 { 636 rField += OUString( p1, SAL_MAX_UINT16 - rField.getLength() ); 637 return false; 638 } 639 } 640 641 enum class DoubledQuoteMode 642 { 643 KEEP_ALL, // both are taken, additionally start and end quote are included in string 644 ESCAPE, // escaped quote, one is taken, one ignored 645 }; 646 647 static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rString, 648 const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell ) 649 { 650 if (eMode != DoubledQuoteMode::KEEP_ALL) 651 p++; //! 
jump over opening quote 652 bool bCont; 653 do 654 { 655 bCont = false; 656 const sal_Unicode* p0 = p; 657 for( ;; ) 658 { 659 if( !*p ) 660 break; 661 if( *p == cStr ) 662 { 663 if ( *++p != cStr ) 664 { 665 // break or continue for loop 666 if (eMode == DoubledQuoteMode::ESCAPE) 667 { 668 sal_Unicode cDetectSep = 0xffff; // No separator detection here. 669 if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE) 670 break; 671 else 672 continue; 673 } 674 else 675 break; 676 } 677 // doubled quote char 678 switch ( eMode ) 679 { 680 case DoubledQuoteMode::KEEP_ALL : 681 p++; // both for us (not breaking for-loop) 682 break; 683 case DoubledQuoteMode::ESCAPE : 684 p++; // one for us (breaking for-loop) 685 bCont = true; // and more 686 break; 687 } 688 if ( eMode == DoubledQuoteMode::ESCAPE ) 689 break; 690 } 691 else 692 p++; 693 } 694 if ( p0 < p ) 695 { 696 if (!lcl_appendLineData( rString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p))) 697 rbOverflowCell = true; 698 } 699 } while ( bCont ); 700 return p; 701 } 702 703 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion ) 704 { 705 // Older versions didn't escape the semicolon. 706 // Older versions quoted the string and doubled embedded quotes, but not 707 // the semicolons, which was plain wrong. 
708 if (eVersion >= SylkVersion::OOO32) 709 rString = rString.replaceAll(";;", ";"); 710 else 711 rString = rString.replaceAll("\"\"", "\""); 712 713 rString = rString.replaceAll(SYLK_LF, "\n"); 714 } 715 716 static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p, 717 OUString& rString, SylkVersion eVersion ) 718 { 719 const sal_Unicode* pStartQuote = p; 720 const sal_Unicode* pEndQuote = nullptr; 721 while( *(++p) ) 722 { 723 if( *p == '"' ) 724 { 725 pEndQuote = p; 726 if (eVersion >= SylkVersion::OOO32) 727 { 728 if (*(p+1) == ';') 729 { 730 if (*(p+2) == ';') 731 { 732 p += 2; // escaped ';' 733 pEndQuote = nullptr; 734 } 735 else 736 break; // end field 737 } 738 } 739 else 740 { 741 if (*(p+1) == '"') 742 { 743 ++p; // escaped '"' 744 pEndQuote = nullptr; 745 } 746 else if (*(p+1) == ';') 747 break; // end field 748 } 749 } 750 } 751 if (!pEndQuote) 752 pEndQuote = p; // Take all data as string. 753 rString += OUString(pStartQuote + 1, sal::static_int_cast<sal_Int32>( pEndQuote - pStartQuote - 1 ) ); 754 lcl_UnescapeSylk( rString, eVersion); 755 return p; 756 } 757 758 static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p, 759 OUString& rString, SylkVersion eVersion ) 760 { 761 const sal_Unicode* pStart = p; 762 if (eVersion >= SylkVersion::OOO32) 763 { 764 while (*p) 765 { 766 if (*p == ';') 767 { 768 if (*(p+1) == ';') 769 ++p; // escaped ';' 770 else 771 break; // end field 772 } 773 ++p; 774 } 775 rString += OUString( pStart, sal::static_int_cast<sal_Int32>( p - pStart)); 776 lcl_UnescapeSylk( rString, eVersion); 777 } 778 else 779 { 780 // Nasty. If in old versions the formula contained a semicolon, it was 781 // quoted and embedded quotes were doubled, but semicolons were not. 
If 782 // there was no semicolon, it could still contain quotes and doubled 783 // embedded quotes if it was something like ="a""b", which was saved as 784 // E"a""b" as is and has to be preserved, even if older versions 785 // couldn't even load it correctly. However, theoretically another 786 // field might follow and thus the line contain a semicolon again, such 787 // as ...;E"a""b";... 788 bool bQuoted = false; 789 if (*p == '"') 790 { 791 // May be a quoted expression or just a string constant expression 792 // with quotes. 793 while (*(++p)) 794 { 795 if (*p == '"') 796 { 797 if (*(p+1) == '"') 798 ++p; // escaped '"' 799 else 800 break; // closing '"', had no ';' yet 801 } 802 else if (*p == ';') 803 { 804 bQuoted = true; // ';' within quoted expression 805 break; 806 } 807 } 808 p = pStart; 809 } 810 if (bQuoted) 811 p = lcl_ScanSylkString( p, rString, eVersion); 812 else 813 { 814 while (*p && *p != ';') 815 ++p; 816 rString += OUString( pStart, sal::static_int_cast<sal_Int32>( p - pStart)); 817 } 818 } 819 return p; 820 } 821 822 static void lcl_DoubleEscapeChar( OUString& rString, sal_Unicode cStr ) 823 { 824 sal_Int32 n = 0; 825 while( ( n = rString.indexOf( cStr, n ) ) != -1 ) 826 { 827 rString = rString.replaceAt( n, 0, OUString(cStr) ); 828 n += 2; 829 } 830 } 831 832 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc ) 833 { 834 if (cEsc) 835 lcl_DoubleEscapeChar( rString, cEsc ); 836 837 if (cQuote) 838 { 839 rString = OUStringLiteral1(cQuote) + rString + OUStringLiteral1(cQuote); 840 } 841 842 ScImportExport::WriteUnicodeOrByteString( rStrm, rString ); 843 } 844 845 static void lcl_WriteSimpleString( SvStream& rStrm, const OUString& rString ) 846 { 847 ScImportExport::WriteUnicodeOrByteString( rStrm, rString ); 848 } 849 850 bool ScImportExport::Text2Doc( SvStream& rStrm ) 851 { 852 bool bOk = true; 853 854 sal_Unicode pSeps[2]; 855 pSeps[0] = cSep; 856 pSeps[1] = 0; 857 858 ScSetStringParam 
aSetStringParam; 859 aSetStringParam.mbCheckLinkFormula = true; 860 861 SCCOL nStartCol = aRange.aStart.Col(); 862 SCROW nStartRow = aRange.aStart.Row(); 863 SCCOL nEndCol = aRange.aEnd.Col(); 864 SCROW nEndRow = aRange.aEnd.Row(); 865 sal_uLong nOldPos = rStrm.Tell(); 866 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() ); 867 bool bData = !bSingle; 868 if( !bSingle) 869 bOk = StartPaste(); 870 871 while( bOk ) 872 { 873 OUString aLine; 874 OUString aCell; 875 SCROW nRow = nStartRow; 876 rStrm.Seek( nOldPos ); 877 for( ;; ) 878 { 879 rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit ); 880 if( rStrm.eof() ) 881 break; 882 SCCOL nCol = nStartCol; 883 const sal_Unicode* p = aLine.getStr(); 884 while( *p ) 885 { 886 aCell.clear(); 887 const sal_Unicode* q = p; 888 while (*p && *p != cSep) 889 { 890 // Always look for a pairing quote and ignore separator in between. 891 while (*p && *p == cStr) 892 q = p = lcl_ScanString( p, aCell, pSeps, cStr, DoubledQuoteMode::KEEP_ALL, bOverflowCell ); 893 // All until next separator or quote. 
894 while (*p && *p != cSep && *p != cStr) 895 ++p; 896 if (!lcl_appendLineData( aCell, q, p)) 897 bOverflowCell = true; // display warning on import 898 q = p; 899 } 900 if (*p) 901 ++p; 902 if (ValidCol(nCol) && ValidRow(nRow) ) 903 { 904 if( bSingle ) 905 { 906 if (nCol>nEndCol) nEndCol = nCol; 907 if (nRow>nEndRow) nEndRow = nRow; 908 } 909 if( bData && nCol <= nEndCol && nRow <= nEndRow ) 910 pDoc->SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam ); 911 } 912 else // too many columns/rows 913 { 914 if (!ValidRow(nRow)) 915 bOverflowRow = true; // display warning on import 916 if (!ValidCol(nCol)) 917 bOverflowCol = true; // display warning on import 918 } 919 ++nCol; 920 } 921 ++nRow; 922 } 923 924 if( !bData ) 925 { 926 aRange.aEnd.SetCol( nEndCol ); 927 aRange.aEnd.SetRow( nEndRow ); 928 bOk = StartPaste(); 929 bData = true; 930 } 931 else 932 break; 933 } 934 935 EndPaste(); 936 if (bOk && mbImportBroadcast) 937 { 938 pDoc->BroadcastCells(aRange, SfxHintId::ScDataChanged); 939 pDocSh->PostDataChanged(); 940 } 941 942 return bOk; 943 } 944 945 // Extended Ascii-Import 946 947 static bool lcl_PutString( 948 ScDocumentImport& rDocImport, bool bUseDocImport, 949 SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat, 950 SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bSkipEmptyCells, 951 const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar, 952 const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar ) 953 { 954 ScDocument* pDoc = &rDocImport.getDoc(); 955 bool bMultiLine = false; 956 if ( nColFormat == SC_COL_SKIP || !ValidCol(nCol) || !ValidRow(nRow) ) 957 return bMultiLine; 958 if ( rStr.isEmpty() ) 959 { 960 if ( !bSkipEmptyCells ) 961 { // delete destination cell 962 if ( bUseDocImport ) 963 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr ); 964 else 965 pDoc->SetString( nCol, nRow, nTab, rStr ); 966 } 967 return false; 968 } 
969 970 if ( nColFormat == SC_COL_TEXT ) 971 { 972 double fDummy; 973 sal_uInt32 nIndex = 0; 974 if (pFormatter->IsNumberFormat(rStr, nIndex, fDummy)) 975 { 976 // Set the format of this cell to Text. 977 sal_uInt32 nFormat = pFormatter->GetStandardFormat(SvNumFormatType::TEXT); 978 ScPatternAttr aNewAttrs(pDoc->GetPool()); 979 SfxItemSet& rSet = aNewAttrs.GetItemSet(); 980 rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) ); 981 pDoc->ApplyPattern(nCol, nRow, nTab, aNewAttrs); 982 983 } 984 if ( bUseDocImport ) 985 { 986 if(ScStringUtil::isMultiline(rStr)) 987 { 988 ScFieldEditEngine& rEngine = pDoc->GetEditEngine(); 989 rEngine.SetText(rStr); 990 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject()); 991 return true; 992 } 993 else 994 { 995 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr); 996 return false; 997 } 998 } else 999 { 1000 pDoc->SetTextCell(ScAddress(nCol, nRow, nTab), rStr); 1001 return bMultiLine; 1002 } 1003 } 1004 1005 if ( nColFormat == SC_COL_ENGLISH ) 1006 { 1007 //! SetString with Extra-Flag ??? 
1008 1009 SvNumberFormatter* pDocFormatter = pDoc->GetFormatTable(); 1010 sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US); 1011 double fVal; 1012 if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) ) 1013 { 1014 // Numberformat will not be set to English 1015 if ( bUseDocImport ) 1016 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal ); 1017 else 1018 pDoc->SetValue( nCol, nRow, nTab, fVal ); 1019 return bMultiLine; 1020 } 1021 // else, continue with SetString 1022 } 1023 else if ( nColFormat != SC_COL_STANDARD ) // Datumformats 1024 { 1025 const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t 1026 sal_Int32 nLen = rStr.getLength(); 1027 sal_Int32 nStart[nMaxNumberParts]; 1028 sal_Int32 nEnd[nMaxNumberParts]; 1029 1030 sal_uInt16 nDP, nMP, nYP; 1031 switch ( nColFormat ) 1032 { 1033 case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; break; 1034 case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; break; 1035 case SC_COL_DMY: 1036 default: nDP = 0; nMP = 1; nYP = 2; break; 1037 } 1038 1039 sal_uInt16 nFound = 0; 1040 bool bInNum = false; 1041 for ( sal_Int32 nPos=0; nPos<nLen && (bInNum || 1042 nFound<nMaxNumberParts); nPos++ ) 1043 { 1044 if (bInNum && nFound == 3 && nColFormat == SC_COL_YMD && 1045 nPos <= nStart[nFound]+2 && rStr[nPos] == 'T') 1046 bInNum = false; // ISO-8601: YYYY-MM-DDThh:mm... 
1047 else if ((((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1)) 1048 && ScGlobal::pCharClass->isLetterNumeric( rStr, nPos)) 1049 || ScGlobal::pCharClass->isDigit( rStr, nPos)) 1050 { 1051 if (!bInNum) 1052 { 1053 bInNum = true; 1054 nStart[nFound] = nPos; 1055 ++nFound; 1056 } 1057 nEnd[nFound-1] = nPos; 1058 } 1059 else 1060 bInNum = false; 1061 } 1062 1063 if ( nFound == 1 ) 1064 { 1065 // try to break one number (without separators) into date fields 1066 1067 sal_Int32 nDateStart = nStart[0]; 1068 sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart; 1069 1070 if ( nDateLen >= 5 && nDateLen <= 8 && 1071 ScGlobal::pCharClass->isNumeric( rStr.copy( nDateStart, nDateLen ) ) ) 1072 { 1073 // 6 digits: 2 each for day, month, year 1074 // 8 digits: 4 for year, 2 each for day and month 1075 // 5 or 7 digits: first field is shortened by 1 1076 1077 bool bLongYear = ( nDateLen >= 7 ); 1078 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 ); 1079 1080 sal_uInt16 nFieldStart = nDateStart; 1081 for (sal_uInt16 nPos=0; nPos<3; nPos++) 1082 { 1083 sal_uInt16 nFieldEnd = nFieldStart + 1; // default: 2 digits 1084 if ( bLongYear && nPos == nYP ) 1085 nFieldEnd += 2; // 2 extra digits for long year 1086 if ( bShortFirst && nPos == 0 ) 1087 --nFieldEnd; // first field shortened? 
1088 1089 nStart[nPos] = nFieldStart; 1090 nEnd[nPos] = nFieldEnd; 1091 nFieldStart = nFieldEnd + 1; 1092 } 1093 nFound = 3; 1094 } 1095 } 1096 1097 if ( nFound >= 3 ) 1098 { 1099 using namespace ::com::sun::star; 1100 bool bSecondCal = false; 1101 sal_uInt16 nDay = static_cast<sal_uInt16>(rStr.copy( nStart[nDP], nEnd[nDP]+1-nStart[nDP] ).toInt32()); 1102 sal_uInt16 nYear = static_cast<sal_uInt16>(rStr.copy( nStart[nYP], nEnd[nYP]+1-nStart[nYP] ).toInt32()); 1103 OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] ); 1104 sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32()); 1105 if (!nMonth) 1106 { 1107 static const char aSepShortened[] = "SEP"; 1108 uno::Sequence< i18n::CalendarItem2 > xMonths; 1109 sal_Int32 i, nMonthCount; 1110 // first test all month names from local international 1111 xMonths = rCalendar.getMonths(); 1112 nMonthCount = xMonths.getLength(); 1113 for (i=0; i<nMonthCount && !nMonth; i++) 1114 { 1115 if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) || 1116 rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) ) 1117 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1118 else if ( i == 8 && rTransliteration.isEqual( "SEPT", 1119 xMonths[i].AbbrevName ) && 1120 rTransliteration.isEqual( aMStr, aSepShortened ) ) 1121 { // correct English abbreviation is SEPT, 1122 // but data mostly contains SEP only 1123 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1124 } 1125 } 1126 // if none found, then test english month names 1127 if ( !nMonth && pSecondCalendar && pSecondTransliteration ) 1128 { 1129 xMonths = pSecondCalendar->getMonths(); 1130 nMonthCount = xMonths.getLength(); 1131 for (i=0; i<nMonthCount && !nMonth; i++) 1132 { 1133 if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) || 1134 pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) ) 1135 { 1136 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1137 bSecondCal = true; 1138 } 1139 else if ( i == 8 && pSecondTransliteration->isEqual( 
1140 aMStr, aSepShortened ) ) 1141 { // correct English abbreviation is SEPT, 1142 // but data mostly contains SEP only 1143 nMonth = sal::static_int_cast<sal_Int16>( i+1 ); 1144 bSecondCal = true; 1145 } 1146 } 1147 } 1148 } 1149 1150 SvNumberFormatter* pDocFormatter = pDoc->GetFormatTable(); 1151 if ( nYear < 100 ) 1152 nYear = pDocFormatter->ExpandTwoDigitYear( nYear ); 1153 1154 CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar); 1155 sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear(); 1156 if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths ) 1157 { 1158 --nMonth; 1159 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay ); 1160 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth ); 1161 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear ); 1162 sal_Int16 nHour, nMinute, nSecond; 1163 // #i14974# The imported value should have no fractional value, so set the 1164 // time fields to zero (ICU calendar instance defaults to current date/time) 1165 nHour = nMinute = nSecond = 0; 1166 if (nFound > 3) 1167 nHour = static_cast<sal_Int16>(rStr.copy( nStart[3], nEnd[3]+1-nStart[3]).toInt32()); 1168 if (nFound > 4) 1169 nMinute = static_cast<sal_Int16>(rStr.copy( nStart[4], nEnd[4]+1-nStart[4]).toInt32()); 1170 if (nFound > 5) 1171 nSecond = static_cast<sal_Int16>(rStr.copy( nStart[5], nEnd[5]+1-nStart[5]).toInt32()); 1172 // do not use calendar's milliseconds, to avoid fractional part truncation 1173 double fFrac = 0.0; 1174 if (nFound > 6) 1175 { 1176 sal_Unicode cDec = '.'; 1177 OUString aT( &cDec, 1); 1178 aT += rStr.copy( nStart[6], nEnd[6]+1-nStart[6]); 1179 rtl_math_ConversionStatus eStatus; 1180 double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus ); 1181 if (eStatus == rtl_math_ConversionStatus_Ok) 1182 fFrac = fV / 86400.0; 1183 } 1184 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour ); 1185 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute ); 1186 pCalendar->setValue( 
i18n::CalendarFieldIndex::SECOND, nSecond ); 1187 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 ); 1188 if ( pCalendar->isValid() ) 1189 { 1190 double fDiff = DateTime(pDocFormatter->GetNullDate()) - 1191 pCalendar->getEpochStart(); 1192 // #i14974# must use getLocalDateTime to get the same 1193 // date values as set above 1194 double fDays = pCalendar->getLocalDateTime() + fFrac; 1195 fDays -= fDiff; 1196 1197 LanguageType eLatin, eCjk, eCtl; 1198 pDoc->GetLanguage( eLatin, eCjk, eCtl ); 1199 LanguageType eDocLang = eLatin; //! which language for date formats? 1200 1201 SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE); 1202 sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang ); 1203 // maybe there is a special format including seconds or milliseconds 1204 if (nFound > 5) 1205 nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang); 1206 1207 ScAddress aPos(nCol,nRow,nTab); 1208 if ( bUseDocImport ) 1209 rDocImport.setNumericCell(aPos, fDays); 1210 else 1211 pDoc->SetValue( aPos, fDays ); 1212 pDoc->SetNumberFormat(aPos, nFormat); 1213 1214 return bMultiLine; // success 1215 } 1216 } 1217 } 1218 } 1219 1220 // Standard or date not determined -> SetString / EditCell 1221 if( rStr.indexOf( '\n' ) == -1 ) 1222 { 1223 ScSetStringParam aParam; 1224 aParam.mpNumFormatter = pFormatter; 1225 aParam.mbDetectNumberFormat = bDetectNumFormat; 1226 aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly; 1227 aParam.mbHandleApostrophe = false; 1228 aParam.mbCheckLinkFormula = true; 1229 if ( bUseDocImport ) 1230 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam); 1231 else 1232 pDoc->SetString( nCol, nRow, nTab, rStr, &aParam ); 1233 } 1234 else 1235 { 1236 bMultiLine = true; 1237 ScFieldEditEngine& rEngine = pDoc->GetEditEngine(); 1238 rEngine.SetText(rStr); 1239 if ( bUseDocImport ) 1240 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), 
rEngine.CreateTextObject()); 1241 else 1242 pDoc->SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() ); 1243 } 1244 return bMultiLine; 1245 } 1246 1247 static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext, 1248 bool& rbIsQuoted, bool& rbOverflowCell ) 1249 { 1250 sal_Int32 nLen = rLine.getLength(); 1251 if (nNext > nLen) 1252 nNext = nLen; 1253 if ( nNext <= nStart ) 1254 return EMPTY_OUSTRING; 1255 1256 const sal_Unicode* pStr = rLine.getStr(); 1257 1258 sal_Int32 nSpace = nNext; 1259 while ( nSpace > nStart && pStr[nSpace-1] == ' ' ) 1260 --nSpace; 1261 1262 rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"'); 1263 if (rbIsQuoted) 1264 { 1265 bool bFits = (nSpace - nStart - 3 <= SAL_MAX_UINT16); 1266 OSL_ENSURE( bFits, "lcl_GetFixed: line doesn't fit into data"); 1267 if (bFits) 1268 return rLine.copy(nStart+1, nSpace-nStart-2); 1269 else 1270 { 1271 rbOverflowCell = true; 1272 return rLine.copy(nStart+1, SAL_MAX_UINT16); 1273 } 1274 } 1275 else 1276 { 1277 bool bFits = (nSpace - nStart <= SAL_MAX_UINT16); 1278 OSL_ENSURE( bFits, "lcl_GetFixed: line doesn't fit into data"); 1279 if (bFits) 1280 return rLine.copy(nStart, nSpace-nStart); 1281 else 1282 { 1283 rbOverflowCell = true; 1284 return rLine.copy(nStart, SAL_MAX_UINT16); 1285 } 1286 } 1287 } 1288 1289 bool ScImportExport::ExtText2Doc( SvStream& rStrm ) 1290 { 1291 if (!pExtOptions) 1292 return Text2Doc( rStrm ); 1293 1294 sal_uInt64 const nOldPos = rStrm.Tell(); 1295 sal_uInt64 const nRemaining = rStrm.remainingSize(); 1296 std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh, 1297 ScResId( STR_LOAD_DOC ), nRemaining, true )); 1298 rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() ); 1299 1300 SCCOL nStartCol = aRange.aStart.Col(); 1301 SCCOL nEndCol = aRange.aEnd.Col(); 1302 SCROW nStartRow = aRange.aStart.Row(); 1303 SCTAB nTab = aRange.aStart.Tab(); 1304 1305 bool bFixed = pExtOptions->IsFixedLen(); 1306 OUString aSeps = 
pExtOptions->GetFieldSeps(); // Need non-const for ReadCsvLine(), 1307 const sal_Unicode* pSeps = aSeps.getStr(); // but it will be const anyway (asserted below). 1308 bool bMerge = pExtOptions->IsMergeSeps(); 1309 bool bRemoveSpace = pExtOptions->IsRemoveSpace(); 1310 sal_uInt16 nInfoCount = pExtOptions->GetInfoCount(); 1311 const sal_Int32* pColStart = pExtOptions->GetColStart(); 1312 const sal_uInt8* pColFormat = pExtOptions->GetColFormat(); 1313 long nSkipLines = pExtOptions->GetStartRow(); 1314 1315 LanguageType eDocLang = pExtOptions->GetLanguage(); 1316 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang); 1317 bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber(); 1318 bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells(); 1319 1320 // For date recognition 1321 ::utl::TransliterationWrapper aTransliteration( 1322 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ); 1323 aTransliteration.loadModuleIfNeeded( eDocLang ); 1324 CalendarWrapper aCalendar( comphelper::getProcessComponentContext() ); 1325 aCalendar.loadDefaultCalendar( 1326 LanguageTag::convertToLocale( eDocLang ) ); 1327 std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration; 1328 std::unique_ptr< CalendarWrapper > pEnglishCalendar; 1329 if ( eDocLang != LANGUAGE_ENGLISH_US ) 1330 { 1331 pEnglishTransliteration.reset(new ::utl::TransliterationWrapper ( 1332 comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE )); 1333 aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US ); 1334 pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() )); 1335 pEnglishCalendar->loadDefaultCalendar( 1336 LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) ); 1337 } 1338 1339 OUString aLine; 1340 OUString aCell; 1341 sal_uInt16 i; 1342 SCROW nRow = nStartRow; 1343 sal_Unicode cDetectSep = 0xffff; // No separator detection here. 
1344 1345 while(--nSkipLines>0) 1346 { 1347 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored 1348 if ( rStrm.eof() ) 1349 break; 1350 } 1351 1352 // Determine range for Undo. 1353 // We don't need this during import of a file to a new sheet or document... 1354 bool bDetermineRange = bUndo; 1355 1356 // Row heights don't need to be adjusted on the fly if EndPaste() is called 1357 // afterwards, which happens only if bDetermineRange. This variable also 1358 // survives the toggle of bDetermineRange down at the end of the do{} loop. 1359 bool bRangeIsDetermined = bDetermineRange; 1360 1361 bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText(); 1362 1363 sal_uLong nOriginalStreamPos = rStrm.Tell(); 1364 1365 ScDocumentImport aDocImport(*pDoc); 1366 do 1367 { 1368 for( ;; ) 1369 { 1370 aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); 1371 if ( rStrm.eof() && aLine.isEmpty() ) 1372 break; 1373 1374 assert(pSeps == aSeps.getStr()); 1375 1376 if ( nRow > MAXROW ) 1377 { 1378 bOverflowRow = true; // display warning on import 1379 break; // for 1380 } 1381 1382 EmbeddedNullTreatment( aLine); 1383 1384 sal_Int32 nLineLen = aLine.getLength(); 1385 SCCOL nCol = nStartCol; 1386 bool bMultiLine = false; 1387 if ( bFixed ) // Fixed line length 1388 { 1389 // Yes, the check is nCol<=MAXCOL+1, +1 because it is only an 1390 // overflow if there is really data following to be put behind 1391 // the last column, which doesn't happen if info is 1392 // SC_COL_SKIP. 1393 for ( i=0; i<nInfoCount && nCol <= MAXCOL+1; i++ ) 1394 { 1395 sal_uInt8 nFmt = pColFormat[i]; 1396 if (nFmt != SC_COL_SKIP) // otherwise don't increment nCol either 1397 { 1398 if (nCol > MAXCOL) 1399 bOverflowCol = true; // display warning on import 1400 else if (!bDetermineRange) 1401 { 1402 sal_Int32 nStart = pColStart[i]; 1403 sal_Int32 nNext = ( i+1 < nInfoCount ) ? 
pColStart[i+1] : nLineLen; 1404 bool bIsQuoted = false; 1405 aCell = lcl_GetFixed( aLine, nStart, nNext, bIsQuoted, bOverflowCell ); 1406 if (bIsQuoted && bQuotedAsText) 1407 nFmt = SC_COL_TEXT; 1408 1409 bMultiLine |= lcl_PutString( 1410 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt, 1411 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, aCalendar, 1412 pEnglishTransliteration.get(), pEnglishCalendar.get()); 1413 } 1414 ++nCol; 1415 } 1416 } 1417 } 1418 else // Search for the separator 1419 { 1420 SCCOL nSourceCol = 0; 1421 sal_uInt16 nInfoStart = 0; 1422 const sal_Unicode* p = aLine.getStr(); 1423 // Yes, the check is nCol<=MAXCOL+1, +1 because it is only an 1424 // overflow if there is really data following to be put behind 1425 // the last column, which doesn't happen if info is 1426 // SC_COL_SKIP. 1427 while (*p && nCol <= MAXCOL+1) 1428 { 1429 bool bIsQuoted = false; 1430 p = ScImportExport::ScanNextFieldFromString( p, aCell, 1431 cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace ); 1432 1433 sal_uInt8 nFmt = SC_COL_STANDARD; 1434 for ( i=nInfoStart; i<nInfoCount; i++ ) 1435 { 1436 if ( pColStart[i] == nSourceCol + 1 ) // pColStart is 1-based 1437 { 1438 nFmt = pColFormat[i]; 1439 nInfoStart = i + 1; // ColInfos are in succession 1440 break; // for 1441 } 1442 } 1443 if ( nFmt != SC_COL_SKIP ) 1444 { 1445 if (nCol > MAXCOL) 1446 bOverflowCol = true; // display warning on import 1447 else if (!bDetermineRange) 1448 { 1449 if (bIsQuoted && bQuotedAsText) 1450 nFmt = SC_COL_TEXT; 1451 1452 bMultiLine |= lcl_PutString( 1453 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt, 1454 &aNumFormatter, bDetectNumFormat, bSkipEmptyCells, aTransliteration, 1455 aCalendar, pEnglishTransliteration.get(), pEnglishCalendar.get()); 1456 } 1457 ++nCol; 1458 } 1459 1460 ++nSourceCol; 1461 } 1462 } 1463 if (nEndCol < nCol) 1464 nEndCol = nCol; //! 
points to the next free or even MAXCOL+2 1465 1466 if (!bDetermineRange) 1467 { 1468 if (bMultiLine && !bRangeIsDetermined && pDocSh) 1469 pDocSh->AdjustRowHeight( nRow, nRow, nTab); 1470 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos ); 1471 } 1472 ++nRow; 1473 } 1474 // so far nRow/nEndCol pointed to the next free 1475 if (nRow > nStartRow) 1476 --nRow; 1477 if (nEndCol > nStartCol) 1478 nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), MAXCOL); 1479 1480 if (bDetermineRange) 1481 { 1482 aRange.aEnd.SetCol( nEndCol ); 1483 aRange.aEnd.SetRow( nRow ); 1484 1485 if ( !mbApi && nStartCol != nEndCol && 1486 !pDoc->IsBlockEmpty( nTab, nStartCol + 1, nStartRow, nEndCol, nRow ) ) 1487 { 1488 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent()); 1489 if (aBox.run() != RET_YES) 1490 { 1491 return false; 1492 } 1493 } 1494 1495 rStrm.Seek( nOriginalStreamPos ); 1496 nRow = nStartRow; 1497 if (!StartPaste()) 1498 { 1499 EndPaste(false); 1500 return false; 1501 } 1502 } 1503 1504 bDetermineRange = !bDetermineRange; // toggle 1505 } while (!bDetermineRange); 1506 if ( !mbOverwriting ) 1507 aDocImport.finalize(); 1508 1509 xProgress.reset(); // make room for AdjustRowHeight progress 1510 if (bRangeIsDetermined) 1511 EndPaste(false); 1512 1513 if (mbImportBroadcast && !mbOverwriting) 1514 { 1515 pDoc->BroadcastCells(aRange, SfxHintId::ScDataChanged); 1516 pDocSh->PostDataChanged(); 1517 } 1518 return true; 1519 } 1520 1521 void ScImportExport::EmbeddedNullTreatment( OUString & rStr ) 1522 { 1523 // A nasty workaround for data with embedded NULL characters. As long as we 1524 // can't handle them properly as cell content (things assume 0-terminated 1525 // strings at too many places) simply strip all NULL characters from raw 1526 // data. Excel does the same. See fdo#57841 for sample data. 1527 1528 // The normal case is no embedded NULL, check first before de-/allocating 1529 // ustring stuff. 
1530 sal_Unicode cNull = 0; 1531 if (rStr.indexOf( cNull) >= 0) 1532 { 1533 rStr = rStr.replaceAll( OUString( &cNull, 1), ""); 1534 } 1535 } 1536 1537 const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p, 1538 OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted, 1539 bool& rbOverflowCell, bool bRemoveSpace ) 1540 { 1541 rbIsQuoted = false; 1542 rField.clear(); 1543 const sal_Unicode cBlank = ' '; 1544 if (!ScGlobal::UnicodeStrChr( pSeps, cBlank)) 1545 { 1546 // Cope with broken generators that put leading blanks before a quoted 1547 // field, like "field1", "field2", "..." 1548 // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180 1549 const sal_Unicode* pb = p; 1550 while (*pb == cBlank) 1551 ++pb; 1552 if (*pb == cStr) 1553 p = pb; 1554 } 1555 if ( *p == cStr ) // String in quotes 1556 { 1557 rbIsQuoted = true; 1558 const sal_Unicode* p1; 1559 p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell ); 1560 while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) ) 1561 p++; 1562 // Append remaining unquoted and undelimited data (dirty, dirty) to 1563 // this field. 
1564 if (p > p1) 1565 { 1566 const sal_Unicode* ptrim_f = p; 1567 if ( bRemoveSpace ) 1568 { 1569 while ( ptrim_f > p1 && ( *(ptrim_f - 1) == cBlank ) ) 1570 --ptrim_f; 1571 } 1572 if (!lcl_appendLineData( rField, p1, ptrim_f)) 1573 rbOverflowCell = true; 1574 } 1575 if( *p ) 1576 p++; 1577 } 1578 else // up to delimiter 1579 { 1580 const sal_Unicode* p0 = p; 1581 while ( *p && !ScGlobal::UnicodeStrChr( pSeps, *p ) ) 1582 p++; 1583 const sal_Unicode* ptrim_i = p0; 1584 const sal_Unicode* ptrim_f = p; // [ptrim_i,ptrim_f) is cell data after trimming 1585 if ( bRemoveSpace ) 1586 { 1587 while ( *ptrim_i == cBlank ) 1588 ++ptrim_i; 1589 while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) ) 1590 --ptrim_f; 1591 } 1592 if (!lcl_appendLineData( rField, ptrim_i, ptrim_f)) 1593 rbOverflowCell = true; 1594 if( *p ) 1595 p++; 1596 } 1597 if ( bMergeSeps ) // skip following delimiters 1598 { 1599 while ( *p && ScGlobal::UnicodeStrChr( pSeps, *p ) ) 1600 p++; 1601 } 1602 return p; 1603 } 1604 1605 namespace { 1606 1607 /** 1608 * Check if a given string has any line break characters or separators. 1609 * 1610 * @param rStr string to inspect. 1611 * @param cSep separator character. 1612 */ 1613 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep ) 1614 { 1615 const sal_Unicode* p = rStr.getStr(); 1616 for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p) 1617 { 1618 sal_Unicode c = *p; 1619 if (c == cSep) 1620 // separator found. 1621 return true; 1622 1623 switch (c) 1624 { 1625 case '\n': 1626 case '\r': 1627 // line break found. 
return true;
            default:
                ;
        }
    }
    return false;
}

}

// Write the cells of aRange on sheet aRange.aStart.Tab() to rStrm as text,
// one line per row, with cells separated by cSep.
// Returns false if the range could not be shrunk to a data area (single
// range on one sheet only), otherwise whether the stream is free of errors.
bool ScImportExport::Doc2Text( SvStream& rStrm )
{
    SCCOL nCol;
    SCROW nRow;
    SCCOL nStartCol = aRange.aStart.Col();
    SCROW nStartRow = aRange.aStart.Row();
    SCTAB nStartTab = aRange.aStart.Tab();
    SCCOL nEndCol = aRange.aEnd.Col();
    SCROW nEndRow = aRange.aEnd.Row();
    SCTAB nEndTab = aRange.aEnd.Tab();

    // Only a plain single-sheet range is trimmed to the cells that hold data.
    if (!pDoc->GetClipParam().isMultiRange() && nStartTab == nEndTab)
        if (!pDoc->ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow ))
            return false;

    OUString aCellStr;

    // Embedded '\n' only needs converting if the system line end differs.
    bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);

    // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
    std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 );
    for( SCCOL i = nStartCol; i <= nEndCol; ++i )
        pDoc->InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i );
    for (nRow = nStartRow; nRow <= nEndRow; nRow++)
    {
        // Filtered rows are left out unless explicitly requested.
        if (bIncludeFiltered || !pDoc->RowFiltered( nRow, nStartTab ))
        {
            for (nCol = nStartCol; nCol <= nEndCol; nCol++)
            {
                ScAddress aPos(nCol, nRow, nStartTab);
                sal_uInt32 nNumFmt = pDoc->GetNumberFormat(aPos);
                SvNumberFormatter* pFormatter = pDoc->GetFormatTable();

                ScRefCellValue aCell(*pDoc, aPos, blockPos[ nCol - nStartCol ]);
                switch (aCell.meType)
                {
                    case CELLTYPE_FORMULA:
                    {
                        if (bFormulas)
                        {
                            // Export the formula expression itself; it is
                            // quoted only if it contains the separator.
                            aCell.mpFormula->GetFormula( aCellStr );
                            if( aCellStr.indexOf( cSep ) != -1 )
                                lcl_WriteString( rStrm, aCellStr, cStr, cStr );
                            else
                                lcl_WriteSimpleString( rStrm, aCellStr );
                        }
                        else
                        {
                            // Export the formatted formula result.
                            Color* pColor;
                            ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);

                            bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
                            if( bMultiLineText )
                            {
                                if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
                                    aCellStr = aCellStr.replaceAll( "\n", " " );
                                else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
                                    aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
                            }

                            if( mExportTextOptions.mcSeparatorConvertTo && cSep )
                                aCellStr = aCellStr.replaceAll( OUStringLiteral1(cSep), OUStringLiteral1(mExportTextOptions.mcSeparatorConvertTo) );

                            // NOTE(review): quotes are added here only for an
                            // embedded separator, whereas the default branch
                            // below also quotes on line breaks via
                            // hasLineBreaksOrSeps() - confirm this difference
                            // is intended.
                            if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) )
                                lcl_WriteString( rStrm, aCellStr, cStr, cStr );
                            else
                                lcl_WriteSimpleString( rStrm, aCellStr );
                        }
                    }
                    break;
                    case CELLTYPE_VALUE:
                    {
                        // Numbers are written as formatted, never quoted.
                        Color* pColor;
                        ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);
                        lcl_WriteSimpleString( rStrm, aCellStr );
                    }
                    break;
                    case CELLTYPE_NONE:
                    break;
                    default:
                    {
                        // All remaining cell types.
                        Color* pColor;
                        ScCellFormat::GetString(aCell, nNumFmt, aCellStr, &pColor, *pFormatter, pDoc);

                        bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
                        if( bMultiLineText )
                        {
                            if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
                                aCellStr = aCellStr.replaceAll( "\n", " " );
                            else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
                                aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
                        }

                        if( mExportTextOptions.mcSeparatorConvertTo && cSep )
                            aCellStr = aCellStr.replaceAll( OUStringLiteral1(cSep), OUStringLiteral1(mExportTextOptions.mcSeparatorConvertTo) );

                        if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) )
                            lcl_WriteString( rStrm, aCellStr, cStr, cStr );
                        else
                            lcl_WriteSimpleString( rStrm, aCellStr );
                    }
                }
                // Separator between cells, but not after the last one.
                if( nCol < nEndCol )
                    lcl_WriteSimpleString( rStrm, OUString(cSep) );
            }
            WriteUnicodeOrByteEndl( rStrm );
if( rStrm.GetError() != ERRCODE_NONE ) 1745 break; 1746 if( nSizeLimit && rStrm.Tell() > nSizeLimit ) 1747 break; 1748 } 1749 } 1750 1751 return rStrm.GetError() == ERRCODE_NONE; 1752 } 1753 1754 bool ScImportExport::Sylk2Doc( SvStream& rStrm ) 1755 { 1756 bool bOk = true; 1757 bool bMyDoc = false; 1758 SylkVersion eVersion = SylkVersion::OTHER; 1759 1760 // US-English separators for StringToDouble 1761 sal_Unicode const cDecSep = '.'; 1762 sal_Unicode const cGrpSep = ','; 1763 1764 SCCOL nStartCol = aRange.aStart.Col(); 1765 SCROW nStartRow = aRange.aStart.Row(); 1766 SCCOL nEndCol = aRange.aEnd.Col(); 1767 SCROW nEndRow = aRange.aEnd.Row(); 1768 sal_uLong nOldPos = rStrm.Tell(); 1769 bool bData = !bSingle; 1770 ::std::vector< sal_uInt32 > aFormats; 1771 1772 if( !bSingle) 1773 bOk = StartPaste(); 1774 1775 while( bOk ) 1776 { 1777 OUString aLine; 1778 OUString aText; 1779 OString aByteLine; 1780 SCCOL nCol = nStartCol; 1781 SCROW nRow = nStartRow; 1782 SCCOL nRefCol = nCol; 1783 SCROW nRefRow = nRow; 1784 rStrm.Seek( nOldPos ); 1785 for( ;; ) 1786 { 1787 //! 
allow unicode 1788 rStrm.ReadLine( aByteLine ); 1789 aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet()); 1790 if( rStrm.eof() ) 1791 break; 1792 bool bInvalidCol = false; 1793 bool bInvalidRow = false; 1794 const sal_Unicode* p = aLine.getStr(); 1795 sal_Unicode cTag = *p++; 1796 if( cTag == 'C' ) // Content 1797 { 1798 if( *p++ != ';' ) 1799 return false; 1800 1801 bool bInvalidRefCol = false; 1802 bool bInvalidRefRow = false; 1803 while( *p ) 1804 { 1805 sal_Unicode ch = *p++; 1806 ch = ScGlobal::ToUpperAlpha( ch ); 1807 switch( ch ) 1808 { 1809 case 'X': 1810 { 1811 bInvalidCol = false; 1812 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol); 1813 if (bFail || nCol < 0 || MAXCOL < nCol) 1814 { 1815 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol); 1816 nCol = std::max<SCCOL>(0, std::min<SCCOL>(nCol, MAXCOL)); 1817 bInvalidCol = bOverflowCol = true; 1818 } 1819 break; 1820 } 1821 case 'Y': 1822 { 1823 bInvalidRow = false; 1824 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow); 1825 if (bFail || nRow < 0 || nMaxImportRow < nRow) 1826 { 1827 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow); 1828 nRow = std::max<SCROW>(0, std::min<SCROW>(nRow, nMaxImportRow)); 1829 bInvalidRow = bOverflowRow = true; 1830 } 1831 break; 1832 } 1833 case 'C': 1834 { 1835 bInvalidRefCol = false; 1836 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nRefCol); 1837 if (bFail || nRefCol < 0 || MAXCOL < nRefCol) 1838 { 1839 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol); 1840 nRefCol = std::max<SCCOL>(0, std::min<SCCOL>(nRefCol, MAXCOL)); 1841 bInvalidRefCol = bOverflowCol = true; 1842 } 1843 break; 1844 } 1845 case 'R': 1846 { 1847 bInvalidRefRow = false; 1848 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRefRow); 1849 if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow) 1850 { 1851 
SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow); 1852 nRefRow = std::max<SCROW>(0, std::min<SCROW>(nRefRow, nMaxImportRow)); 1853 bInvalidRefRow = bOverflowRow = true; 1854 } 1855 break; 1856 } 1857 case 'K': 1858 { 1859 if( !bSingle && 1860 ( nCol < nStartCol || nCol > nEndCol 1861 || nRow < nStartRow || nRow > nEndRow 1862 || nCol > MAXCOL || nRow > nMaxImportRow 1863 || bInvalidCol || bInvalidRow ) ) 1864 break; 1865 if( !bData ) 1866 { 1867 if( nRow > nEndRow ) 1868 nEndRow = nRow; 1869 if( nCol > nEndCol ) 1870 nEndCol = nCol; 1871 break; 1872 } 1873 bool bText; 1874 if( *p == '"' ) 1875 { 1876 bText = true; 1877 aText.clear(); 1878 p = lcl_ScanSylkString( p, aText, eVersion); 1879 } 1880 else 1881 bText = false; 1882 const sal_Unicode* q = p; 1883 while( *q && *q != ';' ) 1884 q++; 1885 if ( !(*q == ';' && *(q+1) == 'I') && !bInvalidCol && !bInvalidRow ) 1886 { // don't ignore value 1887 if( bText ) 1888 { 1889 pDoc->EnsureTable(aRange.aStart.Tab()); 1890 pDoc->SetTextCell( 1891 ScAddress(nCol, nRow, aRange.aStart.Tab()), aText); 1892 } 1893 else 1894 { 1895 double fVal = rtl_math_uStringToDouble( p, 1896 aLine.getStr() + aLine.getLength(), 1897 cDecSep, cGrpSep, nullptr, nullptr ); 1898 pDoc->SetValue( nCol, nRow, aRange.aStart.Tab(), fVal ); 1899 } 1900 } 1901 } 1902 break; 1903 case 'E': 1904 case 'M': 1905 { 1906 if ( ch == 'M' ) 1907 { 1908 if ( nRefCol < nCol ) 1909 nRefCol = nCol; 1910 if ( nRefRow < nRow ) 1911 nRefRow = nRow; 1912 if ( !bData ) 1913 { 1914 if( nRefRow > nEndRow ) 1915 nEndRow = nRefRow; 1916 if( nRefCol > nEndCol ) 1917 nEndCol = nRefCol; 1918 } 1919 } 1920 if( !bMyDoc || !bData ) 1921 break; 1922 aText = "="; 1923 p = lcl_ScanSylkFormula( p, aText, eVersion); 1924 1925 if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow))) 1926 break; 1927 1928 ScAddress aPos( nCol, nRow, aRange.aStart.Tab() ); 1929 /* FIXME: do we want GRAM_ODFF_A1 instead? 
At the 1930 * end it probably should be GRAM_ODFF_R1C1, since 1931 * R1C1 is what Excel writes in SYLK, or even 1932 * better GRAM_ENGLISH_XL_R1C1. */ 1933 const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1; 1934 ScCompiler aComp( pDoc, aPos, eGrammar); 1935 std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray 1936 pDoc->CheckLinkFormulaNeedingCheck(*xCode); 1937 if ( ch == 'M' ) 1938 { 1939 ScMarkData aMark; 1940 aMark.SelectTable( aPos.Tab(), true ); 1941 pDoc->InsertMatrixFormula( nCol, nRow, nRefCol, 1942 nRefRow, aMark, EMPTY_OUSTRING, xCode.get() ); 1943 } 1944 else 1945 { 1946 ScFormulaCell* pFCell = new ScFormulaCell( 1947 pDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE); 1948 pDoc->SetFormulaCell(aPos, pFCell); 1949 } 1950 } 1951 break; 1952 } 1953 while( *p && *p != ';' ) 1954 p++; 1955 if( *p ) 1956 p++; 1957 } 1958 } 1959 else if( cTag == 'F' ) // Format 1960 { 1961 if( *p++ != ';' ) 1962 return false; 1963 sal_Int32 nFormat = -1; 1964 while( *p ) 1965 { 1966 sal_Unicode ch = *p++; 1967 ch = ScGlobal::ToUpperAlpha( ch ); 1968 switch( ch ) 1969 { 1970 case 'X': 1971 { 1972 bInvalidCol = false; 1973 bool bFail = o3tl::checked_add<SCCOL>(OUString(p).toInt32(), nStartCol - 1, nCol); 1974 if (bFail || nCol < 0 || MAXCOL < nCol) 1975 { 1976 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol); 1977 nCol = std::max<SCCOL>(0, std::min<SCCOL>(nCol, MAXCOL)); 1978 bInvalidCol = bOverflowCol = true; 1979 } 1980 break; 1981 } 1982 case 'Y': 1983 { 1984 bInvalidRow = false; 1985 bool bFail = o3tl::checked_add(OUString(p).toInt32(), nStartRow - 1, nRow); 1986 if (bFail || nRow < 0 || nMaxImportRow < nRow) 1987 { 1988 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow); 1989 nRow = std::max<SCROW>(0, std::min<SCROW>(nRow, nMaxImportRow)); 1990 bInvalidRow = bOverflowRow = true; 1991 } 1992 break; 1993 } 1994 case 'P' : 1995 if ( 
bData ) 1996 { 1997 // F;P<n> sets format code of P;P<code> at 1998 // current position, or at ;X;Y if specified. 1999 // Note that ;X;Y may appear after ;P 2000 const sal_Unicode* p0 = p; 2001 while( *p && *p != ';' ) 2002 p++; 2003 OUString aNumber(p0, p - p0); 2004 nFormat = aNumber.toInt32(); 2005 } 2006 break; 2007 } 2008 while( *p && *p != ';' ) 2009 p++; 2010 if( *p ) 2011 p++; 2012 } 2013 if ( !bData ) 2014 { 2015 if( nRow > nEndRow ) 2016 nEndRow = nRow; 2017 if( nCol > nEndCol ) 2018 nEndCol = nCol; 2019 } 2020 if ( 0 <= nFormat && nFormat < static_cast<sal_Int32>(aFormats.size()) && !bInvalidCol && !bInvalidRow ) 2021 { 2022 sal_uInt32 nKey = aFormats[nFormat]; 2023 pDoc->ApplyAttr( nCol, nRow, aRange.aStart.Tab(), 2024 SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) ); 2025 } 2026 } 2027 else if( cTag == 'P' ) 2028 { 2029 if ( bData && *p == ';' && *(p+1) == 'P' ) 2030 { 2031 OUString aCode( p+2 ); 2032 // unescape doubled semicolons 2033 aCode = aCode.replaceAll(";;", ";"); 2034 // get rid of Xcl escape characters 2035 aCode = aCode.replaceAll("\x1b", ""); 2036 sal_Int32 nCheckPos; 2037 SvNumFormatType nType; 2038 sal_uInt32 nKey; 2039 pDoc->GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey, 2040 LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false); 2041 if ( nCheckPos ) 2042 nKey = 0; 2043 aFormats.push_back( nKey ); 2044 } 2045 } 2046 else if( cTag == 'I' && *p == 'D' ) 2047 { 2048 aLine = aLine.copy(4); 2049 if (aLine == "CALCOOO32") 2050 eVersion = SylkVersion::OOO32; 2051 else if (aLine == "SCALC3") 2052 eVersion = SylkVersion::SCALC3; 2053 bMyDoc = (eVersion <= SylkVersion::OWN); 2054 } 2055 else if( cTag == 'E' ) // End 2056 break; 2057 } 2058 if( !bData ) 2059 { 2060 aRange.aEnd.SetCol( nEndCol ); 2061 aRange.aEnd.SetRow( nEndRow ); 2062 bOk = StartPaste(); 2063 bData = true; 2064 } 2065 else 2066 break; 2067 } 2068 2069 EndPaste(); 2070 return bOk; 2071 } 2072 2073 bool ScImportExport::Doc2Sylk( SvStream& rStrm ) 2074 { 2075 SCCOL nCol; 
2076 SCROW nRow; 2077 SCCOL nStartCol = aRange.aStart.Col(); 2078 SCROW nStartRow = aRange.aStart.Row(); 2079 SCCOL nEndCol = aRange.aEnd.Col(); 2080 SCROW nEndRow = aRange.aEnd.Row(); 2081 OUString aCellStr; 2082 OUString aValStr; 2083 lcl_WriteSimpleString( rStrm, "ID;PCALCOOO32" ); 2084 WriteUnicodeOrByteEndl( rStrm ); 2085 2086 for (nRow = nStartRow; nRow <= nEndRow; nRow++) 2087 { 2088 for (nCol = nStartCol; nCol <= nEndCol; nCol++) 2089 { 2090 OUString aBufStr; 2091 double nVal; 2092 bool bForm = false; 2093 SCROW r = nRow - nStartRow + 1; 2094 SCCOL c = nCol - nStartCol + 1; 2095 ScRefCellValue aCell(*pDoc, ScAddress(nCol, nRow, aRange.aStart.Tab())); 2096 CellType eType = aCell.meType; 2097 switch( eType ) 2098 { 2099 case CELLTYPE_FORMULA: 2100 bForm = bFormulas; 2101 if( pDoc->HasValueData( nCol, nRow, aRange.aStart.Tab()) ) 2102 goto hasvalue; 2103 else 2104 goto hasstring; 2105 2106 case CELLTYPE_VALUE: 2107 hasvalue: 2108 pDoc->GetValue( nCol, nRow, aRange.aStart.Tab(), nVal ); 2109 2110 aValStr = ::rtl::math::doubleToUString( nVal, 2111 rtl_math_StringFormat_Automatic, 2112 rtl_math_DecimalPlaces_Max, '.', true ); 2113 2114 aBufStr = "C;X" 2115 + OUString::number( c ) 2116 + ";Y" 2117 + OUString::number( r ) 2118 + ";K" 2119 + aValStr; 2120 lcl_WriteSimpleString( rStrm, aBufStr ); 2121 goto checkformula; 2122 2123 case CELLTYPE_STRING: 2124 case CELLTYPE_EDIT: 2125 hasstring: 2126 aCellStr = pDoc->GetString(nCol, nRow, aRange.aStart.Tab()); 2127 aCellStr = aCellStr.replaceAll("\n", SYLK_LF); 2128 2129 aBufStr = "C;X" 2130 + OUString::number( c ) 2131 + ";Y" 2132 + OUString::number( r ) 2133 + ";K"; 2134 lcl_WriteSimpleString( rStrm, aBufStr ); 2135 lcl_WriteString( rStrm, aCellStr, '"', ';' ); 2136 2137 checkformula: 2138 if( bForm ) 2139 { 2140 const ScFormulaCell* pFCell = aCell.mpFormula; 2141 switch ( pFCell->GetMatrixFlag() ) 2142 { 2143 case ScMatrixMode::Reference : 2144 aCellStr.clear(); 2145 break; 2146 default: 2147 OUString aOUCellStr; 2148 
pFCell->GetFormula( aOUCellStr,formula::FormulaGrammar::GRAM_PODF_A1); 2149 aCellStr = aOUCellStr; 2150 /* FIXME: do we want GRAM_ODFF_A1 instead? At 2151 * the end it probably should be 2152 * GRAM_ODFF_R1C1, since R1C1 is what Excel 2153 * writes in SYLK, or even better 2154 * GRAM_ENGLISH_XL_R1C1. */ 2155 } 2156 if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE && 2157 aCellStr.startsWith("{") && 2158 aCellStr.endsWith("}") ) 2159 { // cut off matrix {} characters 2160 aCellStr = aCellStr.copy(1, aCellStr.getLength()-2); 2161 } 2162 if ( aCellStr[0] == '=' ) 2163 aCellStr = aCellStr.copy(1); 2164 OUString aPrefix; 2165 switch ( pFCell->GetMatrixFlag() ) 2166 { 2167 case ScMatrixMode::Formula : 2168 { // diff expression with 'M' M$-extension 2169 SCCOL nC; 2170 SCROW nR; 2171 pFCell->GetMatColsRows( nC, nR ); 2172 nC += c - 1; 2173 nR += r - 1; 2174 aPrefix = ";R" 2175 + OUString::number( nR ) 2176 + ";C" 2177 + OUString::number( nC ) 2178 + ";M"; 2179 } 2180 break; 2181 case ScMatrixMode::Reference : 2182 { // diff expression with 'I' M$-extension 2183 ScAddress aPos; 2184 (void)pFCell->GetMatrixOrigin( aPos ); 2185 aPrefix = ";I;R" 2186 + OUString::number( aPos.Row() - nStartRow + 1 ) 2187 + ";C" 2188 + OUString::number( aPos.Col() - nStartCol + 1 ); 2189 } 2190 break; 2191 default: 2192 // formula Expression 2193 aPrefix = ";E"; 2194 } 2195 lcl_WriteSimpleString( rStrm, aPrefix ); 2196 if ( !aCellStr.isEmpty() ) 2197 lcl_WriteString( rStrm, aCellStr, 0, ';' ); 2198 } 2199 WriteUnicodeOrByteEndl( rStrm ); 2200 break; 2201 2202 default: 2203 { 2204 // added to avoid warnings 2205 } 2206 } 2207 } 2208 } 2209 lcl_WriteSimpleString( rStrm, OUString( 'E' ) ); 2210 WriteUnicodeOrByteEndl( rStrm ); 2211 return rStrm.GetError() == ERRCODE_NONE; 2212 } 2213 2214 bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL ) 2215 { 2216 // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options 2217 ScFormatFilter::Get().ScExportHTML( 
rStrm, rBaseURL, pDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll, 2218 aStreamPath, aNonConvertibleChars, maFilterOptions ); 2219 return rStrm.GetError() == ERRCODE_NONE; 2220 } 2221 2222 bool ScImportExport::Doc2RTF( SvStream& rStrm ) 2223 { 2224 // rtl_TextEncoding is ignored in ScExportRTF 2225 ScFormatFilter::Get().ScExportRTF( rStrm, pDoc, aRange, RTL_TEXTENCODING_DONTKNOW ); 2226 return rStrm.GetError() == ERRCODE_NONE; 2227 } 2228 2229 bool ScImportExport::Doc2Dif( SvStream& rStrm ) 2230 { 2231 // for DIF in the clipboard, IBM_850 is always used 2232 ScFormatFilter::Get().ScExportDif( rStrm, pDoc, aRange, RTL_TEXTENCODING_IBM_850 ); 2233 return true; 2234 } 2235 2236 bool ScImportExport::Dif2Doc( SvStream& rStrm ) 2237 { 2238 SCTAB nTab = aRange.aStart.Tab(); 2239 ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) ); 2240 pImportDoc->InitUndo( pDoc, nTab, nTab ); 2241 2242 // for DIF in the clipboard, IBM_850 is always used 2243 ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 ); 2244 2245 SCCOL nEndCol; 2246 SCROW nEndRow; 2247 pImportDoc->GetCellArea( nTab, nEndCol, nEndRow ); 2248 // if there are no cells in the imported content, nEndCol/nEndRow may be before the start 2249 if ( nEndCol < aRange.aStart.Col() ) 2250 nEndCol = aRange.aStart.Col(); 2251 if ( nEndRow < aRange.aStart.Row() ) 2252 nEndRow = aRange.aStart.Row(); 2253 aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab ); 2254 2255 bool bOk = StartPaste(); 2256 if (bOk) 2257 { 2258 InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; 2259 pDoc->DeleteAreaTab( aRange, nFlags ); 2260 pImportDoc->CopyToDocument(aRange, nFlags, false, *pDoc); 2261 EndPaste(); 2262 } 2263 2264 return bOk; 2265 } 2266 2267 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL ) 2268 { 2269 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( pDoc, aRange ); 2270 if (!pImp) 2271 return false; 
2272 pImp->Read( rStrm, rBaseURL ); 2273 aRange = pImp->GetRange(); 2274 2275 bool bOk = StartPaste(); 2276 if (bOk) 2277 { 2278 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; 2279 pDoc->DeleteAreaTab( aRange, nFlags ); 2280 pImp->WriteToDocument(); 2281 EndPaste(); 2282 } 2283 return bOk; 2284 } 2285 2286 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL ) 2287 { 2288 std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( pDoc, rBaseURL, aRange); 2289 if (!pImp) 2290 return false; 2291 pImp->Read( rStrm, rBaseURL ); 2292 aRange = pImp->GetRange(); 2293 2294 bool bOk = StartPaste(); 2295 if (bOk) 2296 { 2297 // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in 2298 // a Draw Layer but no Draw View -> create Draw Layer and View here 2299 if (pDocSh) 2300 pDocSh->MakeDrawLayer(); 2301 2302 InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES; 2303 pDoc->DeleteAreaTab( aRange, nFlags ); 2304 2305 if (pExtOptions) 2306 { 2307 // Pick up import options if available. 2308 LanguageType eLang = pExtOptions->GetLanguage(); 2309 SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang); 2310 bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber(); 2311 pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber); 2312 } 2313 else 2314 // Regular import, with no options. 
2315 pImp->WriteToDocument(); 2316 2317 EndPaste(); 2318 } 2319 return bOk; 2320 } 2321 2322 #ifndef DISABLE_DYNLOADING 2323 2324 extern "C" { static void thisModule() {} } 2325 2326 #else 2327 2328 extern "C" { 2329 ScFormatFilterPlugin* ScFilterCreate(); 2330 } 2331 2332 #endif 2333 2334 typedef ScFormatFilterPlugin * (*FilterFn)(); 2335 ScFormatFilterPlugin &ScFormatFilter::Get() 2336 { 2337 static ScFormatFilterPlugin *plugin = [&]() 2338 { 2339 #ifndef DISABLE_DYNLOADING 2340 OUString sFilterLib(SVLIBRARY("scfilt")); 2341 static ::osl::Module aModule; 2342 bool bLoaded = aModule.is(); 2343 if (!bLoaded) 2344 bLoaded = aModule.loadRelative(&thisModule, sFilterLib); 2345 if (!bLoaded) 2346 bLoaded = aModule.load(sFilterLib); 2347 if (bLoaded) 2348 { 2349 oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" ); 2350 if (fn != nullptr) 2351 return reinterpret_cast<FilterFn>(fn)(); 2352 } 2353 assert(false); 2354 return static_cast<ScFormatFilterPlugin*>(nullptr); 2355 #else 2356 return ScFilterCreate(); 2357 #endif 2358 }(); 2359 2360 return *plugin; 2361 } 2362 2363 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated 2364 // array. 
2365 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr, 2366 sal_Unicode c ) 2367 { 2368 while (*pStr) 2369 { 2370 if (*pStr == c) 2371 return pStr; 2372 ++pStr; 2373 } 2374 return nullptr; 2375 } 2376 2377 ScImportStringStream::ScImportStringStream( const OUString& rStr ) 2378 : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()), 2379 rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ) 2380 { 2381 SetStreamCharSet( RTL_TEXTENCODING_UNICODE ); 2382 #ifdef OSL_BIGENDIAN 2383 SetEndian(SvStreamEndian::BIG); 2384 #else 2385 SetEndian(SvStreamEndian::LITTLE); 2386 #endif 2387 } 2388 2389 OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak, 2390 OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep ) 2391 { 2392 enum RetryState 2393 { 2394 FORBID, 2395 ALLOW, 2396 RETRY, 2397 RETRIED 2398 } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID); 2399 2400 sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? 
rStream.Tell() : 0); 2401 2402 Label_RetryWithNewSep: 2403 2404 if (eRetryState == RetryState::RETRY) 2405 { 2406 eRetryState = RetryState::RETRIED; 2407 rStream.Seek( nStreamPos); 2408 } 2409 2410 OUString aStr; 2411 rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit); 2412 2413 if (bEmbeddedLineBreak) 2414 { 2415 const sal_Unicode* pSeps = rFieldSeparators.getStr(); 2416 2417 QuoteType eQuoteState = FIELDEND_QUOTE; 2418 bool bFieldStart = true; 2419 2420 sal_Int32 nLastOffset = 0; 2421 sal_Int32 nQuotes = 0; 2422 while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit) 2423 { 2424 const sal_Unicode *p, *pStart; 2425 p = pStart = aStr.getStr(); 2426 p += nLastOffset; 2427 while (*p) 2428 { 2429 if (nQuotes) 2430 { 2431 if (*p == cFieldQuote) 2432 { 2433 if (bFieldStart) 2434 { 2435 ++nQuotes; 2436 bFieldStart = false; 2437 eQuoteState = FIELDSTART_QUOTE; 2438 } 2439 // Do not detect a FIELDSTART_QUOTE if not in 2440 // bFieldStart mode, in which case for unquoted content 2441 // we are in FIELDEND_QUOTE state. 2442 else if (eQuoteState != FIELDEND_QUOTE) 2443 { 2444 eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep); 2445 2446 if (eRetryState == RetryState::ALLOW && rcDetectSep == ' ') 2447 { 2448 eRetryState = RetryState::RETRY; 2449 rFieldSeparators += OUString(' '); 2450 goto Label_RetryWithNewSep; 2451 } 2452 2453 // DONTKNOW_QUOTE is an embedded unescaped quote we 2454 // don't count for pairing. 2455 if (eQuoteState != DONTKNOW_QUOTE) 2456 ++nQuotes; 2457 } 2458 } 2459 else if (eQuoteState == FIELDEND_QUOTE) 2460 { 2461 if (bFieldStart) 2462 // If blank is a separator it starts a field, if it 2463 // is not and thus maybe leading before quote we 2464 // are still at start of field regarding quotes. 
2465 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2466 else 2467 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2468 } 2469 } 2470 else 2471 { 2472 if (*p == cFieldQuote && bFieldStart) 2473 { 2474 nQuotes = 1; 2475 eQuoteState = FIELDSTART_QUOTE; 2476 bFieldStart = false; 2477 } 2478 else if (eQuoteState == FIELDEND_QUOTE) 2479 { 2480 // This also skips leading blanks at beginning of line 2481 // if followed by a quote. It's debatable whether we 2482 // actually want that or not, but congruent with what 2483 // ScanNextFieldFromString() does. 2484 if (bFieldStart) 2485 bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2486 else 2487 bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr); 2488 } 2489 } 2490 // A quote character inside a field content does not start 2491 // a quote. 2492 ++p; 2493 } 2494 2495 if (nQuotes % 2 == 0) 2496 // We still have a (theoretical?) problem here if due to 2497 // nArbitraryLineLengthLimit we split a string right between a 2498 // doubled quote pair. 2499 break; 2500 else 2501 { 2502 nLastOffset = aStr.getLength(); 2503 OUString aNext; 2504 rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit); 2505 aStr += "\n" + aNext; 2506 } 2507 } 2508 } 2509 return aStr; 2510 } 2511 2512 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 2513
