xref: /core/sc/source/ui/docshell/impex.cxx (revision 2b4094195)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <comphelper/processfactory.hxx>
21 #include <i18nlangtag/languagetag.hxx>
22 #include <i18nutil/unicode.hxx>
23 #include <sot/formats.hxx>
24 #include <sfx2/mieclip.hxx>
25 #include <com/sun/star/i18n/CalendarFieldIndex.hpp>
26 #include <sal/log.hxx>
27 #include <unotools/charclass.hxx>
28 #include <osl/module.hxx>
29 #include <o3tl/string_view.hxx>
30 
31 #include <global.hxx>
32 #include <docsh.hxx>
33 #include <undoblk.hxx>
34 #include <rangenam.hxx>
35 #include <tabvwsh.hxx>
36 #include <filter.hxx>
37 #include <asciiopt.hxx>
38 #include <formulacell.hxx>
39 #include <cellform.hxx>
40 #include <progress.hxx>
41 #include <scitems.hxx>
42 #include <editable.hxx>
43 #include <compiler.hxx>
44 #include <warnbox.hxx>
45 #include <clipparam.hxx>
46 #include <impex.hxx>
47 #include <editutil.hxx>
48 #include <patattr.hxx>
49 #include <docpool.hxx>
50 #include <stringutil.hxx>
51 #include <cellvalue.hxx>
52 #include <tokenarray.hxx>
53 #include <documentimport.hxx>
54 #include <refundo.hxx>
55 #include <mtvelements.hxx>
56 
57 #include <globstr.hrc>
58 #include <scresid.hxx>
59 #include <o3tl/safeint.hxx>
60 #include <tools/svlibrary.h>
61 #include <unotools/configmgr.hxx>
62 #include <vcl/svapp.hxx>
63 #include <vcl/weld.hxx>
64 #include <editeng/editobj.hxx>
65 #include <svl/numformat.hxx>
66 #include <rtl/character.hxx>
67 #include <rtl/math.hxx>
68 #include <sax/tools/converter.hxx>
69 
70 #include <memory>
71 #include <string_view>
72 
73 #include <unicode/uchar.h>
74 
75 #include <osl/endian.h>
76 
// We don't want to end up with 2GB read in one line just because of malformed
// multiline fields, so chop it _somewhere_, which is twice supported columns
// times arbitrary maximum cell content length, 2*1024*64K=128M, and because
// it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
// luck anyway.
// NOTE(review): the 2*1024*64K figure presumes MAXCOLCOUNT is 1024 - confirm
// against the current definition of MAXCOLCOUNT.

// Upper bound, in UTF-16 code units, for the content of one field; enforced
// by lcl_appendLineData().
constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16;
// Upper bound for the length of one (possibly multi-line) input line, derived
// from the cell limit above.
constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit;
84 
namespace
{

/// Three-character sequence (ESC, blank, colon) that lcl_UnescapeSylk()
/// converts back into a line feed when reading SYLK string data.
const char SYLK_LF[] = "\x1b :";

/// Producer of a SYLK stream. The enumerators are ordered, code compares them
/// with >= to decide which escaping rules apply.
enum class SylkVersion
{
    SCALC3,    // Wrote wrongly quoted strings and unescaped semicolons.
    OOO32,     // Correct strings, plus multiline content.
    OWN,       // Place our new versions, if any, before this value.
    OTHER      // Assume that aliens wrote correct strings.
};

} // anonymous namespace
101 
102 // Whole document without Undo
103 ScImportExport::ScImportExport( ScDocument& r )
104     : pDocSh( r.GetDocumentShell() ), rDoc( r ),
105       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
106       cSep( '\t' ), cStr( '"' ),
107       bFormulas( false ), bIncludeFiltered( true ),
108       bAll( true ), bSingle( true ), bUndo( false ),
109       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
110       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
111 {
112     pUndoDoc = nullptr;
113     pExtOptions = nullptr;
114 }
115 
116 // Insert am current cell without range(es)
117 ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt )
118     : pDocSh( r.GetDocumentShell() ), rDoc( r ),
119       aRange( rPt ),
120       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
121       cSep( '\t' ), cStr( '"' ),
122       bFormulas( false ), bIncludeFiltered( true ),
123       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
124       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
125       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
126 {
127     pUndoDoc = nullptr;
128     pExtOptions = nullptr;
129 }
130 
131 //  ctor with a range is only used for export
132 //! ctor with a string (and bSingle=true) is also used for DdeSetData
133 ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange )
134     : pDocSh( r.GetDocumentShell() ), rDoc( r ),
135       aRange( rRange ),
136       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
137       cSep( '\t' ), cStr( '"' ),
138       bFormulas( false ), bIncludeFiltered( true ),
139       bAll( false ), bSingle( false ), bUndo( pDocSh != nullptr ),
140       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
141       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
142 {
143     pUndoDoc = nullptr;
144     pExtOptions = nullptr;
145     // Only one sheet (table) supported
146     aRange.aEnd.SetTab( aRange.aStart.Tab() );
147 }
148 
149 // Evaluate input string - either range, cell or the whole document (when error)
150 // If a View exists, the TabNo of the view will be used.
151 ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos )
152     : pDocSh( r.GetDocumentShell() ), rDoc( r ),
153       nSizeLimit( 0 ), nMaxImportRow(!utl::ConfigManager::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K),
154       cSep( '\t' ), cStr( '"' ),
155       bFormulas( false ), bIncludeFiltered( true ),
156       bAll( false ), bSingle( true ), bUndo( pDocSh != nullptr ),
157       bOverflowRow( false ), bOverflowCol( false ), bOverflowCell( false ),
158       mbApi( true ), mbImportBroadcast(false), mbOverwriting( false ), mbIncludeBOM(false)
159 {
160     pUndoDoc = nullptr;
161     pExtOptions = nullptr;
162 
163     SCTAB nTab = ScDocShell::GetCurTab();
164     aRange.aStart.SetTab( nTab );
165     OUString aPos( rPos );
166     // Named range?
167     ScRangeName* pRange = rDoc.GetRangeName();
168     if (pRange)
169     {
170         const ScRangeData* pData = pRange->findByUpperName(ScGlobal::getCharClass().uppercase(aPos));
171         if (pData)
172         {
173             if( pData->HasType( ScRangeData::Type::RefArea )
174                 || pData->HasType( ScRangeData::Type::AbsArea )
175                 || pData->HasType( ScRangeData::Type::AbsPos ) )
176             {
177                 aPos = pData->GetSymbol();
178             }
179         }
180     }
181     formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention();
182     // Range?
183     if (aRange.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
184         bSingle = false;
185     // Cell?
186     else if (aRange.aStart.Parse(aPos, rDoc, eConv) & ScRefFlags::VALID)
187         aRange.aEnd = aRange.aStart;
188     else
189         bAll = true;
190 }
191 
// Explicitly releases the undo document first, then the extended ASCII
// options, before the remaining members are destroyed.
ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
{
    pUndoDoc.reset();
    pExtOptions.reset();
}
197 
198 void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
199 {
200     if ( pExtOptions )
201         *pExtOptions = rOpt;
202     else
203         pExtOptions.reset(new ScAsciiOptions( rOpt ));
204 
205     //  "normal" Options
206 
207     cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
208     cStr = rOpt.GetTextSep();
209 }
210 
// Remember the filter options string in maFilterOptions; it is not
// interpreted here.
void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
{
    maFilterOptions = rFilterOptions;
}
215 
216 bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
217 {
218     return nFormat == SotClipboardFormatId::STRING
219               || nFormat == SotClipboardFormatId::STRING_TSVC
220               || nFormat == SotClipboardFormatId::SYLK
221               || nFormat == SotClipboardFormatId::LINK
222               || nFormat == SotClipboardFormatId::HTML
223               || nFormat == SotClipboardFormatId::HTML_SIMPLE
224               || nFormat == SotClipboardFormatId::DIF;
225 }
226 
227 // Prepare for Undo
228 bool ScImportExport::StartPaste()
229 {
230     if ( !bAll )
231     {
232         ScEditableTester aTester( rDoc, aRange );
233         if ( !aTester.IsEditable() )
234         {
235             std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(),
236                                                           VclMessageType::Info, VclButtonsType::Ok,
237                                                           ScResId(aTester.GetMessageId())));
238             xInfoBox->run();
239             return false;
240         }
241     }
242     if( bUndo && pDocSh && rDoc.IsUndoEnabled())
243     {
244         pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
245         pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
246         rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
247     }
248     return true;
249 }
250 
251 // Create Undo/Redo actions, Invalidate/Repaint
252 void ScImportExport::EndPaste(bool bAutoRowHeight)
253 {
254     bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
255                     aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );
256 
257     if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh )
258     {
259         ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
260         pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
261         rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
262         ScMarkData aDestMark(pRedoDoc->GetSheetLimits());
263         aDestMark.SetMarkArea(aRange);
264         pDocSh->GetUndoManager()->AddUndoAction(
265             std::make_unique<ScUndoPaste>(pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
266     }
267     pUndoDoc.reset();
268     if( pDocSh )
269     {
270         if (!bHeight)
271             pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
272         pDocSh->SetDocumentModified();
273     }
274     ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
275     if ( pViewSh )
276         pViewSh->UpdateInputHandler();
277 
278 }
279 
280 bool ScImportExport::ExportData( std::u16string_view rMimeType,
281                                  css::uno::Any & rValue )
282 {
283     SvMemoryStream aStrm;
284     SotClipboardFormatId fmtId = SotExchange::GetFormatIdFromMimeType(rMimeType);
285     if (fmtId == SotClipboardFormatId::STRING)
286         aStrm.SetStreamCharSet(RTL_TEXTENCODING_UNICODE);
287     // mba: no BaseURL for data exchange
288     if (ExportStream(aStrm, OUString(), fmtId))
289     {
290         if (fmtId == SotClipboardFormatId::STRING)
291         {
292             assert(aStrm.TellEnd() % sizeof(sal_Unicode) == 0);
293             rValue <<= OUString(static_cast<const sal_Unicode*>(aStrm.GetData()),
294                                 aStrm.TellEnd() / sizeof(sal_Unicode));
295         }
296         else
297         {
298             aStrm.WriteUChar(0);
299             rValue <<= css::uno::Sequence<sal_Int8>(static_cast<sal_Int8 const*>(aStrm.GetData()),
300                                                     aStrm.TellEnd());
301         }
302         return true;
303     }
304     return false;
305 }
306 
307 bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
308 {
309     switch ( nFmt )
310     {
311         // formats supporting unicode
312         case SotClipboardFormatId::STRING :
313         case SotClipboardFormatId::STRING_TSVC :
314         {
315             ScImportStringStream aStrm( rText);
316             return ImportStream( aStrm, OUString(), nFmt );
317             // ImportStream must handle RTL_TEXTENCODING_UNICODE
318         }
319         default:
320         {
321             rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
322             OString aTmp( rText.getStr(), rText.getLength(), eEnc );
323             SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ );
324             aStrm.SetStreamCharSet( eEnc );
325             SetNoEndianSwap( aStrm );       //! no swapping in memory
326             return ImportStream( aStrm, OUString(), nFmt );
327         }
328     }
329 }
330 
331 bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
332 {
333     if ( nFmt != SotClipboardFormatId::STRING && nFmt != SotClipboardFormatId::STRING_TSVC )
334     {
335         SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
336         rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
337         OString aTmp;
338         bool bOk = ExportByteString( aTmp, eEnc, nFmt );
339         rText = OStringToOUString( aTmp, eEnc );
340         return bOk;
341     }
342     //  nSizeLimit not needed for OUString
343 
344     SvMemoryStream aStrm;
345     aStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
346     SetNoEndianSwap( aStrm );       //! no swapping in memory
347     // mba: no BaseURL for data exc
348     if( ExportStream( aStrm, OUString(), nFmt ) )
349     {
350         aStrm.WriteUInt16( 0 );
351         rText = OUString( static_cast<const sal_Unicode*>(aStrm.GetData()) );
352         return true;
353     }
354     rText.clear();
355     return false;
356 
357     // ExportStream must handle RTL_TEXTENCODING_UNICODE
358 }
359 
360 bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
361 {
362     OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
363     if ( eEnc == RTL_TEXTENCODING_UNICODE )
364         eEnc = osl_getThreadTextEncoding();
365 
366     if (!nSizeLimit)
367         nSizeLimit = SAL_MAX_UINT16;
368 
369     SvMemoryStream aStrm;
370     aStrm.SetStreamCharSet( eEnc );
371     SetNoEndianSwap( aStrm );       //! no swapping in memory
372     // mba: no BaseURL for data exchange
373     if( ExportStream( aStrm, OUString(), nFmt ) )
374     {
375         aStrm.WriteChar( 0 );
376         if( aStrm.TellEnd() <= nSizeLimit )
377         {
378             rText = static_cast<const char*>(aStrm.GetData());
379             return true;
380         }
381     }
382     rText.clear();
383     return false;
384 }
385 
386 bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
387 {
388     if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
389     {
390         if( ExtText2Doc( rStrm ) )      // evaluate pExtOptions
391             return true;
392     }
393     if( nFmt == SotClipboardFormatId::SYLK )
394     {
395         if( Sylk2Doc( rStrm ) )
396             return true;
397     }
398     if( nFmt == SotClipboardFormatId::DIF )
399     {
400         if( Dif2Doc( rStrm ) )
401             return true;
402     }
403     if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
404     {
405         if( RTF2Doc( rStrm, rBaseURL ) )
406             return true;
407     }
408     if( nFmt == SotClipboardFormatId::LINK )
409         return true;            // Link-Import?
410     if ( nFmt == SotClipboardFormatId::HTML )
411     {
412         if( HTML2Doc( rStrm, rBaseURL ) )
413             return true;
414     }
415     if ( nFmt == SotClipboardFormatId::HTML_SIMPLE )
416     {
417         MSE40HTMLClipFormatObj aMSE40ClpObj;                // needed to skip the header data
418         SvStream* pHTML = aMSE40ClpObj.IsValid( rStrm );
419         if ( pHTML && HTML2Doc( *pHTML, rBaseURL ) )
420             return true;
421     }
422 
423     return false;
424 }
425 
426 bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
427 {
428     if( nFmt == SotClipboardFormatId::STRING || nFmt == SotClipboardFormatId::STRING_TSVC )
429     {
430         if( Doc2Text( rStrm ) )
431             return true;
432     }
433     if( nFmt == SotClipboardFormatId::SYLK )
434     {
435         if( Doc2Sylk( rStrm ) )
436             return true;
437     }
438     if( nFmt == SotClipboardFormatId::DIF )
439     {
440         if( Doc2Dif( rStrm ) )
441             return true;
442     }
443     if( nFmt == SotClipboardFormatId::LINK && !bAll )
444     {
445         OUString aDocName;
446         if ( rDoc.IsClipboard() )
447             aDocName = ScGlobal::GetClipDocName();
448         else
449         {
450             ScDocShell* pShell = rDoc.GetDocumentShell();
451             if (pShell)
452                 aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );
453         }
454 
455         OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
456         if( !aDocName.isEmpty() )
457         {
458             // Always use Calc A1 syntax for paste link.
459             OUString aRefName;
460             ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
461             if( bSingle )
462                 aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO);
463             else
464             {
465                 if( aRange.aStart.Tab() != aRange.aEnd.Tab() )
466                     nFlags |= ScRefFlags::TAB2_3D;
467                 aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO);
468             }
469             OUString aAppName = Application::GetAppName();
470 
471             // extra bits are used to tell the client to prefer external
472             // reference link.
473 
474             rStrm.WriteUnicodeOrByteText(aAppName, true);
475             rStrm.WriteUnicodeOrByteText(aDocName, true);
476             rStrm.WriteUnicodeOrByteText(aRefName, true);
477             rStrm.WriteUnicodeOrByteText(u"calc:extref", true);
478             return rStrm.WriteUnicodeOrByteText(u"", true); // One more trailing zero
479         }
480     }
481     if( nFmt == SotClipboardFormatId::HTML )
482     {
483         if( Doc2HTML( rStrm, rBaseURL ) )
484             return true;
485     }
486     if( nFmt == SotClipboardFormatId::RTF || nFmt == SotClipboardFormatId::RICHTEXT )
487     {
488         if( Doc2RTF( rStrm ) )
489             return true;
490     }
491 
492     return false;
493 }
494 
495 // tdf#104927
496 // http://www.unicode.org/reports/tr11/
497 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth)
498 {
499     sal_Int32 nWidth = 0;
500     while(nIdx < rStr.getLength() && nWidth < nMaxWidth)
501     {
502         sal_uInt32 nCode = rStr.iterateCodePoints(&nIdx);
503 
504         auto nEaWidth = u_getIntPropertyValue(nCode, UCHAR_EAST_ASIAN_WIDTH);
505         if (nEaWidth == U_EA_FULLWIDTH || nEaWidth == U_EA_WIDE)
506             nWidth += 2;
507         else if (!u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
508             nWidth += 1;
509     }
510 
511     if (nIdx < rStr.getLength())
512     {
513         sal_Int32 nTmpIdx = nIdx;
514         sal_uInt32 nCode = rStr.iterateCodePoints(&nTmpIdx);
515 
516         if (u_getIntPropertyValue(nCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
517             nIdx = nTmpIdx;
518     }
519     return nWidth;
520 }
521 
522 sal_Int32 ScImportExport::CountVisualWidth(const OUString& rStr)
523 {
524     sal_Int32 nIdx = 0;
525     return CountVisualWidth(rStr, nIdx, SAL_MAX_INT32);
526 }
527 
528 void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
529 {
530 #ifdef OSL_BIGENDIAN
531     rStrm.SetEndian( SvStreamEndian::BIG );
532 #else
533     rStrm.SetEndian( SvStreamEndian::LITTLE );
534 #endif
535 }
536 
537 static inline bool lcl_isFieldEnd( sal_Unicode c, const sal_Unicode* pSeps )
538 {
539     return !c || ScGlobal::UnicodeStrChr( pSeps, c);
540 }
541 
namespace {

/// Classification of a quote character met while scanning a delimited field,
/// as returned by lcl_isFieldEndQuote() and lcl_isEscapedOrFieldEndQuote().
enum QuoteType
{
    FIELDSTART_QUOTE,   ///< first quote in a field
    FIRST_QUOTE,        ///< first quote of a doubled (escaped) quote
    SECOND_QUOTE,       ///< second quote of a doubled (escaped) quote
    FIELDEND_QUOTE,     ///< quote that ends a quoted field
    DONTKNOW_QUOTE      ///< unescaped quote not considered an end of field
};

}
554 
555 /** Determine if *p is a quote that ends a quoted field.
556 
557     Precondition: we are parsing a quoted field already and *p is a quote.
558 
559     @return
560         FIELDEND_QUOTE if end of field quote
561         DONTKNOW_QUOTE anything else
562  */
563 static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
564 {
565     // Due to broken CSV generators that don't double embedded quotes check if
566     // a field separator immediately or with trailing spaces follows the quote,
567     // only then end the field, or at end of string.
568     constexpr sal_Unicode cBlank = ' ';
569     if (p[1] == cBlank && ScGlobal::UnicodeStrChr( pSeps, cBlank))
570         return FIELDEND_QUOTE;
571     // Detect a possible blank separator if it's not already in the list (which
572     // was checked right above for p[1]==cBlank).
573     const bool bBlankSep = (p[1] == cBlank && !rcDetectSep && p[2] && p[2] != cBlank);
574     while (p[1] == cBlank)
575         ++p;
576     if (lcl_isFieldEnd( p[1], pSeps))
577         return FIELDEND_QUOTE;
578     // Extended separator detection after a closing quote (with or without
579     // blanks). Note that nQuotes is incremented *after* the call so is not yet
580     // even here, and that with separator detection we reach here only if
581     // lcl_isEscapedOrFieldEndQuote() did not already detect FIRST_QUOTE or
582     // SECOND_QUOTE for an escaped embedded quote, thus nQuotes does not have
583     // to be checked.
584     if (!rcDetectSep)
585     {
586         constexpr sal_Unicode vSep[] = { ',', '\t', ';' };
587         for (const sal_Unicode c : vSep)
588         {
589             if (p[1] == c)
590             {
591                 rcDetectSep = c;
592                 return FIELDEND_QUOTE;
593             }
594         }
595     }
596     // Blank separator is least significant, after others.
597     if (bBlankSep)
598     {
599         rcDetectSep = cBlank;
600         return FIELDEND_QUOTE;
601     }
602     return DONTKNOW_QUOTE;
603 }
604 
605 /** Determine if *p is a quote that is escaped by being doubled or ends a
606     quoted field.
607 
608     Precondition: *p is a quote.
609 
610     @param nQuotes
611         Quote characters encountered so far.
612         Odd (after opening quote) means either no embedded quotes or only quote
613         pairs so far.
614         Even means either not in a quoted field or already one quote
615         encountered, the first of a pair.
616 
617     @return
618         FIELDSTART_QUOTE if first quote in a field, either starting content or
619                             embedded so caller should check beforehand.
620         FIRST_QUOTE      if first of a doubled quote
621         SECOND_QUOTE     if second of a doubled quote
622         FIELDEND_QUOTE   if end of field quote
623         DONTKNOW_QUOTE   if an unescaped quote we don't consider as end of field,
624                             do not increment nQuotes in caller then!
625  */
626 static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
627         const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
628 {
629     if ((nQuotes & 1) == 0)
630     {
631         if (p[-1] == cStr)
632             return SECOND_QUOTE;
633         else
634         {
635             SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
636             return FIELDSTART_QUOTE;
637         }
638     }
639     if (p[1] == cStr)
640         return FIRST_QUOTE;
641     return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
642 }
643 
644 /** Append characters of [p1,p2) to rField.
645 
646     @returns TRUE if ok; FALSE if data overflow, truncated
647  */
648 static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
649 {
650     if (rField.getLength() + (p2 - p1) <= nArbitraryCellLengthLimit)
651     {
652         rField += std::u16string_view( p1, p2 - p1 );
653         return true;
654     }
655     else
656     {
657         SAL_WARN( "sc", "lcl_appendLineData: data overflow");
658         rField += std::u16string_view( p1, nArbitraryCellLengthLimit - rField.getLength() );
659         return false;
660     }
661 }
662 
namespace {

/// How lcl_ScanString() treats quote characters inside a quoted string.
enum class DoubledQuoteMode
{
    KEEP_ALL,   // both are taken, additionally start and end quote are included in string
    ESCAPE,     // escaped quote, one is taken, one ignored
};

}
672 
673 /** Scan for a quoted string.
674 
675     Precondition: initial current position *p is a cStr quote.
676 
677     For DoubledQuoteMode::ESCAPE, if after the closing quote there is a field
678     end (with or without trailing blanks and as determined by
679     lcl_isFieldEndQuote()), then the content is appended to rField with quotes
680     processed and removed. Else if no field end after the quoted string was
681     detected, nothing is appended and processing continues and is repeated
682     until the next quote. If no closing quote at a field end was found at all,
683     nothing is appended and the initial position is returned and caller has to
684     decide, usually just taking all as literal data.
685 
686     For DoubledQuoteMode::KEEP_ALL, the string up to and including the closing
687     quote is appended to rField and the next position returned, regardless
688     whether there is a field separator following or not.
689 
690  */
static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rField,
            const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
{
    // Content is collected here first and handed over to rField only at the
    // very end, once it is known that the string was properly closed.
    OUString aString;
    // KEEP_ALL does not require a closing quote at a field end, so it starts
    // out as "closed"; ESCAPE has to find one.
    bool bClosingQuote = (eMode == DoubledQuoteMode::KEEP_ALL);
    // Position returned if the data turns out not to be a quoted string.
    const sal_Unicode* const pStart = p;
    if (eMode != DoubledQuoteMode::KEEP_ALL)
        p++;    //! jump over opening quote
    // Set when an ESCAPE doubled quote was handled and scanning has to go on
    // with a new chunk.
    bool bCont;
    do
    {
        bCont = false;
        // Start of the chunk that is appended after the inner loop.
        const sal_Unicode* p0 = p;
        for( ;; )
        {
            if (!*p)
            {
                // Encountering end of data after an opening quote is not a
                // quoted string, ReadCsvLine() concatenated lines with '\n'
                // for a properly quoted embedded linefeed.
                if (eMode == DoubledQuoteMode::KEEP_ALL)
                    // Caller would append that data anyway, so we can do it
                    // already here.
                    break;

                return pStart;
            }

            if( *p == cStr )
            {
                // Step onto the character after the quote and check whether
                // the quote is doubled.
                if ( *++p != cStr )
                {
                    // break or continue for loop
                    if (eMode == DoubledQuoteMode::ESCAPE)
                    {
                        // A non-zero value switches off the separator
                        // detection in lcl_isFieldEndQuote().
                        sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
                        if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
                        {
                            bClosingQuote = true;
                            break;
                        }
                        else
                            // Lone quote not at a field end: it stays part of
                            // the current chunk, keep scanning.
                            continue;
                    }
                    else
                        break;
                }
                // doubled quote char
                switch ( eMode )
                {
                    case DoubledQuoteMode::KEEP_ALL :
                        p++;            // both for us (not breaking for-loop)
                    break;
                    case DoubledQuoteMode::ESCAPE :
                        p++;            // one for us (breaking for-loop)
                        bCont = true;   // and more
                    break;
                }
                if ( eMode == DoubledQuoteMode::ESCAPE )
                    break;
            }
            else
                p++;
        }
        if ( p0 < p )
        {
            // KEEP_ALL appends the chunk [p0,p) as is. Otherwise the last
            // character of the chunk is left out if scanning did not stop at
            // the end of data or if that last character is a quote; that is
            // the closing quote or the second quote of a doubled pair.
            if (!lcl_appendLineData( aString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p)))
                rbOverflowCell = true;
        }
    } while ( bCont );

    // No closing quote at a field end: nothing is appended, caller decides.
    if (!bClosingQuote)
        return pStart;

    if (!aString.isEmpty())
        rField += aString;

    return p;
}
770 
771 static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
772 {
773     // Older versions didn't escape the semicolon.
774     // Older versions quoted the string and doubled embedded quotes, but not
775     // the semicolons, which was plain wrong.
776     if (eVersion >= SylkVersion::OOO32)
777         rString = rString.replaceAll(";;", ";");
778     else
779         rString = rString.replaceAll("\"\"", "\"");
780 
781     rString = rString.replaceAll(SYLK_LF, "\n");
782 }
783 
784 static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
785         OUString& rString, SylkVersion eVersion )
786 {
787     const sal_Unicode* pStartQuote = p;
788     const sal_Unicode* pEndQuote = nullptr;
789     while( *(++p) )
790     {
791         if( *p == '"' )
792         {
793             pEndQuote = p;
794             if (eVersion >= SylkVersion::OOO32)
795             {
796                 if (*(p+1) == ';')
797                 {
798                     if (*(p+2) == ';')
799                     {
800                         p += 2;     // escaped ';'
801                         pEndQuote = nullptr;
802                     }
803                     else
804                         break;      // end field
805                 }
806             }
807             else
808             {
809                 if (*(p+1) == '"')
810                 {
811                     ++p;            // escaped '"'
812                     pEndQuote = nullptr;
813                 }
814                 else if (*(p+1) == ';')
815                     break;          // end field
816             }
817         }
818     }
819     if (!pEndQuote)
820         pEndQuote = p;  // Take all data as string.
821     rString += std::u16string_view(pStartQuote + 1, pEndQuote - pStartQuote - 1 );
822     lcl_UnescapeSylk( rString, eVersion);
823     return p;
824 }
825 
// Scan a SYLK formula expression starting at p and append it to rString.
// For SylkVersion::OOO32 and later the expression extends to the first
// unescaped ';' (a doubled ";;" is an escaped semicolon) and is unescaped
// afterwards. Older versions need a heuristic, see below.
// Returns the position where scanning stopped.
static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* pStart = p;
    if (eVersion >= SylkVersion::OOO32)
    {
        while (*p)
        {
            if (*p == ';')
            {
                if (*(p+1) == ';')
                    ++p;        // escaped ';'
                else
                    break;      // end field
            }
            ++p;
        }
        rString += std::u16string_view( pStart, p - pStart);
        lcl_UnescapeSylk( rString, eVersion);
    }
    else
    {
        // Nasty. If in old versions the formula contained a semicolon, it was
        // quoted and embedded quotes were doubled, but semicolons were not. If
        // there was no semicolon, it could still contain quotes and doubled
        // embedded quotes if it was something like ="a""b", which was saved as
        // E"a""b" as is and has to be preserved, even if older versions
        // couldn't even load it correctly. However, theoretically another
        // field might follow and thus the line contain a semicolon again, such
        // as ...;E"a""b";...
        bool bQuoted = false;
        if (*p == '"')
        {
            // May be a quoted expression or just a string constant expression
            // with quotes.
            // Probe only: look for a ';' before the closing quote.
            while (*(++p))
            {
                if (*p == '"')
                {
                    if (*(p+1) == '"')
                        ++p;            // escaped '"'
                    else
                        break;          // closing '"', had no ';' yet
                }
                else if (*p == ';')
                {
                    bQuoted = true;     // ';' within quoted expression
                    break;
                }
            }
            // Rewind, the actual scan below starts over at the beginning.
            p = pStart;
        }
        if (bQuoted)
            p = lcl_ScanSylkString( p, rString, eVersion);
        else
        {
            // Not a quoted expression: take everything up to the next ';' or
            // the end of data literally, without unescaping.
            while (*p && *p != ';')
                ++p;
            rString += std::u16string_view( pStart, p - pStart);
        }
    }
    return p;
}
889 
890 static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
891 {
892     if (cEsc)
893     {
894         // the goal is to replace cStr by cStr+cStr
895         OUString strFrom(cEsc);
896         OUString strTo = strFrom + strFrom;
897         rString = rString.replaceAll(strFrom, strTo);
898     }
899 
900     if (cQuote)
901     {
902         rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);
903     }
904 
905     rStrm.WriteUnicodeOrByteText(rString);
906 }
907 
908 bool ScImportExport::Text2Doc( SvStream& rStrm )
909 {
910     bool bOk = true;
911 
912     sal_Unicode pSeps[2];
913     pSeps[0] = cSep;
914     pSeps[1] = 0;
915 
916     ScSetStringParam aSetStringParam;
917     aSetStringParam.mbCheckLinkFormula = true;
918 
919     SCCOL nStartCol = aRange.aStart.Col();
920     SCROW nStartRow = aRange.aStart.Row();
921     SCCOL nEndCol = aRange.aEnd.Col();
922     SCROW nEndRow = aRange.aEnd.Row();
923     sal_uInt64 nOldPos = rStrm.Tell();
924     rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
925     bool   bData = !bSingle;
926     if( !bSingle)
927         bOk = StartPaste();
928 
929     while( bOk )
930     {
931         OUString aLine;
932         OUString aCell;
933         SCROW nRow = nStartRow;
934         rStrm.Seek( nOldPos );
935         for( ;; )
936         {
937             rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
938             // tdf#125440 When inserting tab separated string, consider quotes as field markers
939             DoubledQuoteMode mode = aLine.indexOf("\t") >= 0 ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL;
940             if( rStrm.eof() )
941                 break;
942             SCCOL nCol = nStartCol;
943             const sal_Unicode* p = aLine.getStr();
944             while( *p )
945             {
946                 aCell.clear();
947                 const sal_Unicode* q = p;
948                 if (*p == cStr)
949                 {
950                     // Look for a pairing quote.
951                     q = p = lcl_ScanString( p, aCell, pSeps, cStr, mode, bOverflowCell );
952                 }
953                 // All until next separator.
954                 while (*p && *p != cSep)
955                     ++p;
956                 if (!lcl_appendLineData( aCell, q, p))
957                     bOverflowCell = true;   // display warning on import
958                 if (*p)
959                     ++p;
960                 if (rDoc.ValidCol(nCol) && rDoc.ValidRow(nRow) )
961                 {
962                     if( bSingle )
963                     {
964                         if (nCol>nEndCol) nEndCol = nCol;
965                         if (nRow>nEndRow) nEndRow = nRow;
966                     }
967                     if( bData && nCol <= nEndCol && nRow <= nEndRow )
968                         rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aSetStringParam );
969                 }
970                 else                            // too many columns/rows
971                 {
972                     if (!rDoc.ValidRow(nRow))
973                         bOverflowRow = true;    // display warning on import
974                     if (!rDoc.ValidCol(nCol))
975                         bOverflowCol = true;    // display warning on import
976                 }
977                 ++nCol;
978             }
979             ++nRow;
980         }
981 
982         if( !bData )
983         {
984             aRange.aEnd.SetCol( nEndCol );
985             aRange.aEnd.SetRow( nEndRow );
986             bOk = StartPaste();
987             bData = true;
988         }
989         else
990             break;
991     }
992 
993     EndPaste();
994     if (bOk && mbImportBroadcast)
995     {
996         rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
997         pDocSh->PostDataChanged();
998     }
999 
1000     return bOk;
1001 }
1002 
1003 //  Extended Ascii-Import
1004 
1005 static bool lcl_PutString(
1006     ScDocumentImport& rDocImport, bool bUseDocImport,
1007     SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
1008     SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bDetectSciNumFormat, bool bEvaluateFormulas, bool bSkipEmptyCells,
1009     const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
1010     const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
1011 {
1012     ScDocument& rDoc = rDocImport.getDoc();
1013     bool bMultiLine = false;
1014     if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) )
1015         return bMultiLine;
1016     if ( rStr.isEmpty() )
1017     {
1018         if ( !bSkipEmptyCells )
1019         {   // delete destination cell
1020             if ( bUseDocImport )
1021                 rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
1022             else
1023                 rDoc.SetString( nCol, nRow, nTab, rStr );
1024         }
1025         return false;
1026     }
1027 
1028     const bool bForceFormulaText = (!bEvaluateFormulas && rStr[0] == '=');
1029     if (nColFormat == SC_COL_TEXT || bForceFormulaText)
1030     {
1031         if ( bUseDocImport )
1032         {
1033             double fDummy;
1034             sal_uInt32 nIndex = 0;
1035             if (bForceFormulaText || rDoc.GetFormatTable()->IsNumberFormat(rStr, nIndex, fDummy))
1036             {
1037                 // Set the format of this cell to Text.
1038                 // This is only necessary for ScDocumentImport,
1039                 // ScDocument::SetTextCell() forces it by ScSetStringParam.
1040                 sal_uInt32 nFormat = rDoc.GetFormatTable()->GetStandardFormat(SvNumFormatType::TEXT);
1041                 ScPatternAttr aNewAttrs(rDoc.GetPool());
1042                 SfxItemSet& rSet = aNewAttrs.GetItemSet();
1043                 rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
1044                 rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs);
1045             }
1046             if (ScStringUtil::isMultiline(rStr))
1047             {
1048                 ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1049                 rEngine.SetTextCurrentDefaults(rStr);
1050                 rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1051                 return true;
1052             }
1053             else
1054             {
1055                 rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
1056                 return false;
1057             }
1058         }
1059         else
1060         {
1061             rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
1062             return bMultiLine;
1063         }
1064     }
1065 
1066     if ( nColFormat == SC_COL_ENGLISH )
1067     {
1068         //! SetString with Extra-Flag ???
1069 
1070         SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1071         sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
1072         double fVal;
1073         if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
1074         {
1075             // Numberformat will not be set to English
1076             if ( bUseDocImport )
1077                 rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
1078             else
1079                 rDoc.SetValue( nCol, nRow, nTab, fVal );
1080             return bMultiLine;
1081         }
1082         // else, continue with SetString
1083     }
1084     else if ( nColFormat != SC_COL_STANDARD ) // Datumformats
1085     {
1086         const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
1087         const sal_Int32 nLen = rStr.getLength();
1088         sal_Int32 nStart[nMaxNumberParts];
1089         sal_Int32 nEnd[nMaxNumberParts];
1090 
1091         bool bIso;
1092         sal_uInt16 nDP, nMP, nYP;
1093         switch ( nColFormat )
1094         {
1095             case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; bIso = true; break;
1096             case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; bIso = false; break;
1097             case SC_COL_DMY:
1098             default:         nDP = 0; nMP = 1; nYP = 2; bIso = false; break;
1099         }
1100 
1101         sal_uInt16 nFound = 0;
1102         bool bInNum = false;
1103         for (sal_Int32 nPos = 0; nPos < nLen && (bInNum || nFound < nMaxNumberParts); ++nPos)
1104         {
1105             bool bLetter = false;
1106             if (rtl::isAsciiDigit(rStr[nPos]) ||
1107                     (((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
1108                      && (bLetter = ScGlobal::getCharClass().isLetterNumeric( rStr, nPos))))
1109             {
1110                 if (!bInNum)
1111                 {
1112                     bInNum = true;
1113                     nStart[nFound] = nPos;
1114                     ++nFound;
1115                 }
1116                 nEnd[nFound-1] = nPos;
1117                 if (bIso && (bLetter || (2 <= nFound && nFound <= 6 && nPos > nStart[nFound-1] + 1)))
1118                     // Each M,D,h,m,s at most 2 digits.
1119                     bIso = false;
1120             }
1121             else
1122             {
1123                 bInNum = false;
1124                 if (bIso)
1125                 {
1126                     // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ)
1127                     // XXX NOTE: timezone is accepted here, but number
1128                     // formatter parser will not, so the end result will be
1129                     // type Text to preserve timezone information.
1130                     switch (rStr[nPos])
1131                     {
1132                         case '+':
1133                             if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
1134                                 // Accept timezone offset.
1135                                 ;
1136                             else if (nPos > 0)
1137                                 // Accept one leading sign.
1138                                 bIso = false;
1139                         break;
1140                         case '-':
1141                             if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
1142                                 // Accept timezone offset.
1143                                 ;
1144                             else if (nFound == 0 && nPos > 0)
1145                                 // Accept one leading sign.
1146                                 bIso = false;
1147                             else if (nFound < 1 || 2 < nFound || nPos != nEnd[nFound-1] + 1)
1148                                 // Not immediately after 1 or 1-2
1149                                 bIso = false;
1150                         break;
1151                         case 'T':
1152                         case ' ':
1153                             if (nFound != 3 || nPos != nEnd[nFound-1] + 1)
1154                                 // Not immediately after 1-2-3
1155                                 bIso = false;
1156                         break;
1157                         case ':':
1158                             if (nFound < 4 || 5 < nFound || nPos != nEnd[nFound-1] + 1)
1159                                 // Not at 1-2-3T4:5:
1160                                 bIso = false;
1161                         break;
1162                         case '.':
1163                         case ',':
1164                             if (nFound != 6 || nPos != nEnd[nFound-1] + 1)
1165                                 // Not at 1-2-3T4:5:6.
1166                                 bIso = false;
1167                         break;
1168                         case 'Z':
1169                             if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
1170                                 // Accept Zero timezone.
1171                                 ;
1172                             else
1173                                 bIso = false;
1174                         break;
1175                         default:
1176                             bIso = false;
1177                     }
1178                 }
1179             }
1180         }
1181 
1182         if (nFound < 3)
1183             bIso = false;
1184 
1185         if (bIso)
1186         {
1187             // Leave conversion and detection of various possible number
1188             // formats to the number formatter. ISO is recognized in any locale
1189             // so we can directly use the document's formatter.
1190             sal_uInt32 nFormat = 0;
1191             double fVal = 0.0;
1192             SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1193             if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
1194             {
1195                 if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
1196                 {
1197                     ScAddress aPos(nCol,nRow,nTab);
1198                     if (bUseDocImport)
1199                         rDocImport.setNumericCell(aPos, fVal);
1200                     else
1201                         rDoc.SetValue(aPos, fVal);
1202                     rDoc.SetNumberFormat(aPos, nFormat);
1203 
1204                     return bMultiLine;     // success
1205                 }
1206             }
1207             // If we reach here it is type Text (e.g. timezone or trailing
1208             // characters). Handled below.
1209         }
1210 
1211         if ( nFound == 1 )
1212         {
1213             //  try to break one number (without separators) into date fields
1214 
1215             sal_Int32 nDateStart = nStart[0];
1216             sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;
1217 
1218             if ( nDateLen >= 5 && nDateLen <= 8 &&
1219                     ScGlobal::getCharClass().isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
1220             {
1221                 //  6 digits: 2 each for day, month, year
1222                 //  8 digits: 4 for year, 2 each for day and month
1223                 //  5 or 7 digits: first field is shortened by 1
1224 
1225                 bool bLongYear = ( nDateLen >= 7 );
1226                 bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );
1227 
1228                 sal_uInt16 nFieldStart = nDateStart;
1229                 for (sal_uInt16 nPos=0; nPos<3; nPos++)
1230                 {
1231                     sal_uInt16 nFieldEnd = nFieldStart + 1;     // default: 2 digits
1232                     if ( bLongYear && nPos == nYP )
1233                         nFieldEnd += 2;                     // 2 extra digits for long year
1234                     if ( bShortFirst && nPos == 0 )
1235                         --nFieldEnd;                        // first field shortened?
1236 
1237                     nStart[nPos] = nFieldStart;
1238                     nEnd[nPos]   = nFieldEnd;
1239                     nFieldStart  = nFieldEnd + 1;
1240                 }
1241                 nFound = 3;
1242             }
1243         }
1244 
1245         if (!bIso && nFound >= 3)
1246         {
1247             using namespace ::com::sun::star;
1248             bool bSecondCal = false;
1249             sal_uInt16 nDay  = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nDP], nEnd[nDP]+1-nStart[nDP] )));
1250             sal_uInt16 nYear = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nYP], nEnd[nYP]+1-nStart[nYP] )));
1251             OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
1252             sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
1253             if (!nMonth)
1254             {
1255                 static constexpr OUString aSepShortened = u"SEP"_ustr;
1256                 uno::Sequence< i18n::CalendarItem2 > xMonths;
1257                 sal_Int32 i, nMonthCount;
1258                 //  first test all month names from local international
1259                 xMonths = rCalendar.getMonths();
1260                 nMonthCount = xMonths.getLength();
1261                 for (i=0; i<nMonthCount && !nMonth; i++)
1262                 {
1263                     if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
1264                          rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
1265                         nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1266                     else if ( i == 8 && rTransliteration.isEqual( "SEPT",
1267                                 xMonths[i].AbbrevName ) &&
1268                             rTransliteration.isEqual( aMStr, aSepShortened ) )
1269                     {   // correct English abbreviation is SEPT,
1270                         // but data mostly contains SEP only
1271                         nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1272                     }
1273                 }
1274                 //  if none found, then test english month names
1275                 if ( !nMonth && pSecondCalendar && pSecondTransliteration )
1276                 {
1277                     xMonths = pSecondCalendar->getMonths();
1278                     nMonthCount = xMonths.getLength();
1279                     for (i=0; i<nMonthCount && !nMonth; i++)
1280                     {
1281                         if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
1282                              pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
1283                         {
1284                             nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1285                             bSecondCal = true;
1286                         }
1287                         else if ( i == 8 && pSecondTransliteration->isEqual(
1288                                     aMStr, aSepShortened ) )
1289                         {   // correct English abbreviation is SEPT,
1290                             // but data mostly contains SEP only
1291                             nMonth = sal::static_int_cast<sal_Int16>( i+1 );
1292                             bSecondCal = true;
1293                         }
1294                     }
1295                 }
1296             }
1297 
1298             SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1299             if ( nYear < 100 )
1300                 nYear = pDocFormatter->ExpandTwoDigitYear( nYear );
1301 
1302             CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
1303             sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
1304             if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
1305             {
1306                 --nMonth;
1307                 pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
1308                 pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
1309                 pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
1310                 sal_Int16 nHour, nMinute, nSecond;
1311                 // #i14974# The imported value should have no fractional value, so set the
1312                 // time fields to zero (ICU calendar instance defaults to current date/time)
1313                 nHour = nMinute = nSecond = 0;
1314                 if (nFound > 3)
1315                     nHour = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[3], nEnd[3]+1-nStart[3])));
1316                 if (nFound > 4)
1317                     nMinute = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[4], nEnd[4]+1-nStart[4])));
1318                 if (nFound > 5)
1319                     nSecond = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[5], nEnd[5]+1-nStart[5])));
1320                 // do not use calendar's milliseconds, to avoid fractional part truncation
1321                 double fFrac = 0.0;
1322                 if (nFound > 6)
1323                 {
1324                     sal_Unicode cDec = '.';
1325                     OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]);
1326                     rtl_math_ConversionStatus eStatus;
1327                     double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
1328                     if (eStatus == rtl_math_ConversionStatus_Ok)
1329                         fFrac = fV / 86400.0;
1330                 }
1331                 sal_Int32 nPos;
1332                 if (nFound > 3 && 1 <= nHour && nHour <= 12  // nHour 0 and >=13 can't be AM/PM
1333                         && (nPos = nEnd[nFound-1] + 1) < nLen)
1334                 {
1335                     // Dreaded AM/PM may be following.
1336                     while (nPos < nLen && rStr[nPos] == ' ')
1337                         ++nPos;
1338                     if (nPos < nLen)
1339                     {
1340                         sal_Int32 nStop = nPos;
1341                         while (nStop < nLen && rStr[nStop] != ' ')
1342                             ++nStop;
1343                         OUString aAmPm = rStr.copy( nPos, nStop - nPos);
1344                         // For AM only 12 needs to be treated, whereas for PM
1345                         // it must not. Check both, locale and second/English
1346                         // strings.
1347                         if (nHour == 12 &&
1348                                 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) ||
1349                                  (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "AM"))))
1350                         {
1351                             nHour = 0;
1352                         }
1353                         else if (nHour < 12 &&
1354                                 (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) ||
1355                                  (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, "PM"))))
1356                         {
1357                             nHour += 12;
1358                         }
1359                     }
1360                 }
1361                 pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
1362                 pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
1363                 pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
1364                 pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
1365                 if ( pCalendar->isValid() )
1366                 {
1367                     // Whole days diff.
1368                     double fDiff = DateTime::Sub( DateTime(pDocFormatter->GetNullDate()),
1369                             pCalendar->getEpochStart());
1370                     // #i14974# must use getLocalDateTime to get the same
1371                     // date values as set above
1372                     double fDays = pCalendar->getLocalDateTime() + fFrac;
1373                     fDays -= fDiff;
1374 
1375                     LanguageType eLatin, eCjk, eCtl;
1376                     rDoc.GetLanguage( eLatin, eCjk, eCtl );
1377                     LanguageType eDocLang = eLatin;     //! which language for date formats?
1378 
1379                     SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
1380                     sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
1381                     // maybe there is a special format including seconds or milliseconds
1382                     if (nFound > 5)
1383                         nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);
1384 
1385                     ScAddress aPos(nCol,nRow,nTab);
1386                     if ( bUseDocImport )
1387                         rDocImport.setNumericCell(aPos, fDays);
1388                     else
1389                         rDoc.SetValue( aPos, fDays );
1390                     rDoc.SetNumberFormat(aPos, nFormat);
1391 
1392                     return bMultiLine;     // success
1393                 }
1394             }
1395         }
1396     }
1397 
1398     // Standard or date not determined -> SetString / EditCell
1399     if( rStr.indexOf( '\n' ) == -1 )
1400     {
1401         if (!bDetectNumFormat && nColFormat == SC_COL_STANDARD)
1402         {
1403             // Import a strict ISO 8601 date(+time) string even without
1404             // "Detect special numbers" or "Date (YMD)".
1405             do
1406             {
1407                 // Simple pre-check before calling more expensive parser.
1408                 // ([+-])(Y)YYYY-MM-DD
1409                 if (rStr.getLength() < 10)
1410                     break;
1411                 const sal_Int32 n1 = rStr.indexOf('-', 1);
1412                 if (n1 < 4)
1413                     break;
1414                 const sal_Int32 n2 = rStr.indexOf('-', n1 + 1);
1415                 if (n2 < 7 || n1 + 3 < n2)
1416                     break;
1417 
1418                 css::util::DateTime aDateTime;
1419                 if (!sax::Converter::parseDateTime( aDateTime, rStr))
1420                     break;
1421 
1422                 sal_uInt32 nFormat = 0;
1423                 double fVal = 0.0;
1424                 SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
1425                 if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
1426                 {
1427                     if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
1428                     {
1429                         ScAddress aPos(nCol,nRow,nTab);
1430                         if (bUseDocImport)
1431                             rDocImport.setNumericCell(aPos, fVal);
1432                         else
1433                             rDoc.SetValue(aPos, fVal);
1434                         rDoc.SetNumberFormat(aPos, nFormat);
1435 
1436                         return bMultiLine;     // success
1437                     }
1438                 }
1439             }
1440             while(false);
1441         }
1442 
1443         ScSetStringParam aParam;
1444         aParam.mpNumFormatter = pFormatter;
1445         aParam.mbDetectNumberFormat = bDetectNumFormat;
1446         aParam.mbDetectScientificNumberFormat = bDetectSciNumFormat;
1447         aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
1448         aParam.mbHandleApostrophe = false;
1449         aParam.mbCheckLinkFormula = true;
1450         if ( bUseDocImport )
1451             rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
1452         else
1453             rDoc.SetString( nCol, nRow, nTab, rStr, &aParam );
1454     }
1455     else
1456     {
1457         bMultiLine = true;
1458         ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
1459         rEngine.SetTextCurrentDefaults(rStr);
1460         if ( bUseDocImport )
1461             rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
1462         else
1463             rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
1464     }
1465     return bMultiLine;
1466 }
1467 
1468 static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
1469                      bool& rbIsQuoted, bool& rbOverflowCell )
1470 {
1471     sal_Int32 nLen = rLine.getLength();
1472     if (nNext > nLen)
1473         nNext = nLen;
1474     if ( nNext <= nStart )
1475         return OUString();
1476 
1477     const sal_Unicode* pStr = rLine.getStr();
1478 
1479     sal_Int32 nSpace = nNext;
1480     while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
1481         --nSpace;
1482 
1483     rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');
1484     if (rbIsQuoted)
1485     {
1486         bool bFits = (nSpace - nStart - 3 <= nArbitraryCellLengthLimit);
1487         if (bFits)
1488             return rLine.copy(nStart+1, std::max< sal_Int32 >(0, nSpace-nStart-2));
1489         else
1490         {
1491             SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
1492             rbOverflowCell = true;
1493             return rLine.copy(nStart+1, nArbitraryCellLengthLimit);
1494         }
1495     }
1496     else
1497     {
1498         bool bFits = (nSpace - nStart <= nArbitraryCellLengthLimit);
1499         if (bFits)
1500             return rLine.copy(nStart, nSpace-nStart);
1501         else
1502         {
1503             SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
1504             rbOverflowCell = true;
1505             return rLine.copy(nStart, nArbitraryCellLengthLimit);
1506         }
1507     }
1508 }
1509 
1510 bool ScImportExport::ExtText2Doc( SvStream& rStrm )
1511 {
1512     if (!pExtOptions)
1513         return Text2Doc( rStrm );
1514 
1515     sal_uInt64 const nOldPos = rStrm.Tell();
1516     sal_uInt64 const nRemaining = rStrm.remainingSize();
1517     std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
1518             ScResId( STR_LOAD_DOC ), nRemaining, true ));
1519     rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
1520     // tdf#82254 - check whether to include a byte-order-mark in the output
1521     if (nOldPos != rStrm.Tell())
1522         mbIncludeBOM = true;
1523 
1524     SCCOL nStartCol = aRange.aStart.Col();
1525     SCCOL nEndCol = aRange.aEnd.Col();
1526     SCROW nStartRow = aRange.aStart.Row();
1527     const SCTAB nTab = aRange.aStart.Tab();
1528 
1529     bool    bFixed              = pExtOptions->IsFixedLen();
1530     OUString aSeps              = pExtOptions->GetFieldSeps();  // Need non-const for ReadCsvLine(),
1531     const sal_Unicode* pSeps    = aSeps.getStr();               // but it will be const anyway (asserted below).
1532     bool    bMerge              = pExtOptions->IsMergeSeps();
1533     bool    bRemoveSpace        = pExtOptions->IsRemoveSpace();
1534     sal_uInt16  nInfoCount      = pExtOptions->GetInfoCount();
1535     const sal_Int32* pColStart  = pExtOptions->GetColStart();
1536     const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
1537     tools::Long nSkipLines             = pExtOptions->GetStartRow();
1538 
1539     LanguageType eDocLang = pExtOptions->GetLanguage();
1540     SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
1541     bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
1542     bool bDetectSciNumFormat = pExtOptions->IsDetectScientificNumber();
1543     bool bEvaluateFormulas = pExtOptions->IsEvaluateFormulas();
1544     bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();
1545 
1546     // For date recognition
1547     ::utl::TransliterationWrapper aTransliteration(
1548         comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
1549     aTransliteration.loadModuleIfNeeded( eDocLang );
1550     CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
1551     aCalendar.loadDefaultCalendar(
1552         LanguageTag::convertToLocale( eDocLang ) );
1553     std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
1554     std::unique_ptr< CalendarWrapper > pEnglishCalendar;
1555     if ( eDocLang != LANGUAGE_ENGLISH_US )
1556     {
1557         pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
1558             comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
1559         aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
1560         pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
1561         pEnglishCalendar->loadDefaultCalendar(
1562             LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
1563     }
1564 
1565     OUString aLine;
1566     OUString aCell;
1567     sal_uInt16 i;
1568     SCROW nRow = nStartRow;
1569     sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
1570 
1571     while(--nSkipLines>0)
1572     {
1573         aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
1574         if ( rStrm.eof() )
1575             break;
1576     }
1577 
1578     // Determine range for Undo.
1579     // We don't need this during import of a file to a new sheet or document...
1580     bool bDetermineRange = bUndo;
1581     bool bColumnsAreDetermined = false;
1582 
1583     // Row heights don't need to be adjusted on the fly if EndPaste() is called
1584     // afterwards, which happens only if bDetermineRange. This variable also
1585     // survives the toggle of bDetermineRange down at the end of the do{} loop.
1586     bool bRangeIsDetermined = bDetermineRange;
1587 
1588     bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();
1589 
1590     sal_uInt64 nOriginalStreamPos = rStrm.Tell();
1591 
1592     SCROW nFirstUpdateRowHeight = SCROW_MAX;
1593     SCROW nLastUpdateRowHeight = -1;
1594 
1595     ScDocumentImport aDocImport(rDoc);
1596     do
1597     {
1598         for( ;; )
1599         {
1600             aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
1601             if ( rStrm.eof() && aLine.isEmpty() )
1602                 break;
1603 
1604             assert(pSeps == aSeps.getStr());
1605 
1606             if ( nRow > rDoc.MaxRow() )
1607             {
1608                 bOverflowRow = true;    // display warning on import
1609                 break;  // for
1610             }
1611 
1612             if (!bDetermineRange)
1613                 EmbeddedNullTreatment( aLine);
1614 
1615             sal_Int32 nLineLen = aLine.getLength();
1616             SCCOL nCol = nStartCol;
1617             bool bMultiLine = false;
1618             if ( bFixed ) //  Fixed line length
1619             {
1620                 if (bDetermineRange)
1621                 {
1622                     if (!bColumnsAreDetermined)
1623                     {
1624                         // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it
1625                         // is only an overflow if there is really data following to
1626                         // be put behind the last column, which doesn't happen if
1627                         // info is SC_COL_SKIP.
1628                         for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
1629                         {
1630                             const sal_uInt8 nFmt = pColFormat[i];
1631                             if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
1632                             {
1633                                 if (nCol > rDoc.MaxCol())
1634                                     bOverflowCol = true;    // display warning on import
1635                                 ++nCol;
1636                             }
1637                         }
1638                         bColumnsAreDetermined = true;
1639                     }
1640                 }
1641                 else
1642                 {
1643                     sal_Int32 nStartIdx = 0;
1644                     // Same maxcol+1 check reason as above.
1645                     for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
1646                     {
1647                         sal_Int32 nNextIdx = nStartIdx;
1648                         if (i + 1 < nInfoCount)
1649                             CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] );
1650                         else
1651                             nNextIdx = nLineLen;
1652                         sal_uInt8 nFmt = pColFormat[i];
1653                         if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
1654                         {
1655                             if (nCol > rDoc.MaxCol())
1656                                 bOverflowCol = true;    // display warning on import
1657                             else
1658                             {
1659                                 bool bIsQuoted = false;
1660                                 aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell );
1661                                 if (bIsQuoted && bQuotedAsText)
1662                                     nFmt = SC_COL_TEXT;
1663 
1664                                 bMultiLine |= lcl_PutString(
1665                                         aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1666                                         &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
1667                                         aTransliteration, aCalendar,
1668                                         pEnglishTransliteration.get(), pEnglishCalendar.get());
1669                             }
1670                             ++nCol;
1671                         }
1672                         nStartIdx = nNextIdx;
1673                     }
1674                 }
1675             }
1676             else // Search for the separator
1677             {
1678                 SCCOL nSourceCol = 0;
1679                 sal_uInt16 nInfoStart = 0;
1680                 const sal_Unicode* p = aLine.getStr();
1681                 // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
1682                 // overflow if there is really data following to be put behind
1683                 // the last column, which doesn't happen if info is
1684                 // SC_COL_SKIP.
1685                 while (*p && nCol <= rDoc.MaxCol()+1)
1686                 {
1687                     bool bIsQuoted = false;
1688                     p = ScImportExport::ScanNextFieldFromString( p, aCell,
1689                             cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );
1690 
1691                     sal_uInt8 nFmt = SC_COL_STANDARD;
1692                     for ( i=nInfoStart; i<nInfoCount; i++ )
1693                     {
1694                         if ( pColStart[i] == nSourceCol + 1 )       // pColStart is 1-based
1695                         {
1696                             nFmt = pColFormat[i];
1697                             nInfoStart = i + 1;     // ColInfos are in succession
1698                             break;  // for
1699                         }
1700                     }
1701                     if ( nFmt != SC_COL_SKIP )
1702                     {
1703                         if (nCol > rDoc.MaxCol())
1704                             bOverflowCol = true;    // display warning on import
1705                         else if (!bDetermineRange)
1706                         {
1707                             if (bIsQuoted && bQuotedAsText)
1708                                 nFmt = SC_COL_TEXT;
1709 
1710                             bMultiLine |= lcl_PutString(
1711                                 aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
1712                                 &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
1713                                 aTransliteration, aCalendar,
1714                                 pEnglishTransliteration.get(), pEnglishCalendar.get());
1715                         }
1716                         ++nCol;
1717                     }
1718 
1719                     ++nSourceCol;
1720                 }
1721             }
1722             if (nEndCol < nCol)
1723                 nEndCol = nCol;     //! points to the next free or even rDoc.MaxCol()+2
1724 
1725             if (!bDetermineRange)
1726             {
1727                 if (bMultiLine && !bRangeIsDetermined && pDocSh)
1728                 {   // Adjust just once at the end for a whole range.
1729                     nFirstUpdateRowHeight = std::min( nFirstUpdateRowHeight, nRow );
1730                     nLastUpdateRowHeight = std::max( nLastUpdateRowHeight, nRow );
1731                 }
1732                 xProgress->SetStateOnPercent( rStrm.Tell() - nOldPos );
1733             }
1734             ++nRow;
1735         }
1736         // so far nRow/nEndCol pointed to the next free
1737         if (nRow > nStartRow)
1738             --nRow;
1739         if (nEndCol > nStartCol)
1740             nEndCol = ::std::min( static_cast<SCCOL>(nEndCol - 1), rDoc.MaxCol());
1741 
1742         if (bDetermineRange)
1743         {
1744             aRange.aEnd.SetCol( nEndCol );
1745             aRange.aEnd.SetRow( nRow );
1746 
1747             if ( !mbApi && nStartCol != nEndCol &&
1748                  !rDoc.IsBlockEmpty( nStartCol + 1, nStartRow, nEndCol, nRow, nTab ) )
1749             {
1750                 ScReplaceWarnBox aBox(ScDocShell::GetActiveDialogParent());
1751                 if (aBox.run() != RET_YES)
1752                 {
1753                     return false;
1754                 }
1755             }
1756 
1757             rStrm.Seek( nOriginalStreamPos );
1758             nRow = nStartRow;
1759             if (!StartPaste())
1760             {
1761                 EndPaste(false);
1762                 return false;
1763             }
1764         }
1765 
1766         bDetermineRange = !bDetermineRange;     // toggle
1767     } while (!bDetermineRange);
1768 
1769     if ( !mbOverwriting )
1770         aDocImport.finalize();
1771 
1772     xProgress.reset();    // make room for AdjustRowHeight progress
1773 
1774     if( nFirstUpdateRowHeight < nLastUpdateRowHeight && pDocSh )
1775         pDocSh->AdjustRowHeight( nFirstUpdateRowHeight, nLastUpdateRowHeight, nTab);
1776 
1777     if (bRangeIsDetermined)
1778         EndPaste(false);
1779 
1780     if (mbImportBroadcast && !mbOverwriting)
1781     {
1782         rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
1783         pDocSh->PostDataChanged();
1784     }
1785     return true;
1786 }
1787 
1788 void ScImportExport::EmbeddedNullTreatment( OUString & rStr )
1789 {
1790     // A nasty workaround for data with embedded NULL characters. As long as we
1791     // can't handle them properly as cell content (things assume 0-terminated
1792     // strings at too many places) simply strip all NULL characters from raw
1793     // data. Excel does the same. See fdo#57841 for sample data.
1794 
1795     // The normal case is no embedded NULL, check first before de-/allocating
1796     // ustring stuff.
1797     sal_Unicode cNull = 0;
1798     if (sal_Int32 pos = rStr.indexOf(cNull); pos >= 0)
1799     {
1800         rStr = rStr.replaceAll(std::u16string_view(&cNull, 1), u"", pos);
1801     }
1802 }
1803 
1804 const sal_Unicode* ScImportExport::ScanNextFieldFromString( const sal_Unicode* p,
1805         OUString& rField, sal_Unicode cStr, const sal_Unicode* pSeps, bool bMergeSeps, bool& rbIsQuoted,
1806         bool& rbOverflowCell, bool bRemoveSpace )
1807 {
1808     rbIsQuoted = false;
1809     rField.clear();
1810     const sal_Unicode cBlank = ' ';
1811     if (cStr && !ScGlobal::UnicodeStrChr(pSeps, cBlank))
1812     {
1813         // Cope with broken generators that put leading blanks before a quoted
1814         // field, like "field1", "field2", "..."
1815         // NOTE: this is not in conformance with http://tools.ietf.org/html/rfc4180
1816         const sal_Unicode* pb = p;
1817         while (*pb == cBlank)
1818             ++pb;
1819         if (*pb == cStr)
1820             p = pb;
1821     }
1822     if (cStr && *p == cStr) // String in quotes
1823     {
1824         rbIsQuoted = true;
1825         const sal_Unicode* p1;
1826         p1 = p = lcl_ScanString( p, rField, pSeps, cStr, DoubledQuoteMode::ESCAPE, rbOverflowCell );
1827         while (!lcl_isFieldEnd( *p, pSeps))
1828             p++;
1829         // Append remaining unquoted and undelimited data (dirty, dirty) to
1830         // this field.
1831         if (p > p1)
1832         {
1833             const sal_Unicode* ptrim_f = p;
1834             if ( bRemoveSpace )
1835             {
1836                 while ( ptrim_f > p1  && ( *(ptrim_f - 1) == cBlank ) )
1837                     --ptrim_f;
1838             }
1839             if (!lcl_appendLineData( rField, p1, ptrim_f))
1840                 rbOverflowCell = true;
1841         }
1842         if( *p )
1843             p++;
1844     }
1845     else                        // up to delimiter
1846     {
1847         const sal_Unicode* p0 = p;
1848         while (!lcl_isFieldEnd( *p, pSeps))
1849             p++;
1850         const sal_Unicode* ptrim_i = p0;
1851         const sal_Unicode* ptrim_f = p;  // [ptrim_i,ptrim_f) is cell data after trimming
1852         if ( bRemoveSpace )
1853         {
1854             while ( ptrim_i < ptrim_f && *ptrim_i == cBlank )
1855                 ++ptrim_i;
1856             while ( ptrim_f > ptrim_i && ( *(ptrim_f - 1) == cBlank ) )
1857                 --ptrim_f;
1858         }
1859         if (!lcl_appendLineData( rField, ptrim_i, ptrim_f))
1860             rbOverflowCell = true;
1861         if( *p )
1862             p++;
1863     }
1864     if ( bMergeSeps )           // skip following delimiters
1865     {
1866         while (*p && ScGlobal::UnicodeStrChr( pSeps, *p))
1867             p++;
1868     }
1869     return p;
1870 }
1871 
1872 namespace {
1873 
1874 /**
1875  * Check if a given string has any line break characters or separators.
1876  *
1877  * @param rStr string to inspect.
1878  * @param cSep separator character.
1879  */
1880 bool hasLineBreaksOrSeps( const OUString& rStr, sal_Unicode cSep )
1881 {
1882     const sal_Unicode* p = rStr.getStr();
1883     for (sal_Int32 i = 0, n = rStr.getLength(); i < n; ++i, ++p)
1884     {
1885         sal_Unicode c = *p;
1886         if (c == cSep)
1887             // separator found.
1888             return true;
1889 
1890         switch (c)
1891         {
1892             case '\n':
1893             case '\r':
1894                 // line break found.
1895                 return true;
1896             default:
1897                 ;
1898         }
1899     }
1900     return false;
1901 }
1902 
1903 }
1904 
1905 bool ScImportExport::Doc2Text( SvStream& rStrm )
1906 {
1907     SCCOL nCol;
1908     SCROW nRow;
1909     SCCOL nStartCol = aRange.aStart.Col();
1910     SCROW nStartRow = aRange.aStart.Row();
1911     SCTAB nStartTab = aRange.aStart.Tab();
1912     SCCOL nEndCol = aRange.aEnd.Col();
1913     SCROW nEndRow = aRange.aEnd.Row();
1914     SCTAB nEndTab = aRange.aEnd.Tab();
1915 
1916     if (!rDoc.GetClipParam().isMultiRange() && nStartTab == nEndTab)
1917         if (!rDoc.ShrinkToDataArea( nStartTab, nStartCol, nStartRow, nEndCol, nEndRow ))
1918             return false;
1919 
1920     OUString aCellStr;
1921 
1922     bool bConvertLF = (GetSystemLineEnd() != LINEEND_LF);
1923 
1924     // We need to cache sc::ColumnBlockPosition per each column, tab is always nStartTab.
1925     std::vector< sc::ColumnBlockPosition > blockPos( nEndCol - nStartCol + 1 );
1926     for( SCCOL i = nStartCol; i <= nEndCol; ++i )
1927         rDoc.InitColumnBlockPosition( blockPos[ i - nStartCol ], nStartTab, i );
1928     for (nRow = nStartRow; nRow <= nEndRow; nRow++)
1929     {
1930         if (bIncludeFiltered || !rDoc.RowFiltered( nRow, nStartTab ))
1931         {
1932             for (nCol = nStartCol; nCol <= nEndCol; nCol++)
1933             {
1934                 ScAddress aPos(nCol, nRow, nStartTab);
1935                 sal_uInt32 nNumFmt = rDoc.GetNumberFormat(aPos);
1936                 SvNumberFormatter* pFormatter = rDoc.GetFormatTable();
1937 
1938                 ScRefCellValue aCell(rDoc, aPos, blockPos[ nCol - nStartCol ]);
1939                 switch (aCell.getType())
1940                 {
1941                     case CELLTYPE_FORMULA:
1942                     {
1943                         if (bFormulas)
1944                         {
1945                             aCellStr = aCell.getFormula()->GetFormula();
1946                             if( aCellStr.indexOf( cSep ) != -1 )
1947                                 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1948                             else
1949                                 rStrm.WriteUnicodeOrByteText(aCellStr);
1950                         }
1951                         else
1952                         {
1953                             const Color* pColor;
1954                             aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc);
1955 
1956                             bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1957                             if( bMultiLineText )
1958                             {
1959                                 if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1960                                     aCellStr = aCellStr.replaceAll( "\n", " " );
1961                                 else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1962                                     aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1963                             }
1964 
1965                             if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1966                                 aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
1967 
1968                             if( mExportTextOptions.mbAddQuotes && ( aCellStr.indexOf( cSep ) != -1 ) )
1969                                 lcl_WriteString( rStrm, aCellStr, cStr, cStr );
1970                             else
1971                                 rStrm.WriteUnicodeOrByteText(aCellStr);
1972                         }
1973                     }
1974                     break;
1975                     case CELLTYPE_VALUE:
1976                     {
1977                         const Color* pColor;
1978                         aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc);
1979                         rStrm.WriteUnicodeOrByteText(aCellStr);
1980                     }
1981                     break;
1982                     case CELLTYPE_NONE:
1983                     break;
1984                     default:
1985                     {
1986                         const Color* pColor;
1987                         aCellStr = ScCellFormat::GetString(aCell, nNumFmt, &pColor, *pFormatter, rDoc);
1988 
1989                         bool bMultiLineText = ( aCellStr.indexOf( '\n' ) != -1 );
1990                         if( bMultiLineText )
1991                         {
1992                             if( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSpace )
1993                                 aCellStr = aCellStr.replaceAll( "\n", " " );
1994                             else if ( mExportTextOptions.meNewlineConversion == ScExportTextOptions::ToSystem && bConvertLF )
1995                                 aCellStr = convertLineEnd(aCellStr, GetSystemLineEnd());
1996                         }
1997 
1998                         if( mExportTextOptions.mcSeparatorConvertTo && cSep )
1999                             aCellStr = aCellStr.replaceAll( OUStringChar(cSep), OUStringChar(mExportTextOptions.mcSeparatorConvertTo) );
2000 
2001                         if( mExportTextOptions.mbAddQuotes && hasLineBreaksOrSeps(aCellStr, cSep) )
2002                             lcl_WriteString( rStrm, aCellStr, cStr, cStr );
2003                         else
2004                             rStrm.WriteUnicodeOrByteText(aCellStr);
2005                     }
2006                 }
2007                 if( nCol < nEndCol )
2008                     rStrm.WriteUnicodeOrByteText(rtl::OUStringChar(cSep));
2009             }
2010             // Do not append a line feed for one single cell.
2011             // NOTE: this Doc2Text() is only called for clipboard via
2012             // ScImportExport::ExportStream().
2013             if (nStartRow != nEndRow || nStartCol != nEndCol)
2014                 endlub(rStrm);
2015             if( rStrm.GetError() != ERRCODE_NONE )
2016                 break;
2017             if( nSizeLimit && rStrm.Tell() > nSizeLimit )
2018                 break;
2019         }
2020     }
2021 
2022     return rStrm.GetError() == ERRCODE_NONE;
2023 }
2024 
2025 bool ScImportExport::Sylk2Doc( SvStream& rStrm )
2026 {
2027     bool bOk = true;
2028     bool bMyDoc = false;
2029     SylkVersion eVersion = SylkVersion::OTHER;
2030 
2031     // US-English separators for StringToDouble
2032     sal_Unicode const cDecSep = '.';
2033     sal_Unicode const cGrpSep = ',';
2034 
2035     SCCOL nStartCol = aRange.aStart.Col();
2036     SCROW nStartRow = aRange.aStart.Row();
2037     SCCOL nEndCol = aRange.aEnd.Col();
2038     SCROW nEndRow = aRange.aEnd.Row();
2039     sal_uInt64 nOldPos = rStrm.Tell();
2040     bool bData = !bSingle;
2041     ::std::vector< sal_uInt32 > aFormats;
2042 
2043     if( !bSingle)
2044         bOk = StartPaste();
2045 
2046     while( bOk )
2047     {
2048         OUString aLine;
2049         OUString aText;
2050         OStringBuffer aByteLine;
2051         SCCOL nCol = nStartCol;
2052         SCROW nRow = nStartRow;
2053         SCCOL nRefCol = nCol;
2054         SCROW nRefRow = nRow;
2055         rStrm.Seek( nOldPos );
2056         for( ;; )
2057         {
2058             //! allow unicode
2059             rStrm.ReadLine( aByteLine );
2060             aLine = OStringToOUString(aByteLine, rStrm.GetStreamCharSet());
2061             if( rStrm.eof() )
2062                 break;
2063             bool bInvalidCol = false;
2064             bool bInvalidRow = false;
2065             const sal_Unicode* p = aLine.getStr();
2066             sal_Unicode cTag = *p++;
2067             if( cTag == 'C' )       // Content
2068             {
2069                 if( *p++ != ';' )
2070                     return false;
2071 
2072                 bool bInvalidRefCol = false;
2073                 bool bInvalidRefRow = false;
2074                 while( *p )
2075                 {
2076                     sal_Unicode ch = *p++;
2077                     ch = ScGlobal::ToUpperAlpha( ch );
2078                     switch( ch )
2079                     {
2080                         case 'X':
2081                         {
2082                             bInvalidCol = false;
2083                             bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol);
2084                             if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
2085                             {
2086                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
2087                                 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
2088                                 bInvalidCol = bOverflowCol = true;
2089                             }
2090                             break;
2091                         }
2092                         case 'Y':
2093                         {
2094                             bInvalidRow = false;
2095                             bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow);
2096                             if (bFail || nRow < 0 || nMaxImportRow < nRow)
2097                             {
2098                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
2099                                 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
2100                                 bInvalidRow = bOverflowRow = true;
2101                             }
2102                             break;
2103                         }
2104                         case 'C':
2105                         {
2106                             bInvalidRefCol = false;
2107                             bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nRefCol);
2108                             if (bFail || nRefCol < 0 || rDoc.MaxCol() < nRefCol)
2109                             {
2110                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;C invalid nRefCol=" << nRefCol);
2111                                 nRefCol = std::clamp<SCCOL>(nRefCol, 0, rDoc.MaxCol());
2112                                 bInvalidRefCol = bOverflowCol = true;
2113                             }
2114                             break;
2115                         }
2116                         case 'R':
2117                         {
2118                             bInvalidRefRow = false;
2119                             bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRefRow);
2120                             if (bFail || nRefRow < 0 || nMaxImportRow < nRefRow)
2121                             {
2122                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;R invalid nRefRow=" << nRefRow);
2123                                 nRefRow = std::clamp<SCROW>(nRefRow, 0, nMaxImportRow);
2124                                 bInvalidRefRow = bOverflowRow = true;
2125                             }
2126                             break;
2127                         }
2128                         case 'K':
2129                         {
2130                             if( !bSingle &&
2131                                     ( nCol < nStartCol || nCol > nEndCol
2132                                       || nRow < nStartRow || nRow > nEndRow
2133                                       || nCol > rDoc.MaxCol() || nRow > nMaxImportRow
2134                                       || bInvalidCol || bInvalidRow ) )
2135                                 break;
2136                             if( !bData )
2137                             {
2138                                 if( nRow > nEndRow )
2139                                     nEndRow = nRow;
2140                                 if( nCol > nEndCol )
2141                                     nEndCol = nCol;
2142                                 break;
2143                             }
2144                             bool bText;
2145                             if( *p == '"' )
2146                             {
2147                                 bText = true;
2148                                 aText.clear();
2149                                 p = lcl_ScanSylkString( p, aText, eVersion);
2150                             }
2151                             else
2152                                 bText = false;
2153                             const sal_Unicode* q = p;
2154                             while( *q && *q != ';' )
2155                                 q++;
2156                             if ( (*q != ';' || *(q+1) != 'I') && !bInvalidCol && !bInvalidRow )
2157                             {   // don't ignore value
2158                                 if( bText )
2159                                 {
2160                                     rDoc.EnsureTable(aRange.aStart.Tab());
2161                                     rDoc.SetTextCell(
2162                                         ScAddress(nCol, nRow, aRange.aStart.Tab()), aText);
2163                                 }
2164                                 else
2165                                 {
2166                                     double fVal = rtl_math_uStringToDouble( p,
2167                                             aLine.getStr() + aLine.getLength(),
2168                                             cDecSep, cGrpSep, nullptr, nullptr );
2169                                     rDoc.SetValue( nCol, nRow, aRange.aStart.Tab(), fVal );
2170                                 }
2171                             }
2172                         }
2173                         break;
2174                         case 'E':
2175                         case 'M':
2176                         {
2177                             if ( ch == 'M' )
2178                             {
2179                                 if ( nRefCol < nCol )
2180                                     nRefCol = nCol;
2181                                 if ( nRefRow < nRow )
2182                                     nRefRow = nRow;
2183                                 if ( !bData )
2184                                 {
2185                                     if( nRefRow > nEndRow )
2186                                         nEndRow = nRefRow;
2187                                     if( nRefCol > nEndCol )
2188                                         nEndCol = nRefCol;
2189                                 }
2190                             }
2191                             if( !bMyDoc || !bData )
2192                                 break;
2193                             aText = "=";
2194                             p = lcl_ScanSylkFormula( p, aText, eVersion);
2195 
2196                             if (bInvalidCol || bInvalidRow || (ch == 'M' && (bInvalidRefCol || bInvalidRefRow)))
2197                                 break;
2198 
2199                             ScAddress aPos( nCol, nRow, aRange.aStart.Tab() );
2200                             /* FIXME: do we want GRAM_ODFF_A1 instead? At the
2201                              * end it probably should be GRAM_ODFF_R1C1, since
2202                              * R1C1 is what Excel writes in SYLK, or even
2203                              * better GRAM_ENGLISH_XL_R1C1. */
2204                             const formula::FormulaGrammar::Grammar eGrammar = formula::FormulaGrammar::GRAM_PODF_A1;
2205                             ScCompiler aComp(rDoc, aPos, eGrammar);
2206                             std::unique_ptr<ScTokenArray> xCode(aComp.CompileString(aText)); // ctor/InsertMatrixFormula did copy TokenArray
2207                             rDoc.CheckLinkFormulaNeedingCheck(*xCode);
2208                             if ( ch == 'M' )
2209                             {
2210                                 ScMarkData aMark(rDoc.GetSheetLimits());
2211                                 aMark.SelectTable( aPos.Tab(), true );
2212                                 rDoc.InsertMatrixFormula( nCol, nRow, nRefCol,
2213                                     nRefRow, aMark, OUString(), xCode.get() );
2214                             }
2215                             else
2216                             {
2217                                 ScFormulaCell* pFCell = new ScFormulaCell(
2218                                         rDoc, aPos, *xCode, eGrammar, ScMatrixMode::NONE);
2219                                 rDoc.SetFormulaCell(aPos, pFCell);
2220                             }
2221                         }
2222                         break;
2223                     }
2224                     while( *p && *p != ';' )
2225                         p++;
2226                     if( *p )
2227                         p++;
2228                 }
2229             }
2230             else if( cTag == 'F' )      // Format
2231             {
2232                 if( *p++ != ';' )
2233                     return false;
2234                 sal_Int32 nFormat = -1;
2235                 while( *p )
2236                 {
2237                     sal_Unicode ch = *p++;
2238                     ch = ScGlobal::ToUpperAlpha( ch );
2239                     switch( ch )
2240                     {
2241                         case 'X':
2242                         {
2243                             bInvalidCol = false;
2244                             bool bFail = o3tl::checked_add<SCCOL>(o3tl::toInt32(std::u16string_view(p)), nStartCol - 1, nCol);
2245                             if (bFail || nCol < 0 || rDoc.MaxCol() < nCol)
2246                             {
2247                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;X invalid nCol=" << nCol);
2248                                 nCol = std::clamp<SCCOL>(nCol, 0, rDoc.MaxCol());
2249                                 bInvalidCol = bOverflowCol = true;
2250                             }
2251                             break;
2252                         }
2253                         case 'Y':
2254                         {
2255                             bInvalidRow = false;
2256                             bool bFail = o3tl::checked_add(o3tl::toInt32(std::u16string_view(p)), nStartRow - 1, nRow);
2257                             if (bFail || nRow < 0 || nMaxImportRow < nRow)
2258                             {
2259                                 SAL_WARN("sc.ui","ScImportExport::Sylk2Doc - ;Y invalid nRow=" << nRow);
2260                                 nRow = std::clamp<SCROW>(nRow, 0, nMaxImportRow);
2261                                 bInvalidRow = bOverflowRow = true;
2262                             }
2263                             break;
2264                         }
2265                         case 'P' :
2266                             if ( bData )
2267                             {
2268                                 // F;P<n> sets format code of P;P<code> at
2269                                 // current position, or at ;X;Y if specified.
2270                                 // Note that ;X;Y may appear after ;P
2271                                 const sal_Unicode* p0 = p;
2272                                 while( *p && *p != ';' )
2273                                     p++;
2274                                 OUString aNumber(p0, p - p0);
2275                                 nFormat = aNumber.toInt32();
2276                             }
2277                             break;
2278                     }
2279                     while( *p && *p != ';' )
2280                         p++;
2281                     if( *p )
2282                         p++;
2283                 }
2284                 if ( !bData )
2285                 {
2286                     if( nRow > nEndRow )
2287                         nEndRow = nRow;
2288                     if( nCol > nEndCol )
2289                         nEndCol = nCol;
2290                 }
2291                 if ( 0 <= nFormat && o3tl::make_unsigned(nFormat) < aFormats.size() && !bInvalidCol && !bInvalidRow )
2292                 {
2293                     sal_uInt32 nKey = aFormats[nFormat];
2294                     rDoc.ApplyAttr( nCol, nRow, aRange.aStart.Tab(),
2295                             SfxUInt32Item( ATTR_VALUE_FORMAT, nKey ) );
2296                 }
2297             }
2298             else if( cTag == 'P' )
2299             {
2300                 if ( bData && *p == ';' && *(p+1) == 'P' )
2301                 {
2302                     OUString aCode( p+2 );
2303 
2304                     sal_uInt32 nKey;
2305                     sal_Int32 nCheckPos;
2306 
2307                     if (aCode.getLength() > 2048 && utl::ConfigManager::IsFuzzing())
2308                     {
2309                         // consider an excessive length as a failure when fuzzing
2310                         nCheckPos = 1;
2311                     }
2312                     else
2313                     {
2314                         // unescape doubled semicolons
2315                         aCode = aCode.replaceAll(";;", ";");
2316                         // get rid of Xcl escape characters
2317                         aCode = aCode.replaceAll("\x1b", "");
2318                         SvNumFormatType nType;
2319                         rDoc.GetFormatTable()->PutandConvertEntry( aCode, nCheckPos, nType, nKey,
2320                                                                     LANGUAGE_ENGLISH_US, ScGlobal::eLnge, false);
2321                     }
2322 
2323                     if ( nCheckPos )
2324                         nKey = 0;
2325 
2326                     aFormats.push_back( nKey );
2327                 }
2328             }
2329             else if (cTag == 'I' && *p == 'D' && aLine.getLength() > 4)
2330             {
2331                 aLine = aLine.copy(4);
2332                 if (aLine == "CALCOOO32")
2333                     eVersion = SylkVersion::OOO32;
2334                 else if (aLine == "SCALC3")
2335                     eVersion = SylkVersion::SCALC3;
2336                 bMyDoc = (eVersion <= SylkVersion::OWN);
2337             }
2338             else if( cTag == 'E' )                      // End
2339                 break;
2340         }
2341         if( !bData )
2342         {
2343             aRange.aEnd.SetCol( nEndCol );
2344             aRange.aEnd.SetRow( nEndRow );
2345             bOk = StartPaste();
2346             bData = true;
2347         }
2348         else
2349             break;
2350     }
2351 
2352     EndPaste();
2353     return bOk;
2354 }
2355 
2356 bool ScImportExport::Doc2Sylk( SvStream& rStrm )
2357 {
2358     SCCOL nCol;
2359     SCROW nRow;
2360     SCCOL nStartCol = aRange.aStart.Col();
2361     SCROW nStartRow = aRange.aStart.Row();
2362     SCCOL nEndCol = aRange.aEnd.Col();
2363     SCROW nEndRow = aRange.aEnd.Row();
2364     OUString aCellStr;
2365     OUString aValStr;
2366     rStrm.WriteUnicodeOrByteText(u"ID;PCALCOOO32");
2367     endlub(rStrm);
2368 
2369     for (nRow = nStartRow; nRow <= nEndRow; nRow++)
2370     {
2371         for (nCol = nStartCol; nCol <= nEndCol; nCol++)
2372         {
2373             OUString aBufStr;
2374             double nVal;
2375             bool bForm = false;
2376             SCROW r = nRow - nStartRow + 1;
2377             SCCOL c = nCol - nStartCol + 1;
2378             ScRefCellValue aCell(rDoc, ScAddress(nCol, nRow, aRange.aStart.Tab()));
2379             CellType eType = aCell.getType();
2380             switch( eType )
2381             {
2382                 case CELLTYPE_FORMULA:
2383                     bForm = bFormulas;
2384                     if( rDoc.HasValueData( nCol, nRow, aRange.aStart.Tab()) )
2385                         goto hasvalue;
2386                     else
2387                         goto hasstring;
2388 
2389                 case CELLTYPE_VALUE:
2390                 hasvalue:
2391                     nVal = rDoc.GetValue( nCol, nRow, aRange.aStart.Tab() );
2392 
2393                     aValStr = ::rtl::math::doubleToUString( nVal,
2394                             rtl_math_StringFormat_Automatic,
2395                             rtl_math_DecimalPlaces_Max, '.', true );
2396 
2397                     aBufStr = "C;X"
2398                             + OUString::number( c )
2399                             + ";Y"
2400                             + OUString::number( r )
2401                             + ";K"
2402                             + aValStr;
2403                     rStrm.WriteUnicodeOrByteText(aBufStr);
2404                     goto checkformula;
2405 
2406                 case CELLTYPE_STRING:
2407                 case CELLTYPE_EDIT:
2408                 hasstring:
2409                     aCellStr = rDoc.GetString(nCol, nRow, aRange.aStart.Tab());
2410                     aCellStr = aCellStr.replaceAll("\n", SYLK_LF);
2411 
2412                     aBufStr = "C;X"
2413                             + OUString::number( c )
2414                             + ";Y"
2415                             + OUString::number( r )
2416                             + ";K";
2417                     rStrm.WriteUnicodeOrByteText(aBufStr);
2418                     lcl_WriteString( rStrm, aCellStr, '"', ';' );
2419 
2420                 checkformula:
2421                     if( bForm )
2422                     {
2423                         const ScFormulaCell* pFCell = aCell.getFormula();
2424                         switch ( pFCell->GetMatrixFlag() )
2425                         {
2426                             case ScMatrixMode::Reference :
2427                                 aCellStr.clear();
2428                             break;
2429                             default:
2430                                 aCellStr = pFCell->GetFormula( formula::FormulaGrammar::GRAM_PODF_A1);
2431                                 /* FIXME: do we want GRAM_ODFF_A1 instead? At
2432                                  * the end it probably should be
2433                                  * GRAM_ODFF_R1C1, since R1C1 is what Excel
2434                                  * writes in SYLK, or even better
2435                                  * GRAM_ENGLISH_XL_R1C1. */
2436                         }
2437                         if ( pFCell->GetMatrixFlag() != ScMatrixMode::NONE &&
2438                                 aCellStr.startsWith("{") &&
2439                                 aCellStr.endsWith("}") )
2440                         {   // cut off matrix {} characters
2441                             aCellStr = aCellStr.copy(1, aCellStr.getLength()-2);
2442                         }
2443                         if ( aCellStr[0] == '=' )
2444                             aCellStr = aCellStr.copy(1);
2445                         OUString aPrefix;
2446                         switch ( pFCell->GetMatrixFlag() )
2447                         {
2448                             case ScMatrixMode::Formula :
2449                             {   // diff expression with 'M' M$-extension
2450                                 SCCOL nC;
2451                                 SCROW nR;
2452                                 pFCell->GetMatColsRows( nC, nR );
2453                                 nC += c - 1;
2454                                 nR += r - 1;
2455                                 aPrefix = ";R"
2456                                         + OUString::number( nR )
2457                                         + ";C"
2458                                         + OUString::number( nC )
2459                                         + ";M";
2460                             }
2461                             break;
2462                             case ScMatrixMode::Reference :
2463                             {   // diff expression with 'I' M$-extension
2464                                 ScAddress aPos;
2465                                 (void)pFCell->GetMatrixOrigin( rDoc, aPos );
2466                                 aPrefix = ";I;R"
2467                                         + OUString::number( aPos.Row() - nStartRow + 1 )
2468                                         + ";C"
2469                                         + OUString::number( aPos.Col() - nStartCol + 1 );
2470                             }
2471                             break;
2472                             default:
2473                                 // formula Expression
2474                                 aPrefix = ";E";
2475                         }
2476                         rStrm.WriteUnicodeOrByteText(aPrefix);
2477                         if ( !aCellStr.isEmpty() )
2478                             lcl_WriteString( rStrm, aCellStr, 0, ';' );
2479                     }
2480                     endlub(rStrm);
2481                     break;
2482 
2483                 default:
2484                 {
2485                     // added to avoid warnings
2486                 }
2487             }
2488         }
2489     }
2490     rStrm.WriteUnicodeOrByteText(u"E");
2491     endlub(rStrm);
2492     return rStrm.GetError() == ERRCODE_NONE;
2493 }
2494 
2495 bool ScImportExport::Doc2HTML( SvStream& rStrm, const OUString& rBaseURL )
2496 {
2497     // rtl_TextEncoding is ignored in ScExportHTML, read from Load/Save HTML options
2498     ScFormatFilter::Get().ScExportHTML( rStrm, rBaseURL, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW, bAll,
2499         aStreamPath, aNonConvertibleChars, maFilterOptions );
2500     return rStrm.GetError() == ERRCODE_NONE;
2501 }
2502 
2503 bool ScImportExport::Doc2RTF( SvStream& rStrm )
2504 {
2505     //  rtl_TextEncoding is ignored in ScExportRTF
2506     ScFormatFilter::Get().ScExportRTF( rStrm, &rDoc, aRange, RTL_TEXTENCODING_DONTKNOW );
2507     return rStrm.GetError() == ERRCODE_NONE;
2508 }
2509 
2510 bool ScImportExport::Doc2Dif( SvStream& rStrm )
2511 {
2512     // for DIF in the clipboard, IBM_850 is always used
2513     ScFormatFilter::Get().ScExportDif( rStrm, &rDoc, aRange, RTL_TEXTENCODING_IBM_850 );
2514     return true;
2515 }
2516 
2517 bool ScImportExport::Dif2Doc( SvStream& rStrm )
2518 {
2519     SCTAB nTab = aRange.aStart.Tab();
2520     ScDocumentUniquePtr pImportDoc( new ScDocument( SCDOCMODE_UNDO ) );
2521     pImportDoc->InitUndo( rDoc, nTab, nTab );
2522 
2523     // for DIF in the clipboard, IBM_850 is always used
2524     ScFormatFilter::Get().ScImportDif( rStrm, pImportDoc.get(), aRange.aStart, RTL_TEXTENCODING_IBM_850 );
2525 
2526     SCCOL nEndCol;
2527     SCROW nEndRow;
2528     pImportDoc->GetCellArea( nTab, nEndCol, nEndRow );
2529     // if there are no cells in the imported content, nEndCol/nEndRow may be before the start
2530     if ( nEndCol < aRange.aStart.Col() )
2531         nEndCol = aRange.aStart.Col();
2532     if ( nEndRow < aRange.aStart.Row() )
2533         nEndRow = aRange.aStart.Row();
2534     aRange.aEnd = ScAddress( nEndCol, nEndRow, nTab );
2535 
2536     bool bOk = StartPaste();
2537     if (bOk)
2538     {
2539         InsertDeleteFlags nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2540         rDoc.DeleteAreaTab( aRange, nFlags );
2541         pImportDoc->CopyToDocument(aRange, nFlags, false, rDoc);
2542         EndPaste();
2543     }
2544 
2545     return bOk;
2546 }
2547 
2548 bool ScImportExport::RTF2Doc( SvStream& rStrm, const OUString& rBaseURL )
2549 {
2550     std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateRTFImport( &rDoc, aRange );
2551     if (!pImp)
2552         return false;
2553     pImp->Read( rStrm, rBaseURL );
2554     aRange = pImp->GetRange();
2555 
2556     bool bOk = StartPaste();
2557     if (bOk)
2558     {
2559         InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2560         rDoc.DeleteAreaTab( aRange, nFlags );
2561         pImp->WriteToDocument();
2562         EndPaste();
2563     }
2564     return bOk;
2565 }
2566 
2567 bool ScImportExport::HTML2Doc( SvStream& rStrm, const OUString& rBaseURL )
2568 {
2569     std::unique_ptr<ScEEAbsImport> pImp = ScFormatFilter::Get().CreateHTMLImport( &rDoc, rBaseURL, aRange);
2570     if (!pImp)
2571         return false;
2572     pImp->Read( rStrm, rBaseURL );
2573     aRange = pImp->GetRange();
2574 
2575     bool bOk = StartPaste();
2576     if (bOk)
2577     {
2578         // ScHTMLImport may call ScDocument::InitDrawLayer, resulting in
2579         // a Draw Layer but no Draw View -> create Draw Layer and View here
2580         if (pDocSh)
2581             pDocSh->MakeDrawLayer();
2582 
2583         InsertDeleteFlags const nFlags = InsertDeleteFlags::ALL & ~InsertDeleteFlags::STYLES;
2584         rDoc.DeleteAreaTab( aRange, nFlags );
2585 
2586         if (pExtOptions)
2587         {
2588             // Pick up import options if available.
2589             LanguageType eLang = pExtOptions->GetLanguage();
2590             SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eLang);
2591             bool bSpecialNumber = pExtOptions->IsDetectSpecialNumber();
2592             bool bScientificNumber = pExtOptions->IsDetectScientificNumber();
2593             pImp->WriteToDocument(false, 1.0, &aNumFormatter, bSpecialNumber, bScientificNumber);
2594         }
2595         else
2596             // Regular import, with no options.
2597             pImp->WriteToDocument();
2598 
2599         EndPaste();
2600     }
2601     return bOk;
2602 }
2603 
2604 #ifndef DISABLE_DYNLOADING
2605 
2606 extern "C" { static void thisModule() {} }
2607 
2608 #else
2609 
2610 extern "C" {
2611 ScFormatFilterPlugin* ScFilterCreate();
2612 }
2613 
2614 #endif
2615 
2616 typedef ScFormatFilterPlugin * (*FilterFn)();
2617 ScFormatFilterPlugin &ScFormatFilter::Get()
2618 {
2619     static ScFormatFilterPlugin *plugin = []()
2620     {
2621 #ifndef DISABLE_DYNLOADING
2622         OUString sFilterLib(SVLIBRARY("scfilt"));
2623         static ::osl::Module aModule;
2624         bool bLoaded = aModule.is();
2625         if (!bLoaded)
2626             bLoaded = aModule.loadRelative(&thisModule, sFilterLib);
2627         if (!bLoaded)
2628             bLoaded = aModule.load(sFilterLib);
2629         if (bLoaded)
2630         {
2631             oslGenericFunction fn = aModule.getFunctionSymbol( "ScFilterCreate" );
2632             if (fn != nullptr)
2633                 return reinterpret_cast<FilterFn>(fn)();
2634         }
2635         assert(false);
2636         return static_cast<ScFormatFilterPlugin*>(nullptr);
2637 #else
2638         return ScFilterCreate();
2639 #endif
2640     }();
2641 
2642     return *plugin;
2643 }
2644 
2645 // Precondition: pStr is guaranteed to be non-NULL and points to a 0-terminated
2646 // array.
2647 static const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
2648         sal_Unicode c )
2649 {
2650     while (*pStr)
2651     {
2652         if (*pStr == c)
2653             return pStr;
2654         ++pStr;
2655     }
2656     return nullptr;
2657 }
2658 
2659 ScImportStringStream::ScImportStringStream( const OUString& rStr )
2660     : SvMemoryStream( const_cast<sal_Unicode *>(rStr.getStr()),
2661             rStr.getLength() * sizeof(sal_Unicode), StreamMode::READ)
2662 {
2663     SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
2664 #ifdef OSL_BIGENDIAN
2665     SetEndian(SvStreamEndian::BIG);
2666 #else
2667     SetEndian(SvStreamEndian::LITTLE);
2668 #endif
2669 }
2670 
2671 OUString ReadCsvLine( SvStream &rStream, bool bEmbeddedLineBreak,
2672         OUString& rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode& rcDetectSep, sal_uInt32 nMaxSourceLines )
2673 {
2674     enum RetryState
2675     {
2676         FORBID,
2677         ALLOW,
2678         RETRY,
2679         RETRIED
2680     } eRetryState = (bEmbeddedLineBreak && rcDetectSep == 0 ? RetryState::ALLOW : RetryState::FORBID);
2681 
2682     sal_uInt64 nStreamPos = (eRetryState == RetryState::ALLOW ? rStream.Tell() : 0);
2683 
2684 Label_RetryWithNewSep:
2685 
2686     if (eRetryState == RetryState::RETRY)
2687     {
2688         eRetryState = RetryState::RETRIED;
2689         rStream.Seek( nStreamPos);
2690     }
2691 
2692     OUString aStr;
2693     rStream.ReadUniOrByteStringLine(aStr, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2694 
2695     if (bEmbeddedLineBreak)
2696     {
2697         sal_Int32 nFirstLineLength = aStr.getLength();
2698         sal_uInt64 nFirstLineStreamPos = rStream.Tell();
2699         sal_uInt32 nLine = 0;
2700 
2701         const sal_Unicode* pSeps = rFieldSeparators.getStr();
2702 
2703         QuoteType eQuoteState = FIELDEND_QUOTE;
2704         bool bFieldStart = true;
2705 
2706         sal_Int32 nLastOffset = 0;
2707         sal_Int32 nQuotes = 0;
2708         while (!rStream.eof() && aStr.getLength() < nArbitraryLineLengthLimit)
2709         {
2710             const sal_Unicode * p = aStr.getStr() + nLastOffset;
2711             const sal_Unicode * const pStop = aStr.getStr() + aStr.getLength();
2712             while (p < pStop)
2713             {
2714                 if (!*p)
2715                 {
2716                     // Skip embedded null-characters. They don't change
2717                     // anything and are handled at a higher level.
2718                     ++p;
2719                     continue;
2720                 }
2721 
2722                 if (nQuotes)
2723                 {
2724                     if (*p == cFieldQuote)
2725                     {
2726                         if (bFieldStart)
2727                         {
2728                             ++nQuotes;
2729                             bFieldStart = false;
2730                             eQuoteState = FIELDSTART_QUOTE;
2731                             nFirstLineLength = aStr.getLength();
2732                             nFirstLineStreamPos = rStream.Tell();
2733                         }
2734                         // Do not detect a FIELDSTART_QUOTE if not in
2735                         // bFieldStart mode, in which case for unquoted content
2736                         // we are in FIELDEND_QUOTE state.
2737                         else if (eQuoteState != FIELDEND_QUOTE)
2738                         {
2739                             eQuoteState = lcl_isEscapedOrFieldEndQuote( nQuotes, p, pSeps, cFieldQuote, rcDetectSep);
2740 
2741                             if (eRetryState == RetryState::ALLOW && rcDetectSep)
2742                             {
2743                                 eRetryState = RetryState::RETRY;
2744                                 rFieldSeparators += OUStringChar(rcDetectSep);
2745                                 pSeps = rFieldSeparators.getStr();
2746                                 goto Label_RetryWithNewSep;
2747                             }
2748 
2749                             // DONTKNOW_QUOTE is an embedded unescaped quote we
2750                             // don't count for pairing.
2751                             if (eQuoteState != DONTKNOW_QUOTE)
2752                                 ++nQuotes;
2753                         }
2754                     }
2755                     else if (eQuoteState == FIELDEND_QUOTE)
2756                     {
2757                         if (bFieldStart)
2758                             // If blank is a separator it starts a field, if it
2759                             // is not and thus maybe leading before quote we
2760                             // are still at start of field regarding quotes.
2761                             bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2762                         else
2763                             bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2764                     }
2765                 }
2766                 else
2767                 {
2768                     if (*p == cFieldQuote && bFieldStart)
2769                     {
2770                         nQuotes = 1;
2771                         eQuoteState = FIELDSTART_QUOTE;
2772                         bFieldStart = false;
2773                         nFirstLineLength = aStr.getLength();
2774                         nFirstLineStreamPos = rStream.Tell();
2775                     }
2776                     else if (eQuoteState == FIELDEND_QUOTE)
2777                     {
2778                         // This also skips leading blanks at beginning of line
2779                         // if followed by a quote. It's debatable whether we
2780                         // actually want that or not, but congruent with what
2781                         // ScanNextFieldFromString() does.
2782                         if (bFieldStart)
2783                             bFieldStart = (*p == ' ' || lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2784                         else
2785                             bFieldStart = (lcl_UnicodeStrChr( pSeps, *p) != nullptr);
2786                     }
2787                 }
2788                 // A quote character inside a field content does not start
2789                 // a quote.
2790                 ++p;
2791             }
2792 
2793             if ((nQuotes & 1) == 0)
2794                 // We still have a (theoretical?) problem here if due to
2795                 // nArbitraryLineLengthLimit (or nMaxSourceLines below) we
2796                 // split a string right between a doubled quote pair.
2797                 break;
2798             else if (eQuoteState == DONTKNOW_QUOTE)
2799                 // A single unescaped quote somewhere in a quote started
2800                 // field, most likely that was not meant to have embedded
2801                 // linefeeds either.
2802                 break;
2803             else if (++nLine >= nMaxSourceLines && nMaxSourceLines > 0)
2804                 // Unconditionally increment nLine even if nMaxSourceLines==0
2805                 // so it can be observed in debugger.
2806                 break;
2807             else
2808             {
2809                 nLastOffset = aStr.getLength();
2810                 OUString aNext;
2811                 rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet(), nArbitraryLineLengthLimit);
2812                 if (!rStream.eof())
2813                     aStr += "\n" + aNext;
2814             }
2815         }
2816         if (nQuotes & 1)
2817         {
2818             // No closing quote at all. A single quote at field start => no
2819             // embedded linefeeds for that field, take only first logical line.
2820             aStr = aStr.copy( 0, nFirstLineLength);
2821             rStream.Seek( nFirstLineStreamPos);
2822         }
2823     }
2824     return aStr;
2825 }
2826 
2827 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2828