xref: /core/sc/source/filter/rtf/rtfparse.cxx (revision 8f7c3507)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <scitems.hxx>
22 #include <editeng/eeitem.hxx>
23 #include <editeng/editeng.hxx>
24 #include <editeng/fhgtitem.hxx>
25 #include <editeng/svxrtf.hxx>
26 #include <vcl/outdev.hxx>
27 #include <svtools/rtftoken.h>
28 #include <osl/diagnose.h>
29 #include <svl/itempool.hxx>
30 
31 #include <rtfparse.hxx>
32 
// Tolerance, in twips, applied when deciding whether two cell borders
// belong to the same column. A typed constant instead of a macro so the
// name obeys normal scoping rules.
constexpr int SC_RTFTWIPTOL = 10;
34 
35 ScRTFParser::ScRTFParser( EditEngine* pEditP ) :
36         ScEEParser( pEditP ),
37         mnCurPos(0),
38         pActDefault( nullptr ),
39         pDefMerge( nullptr ),
40         nStartAdjust( sal_uLong(~0) ),
41         nLastWidth(0),
42         bNewDef( false )
43 {
44     // RTF default FontSize 12Pt
45     long nMM = OutputDevice::LogicToLogic( 12, MapUnit::MapPoint, MapUnit::Map100thMM );
46     pPool->SetPoolDefaultItem( SvxFontHeightItem( nMM, 100, EE_CHAR_FONTHEIGHT ) );
47     // Free-flying pInsDefault
48     pInsDefault.reset( new ScRTFCellDefault( pPool ) );
49 }
50 
51 ScRTFParser::~ScRTFParser()
52 {
53     pInsDefault.reset();
54     maDefaultList.clear();
55 }
56 
57 ErrCode ScRTFParser::Read( SvStream& rStream, const OUString& rBaseURL )
58 {
59     Link<RtfImportInfo&,void> aOldLink = pEdit->GetRtfImportHdl();
60     pEdit->SetRtfImportHdl( LINK( this, ScRTFParser, RTFImportHdl ) );
61     ErrCode nErr = pEdit->Read( rStream, rBaseURL, EETextFormat::Rtf );
62     if ( nRtfLastToken == RTF_PAR )
63     {
64         if ( !maList.empty() )
65         {
66             auto& pE = maList.back();
67             if (    // Completely empty
68                 (  pE->aSel.nStartPara == pE->aSel.nEndPara
69                    && pE->aSel.nStartPos  == pE->aSel.nEndPos
70                 )
71                 ||  // Empty paragraph
72                 (  pE->aSel.nStartPara + 1 == pE->aSel.nEndPara
73                    && pE->aSel.nStartPos      == pEdit->GetTextLen( pE->aSel.nStartPara )
74                    && pE->aSel.nEndPos        == 0
75                 )
76                )
77             {   // Don't take over the last paragraph
78                 maList.pop_back();
79             }
80         }
81     }
82     ColAdjust();
83     pEdit->SetRtfImportHdl( aOldLink );
84     return nErr;
85 }
86 
87 void ScRTFParser::EntryEnd( ScEEParseEntry* pE, const ESelection& aSel )
88 {
89     // Paragraph -2 strips the attached empty paragraph
90     pE->aSel.nEndPara = aSel.nEndPara - 2;
91     // Although it's called nEndPos, the last one is position + 1
92     pE->aSel.nEndPos = pEdit->GetTextLen( aSel.nEndPara - 1 );
93 }
94 
95 inline void ScRTFParser::NextRow()
96 {
97     if ( nRowMax < ++nRowCnt )
98         nRowMax = nRowCnt;
99 }
100 
101 bool ScRTFParser::SeekTwips( sal_uInt16 nTwips, SCCOL* pCol )
102 {
103     ScRTFColTwips::const_iterator it = aColTwips.find( nTwips );
104     bool bFound = it != aColTwips.end();
105     sal_uInt16 nPos = it - aColTwips.begin();
106     *pCol = static_cast<SCCOL>(nPos);
107     if ( bFound )
108         return true;
109     sal_uInt16 nCount = aColTwips.size();
110     if ( !nCount )
111         return false;
112     SCCOL nCol = *pCol;
113     // nCol is insertion position; the next one higher up is there (or not)
114     if ( nCol < static_cast<SCCOL>(nCount) && ((aColTwips[nCol] - SC_RTFTWIPTOL) <= nTwips) )
115         return true;
116     // Not smaller than everything else? Then compare with the next lower one
117     else if ( nCol != 0 && ((aColTwips[nCol-1] + SC_RTFTWIPTOL) >= nTwips) )
118     {
119         (*pCol)--;
120         return true;
121     }
122     return false;
123 }
124 
125 void ScRTFParser::ColAdjust()
126 {
127     if ( nStartAdjust != sal_uLong(~0) )
128     {
129         SCCOL nCol = 0;
130         for (size_t i = nStartAdjust, nListSize = maList.size(); i < nListSize; ++i)
131         {
132             auto& pE = maList[i];
133             if ( pE->nCol == 0 )
134                 nCol = 0;
135             pE->nCol = nCol;
136             if ( pE->nColOverlap > 1 )
137                 nCol = nCol + pE->nColOverlap; // Merged cells with \clmrg
138             else
139             {
140                 SeekTwips( pE->nTwips, &nCol );
141                 if ( ++nCol <= pE->nCol )
142                     nCol = pE->nCol + 1; // Moved cell X
143                 pE->nColOverlap = nCol - pE->nCol; // Merged cells without \clmrg
144             }
145             if ( nCol > nColMax )
146                 nColMax = nCol;
147         }
148         nStartAdjust = sal_uLong(~0);
149         aColTwips.clear();
150     }
151 }
152 
153 IMPL_LINK( ScRTFParser, RTFImportHdl, RtfImportInfo&, rInfo, void )
154 {
155     switch ( rInfo.eState )
156     {
157         case RtfImportState::NextToken:
158             ProcToken( &rInfo );
159             break;
160         case RtfImportState::UnknownAttr:
161             ProcToken( &rInfo );
162             break;
163         case RtfImportState::Start:
164         {
165             SvxRTFParser* pParser = static_cast<SvxRTFParser*>(rInfo.pParser);
166             pParser->SetAttrPool( pPool );
167             RTFPardAttrMapIds& rMap = pParser->GetPardMap();
168             rMap.nBrush = ATTR_BACKGROUND;
169             rMap.nBox = ATTR_BORDER;
170             rMap.nShadow = ATTR_SHADOW;
171         }
172             break;
173         case RtfImportState::End:
174             if ( rInfo.aSelection.nEndPos )
175             {   // If still text: create last paragraph
176                 pActDefault = nullptr;
177                 rInfo.nToken = RTF_PAR;
178                 // EditEngine did not attach an empty paragraph anymore
179                 // which EntryEnd could strip
180                 rInfo.aSelection.nEndPara++;
181                 ProcToken( &rInfo );
182             }
183             break;
184         case RtfImportState::SetAttr:
185             break;
186         case RtfImportState::InsertText:
187             break;
188         case RtfImportState::InsertPara:
189             break;
190         default:
191             OSL_FAIL("unknown ImportInfo.eState");
192     }
193 }
194 
195 // Bad behavior:
196 // For RTF_INTBL or respectively at the start of the first RTF_CELL
197 // after RTF_CELLX if there was no RTF_INTBL
198 void ScRTFParser::NewCellRow()
199 {
200     if ( bNewDef )
201     {
202         bNewDef = false;
203         // Not flush on the right? => new table
204         if ( nLastWidth && !maDefaultList.empty() )
205         {
206             const ScRTFCellDefault& rD = *maDefaultList.back().get();
207             if (rD.nTwips != nLastWidth)
208             {
209                 SCCOL n1, n2;
210                 if ( !(  SeekTwips( nLastWidth, &n1 )
211                       && SeekTwips( rD.nTwips, &n2 )
212                       && n1 == n2
213                       )
214                 )
215                 {
216                     ColAdjust();
217                 }
218             }
219         }
220         // Build up TwipCols only after nLastWidth comparison!
221         for (std::unique_ptr<ScRTFCellDefault> & pCellDefault : maDefaultList)
222         {
223             const ScRTFCellDefault& rD = *pCellDefault;
224             SCCOL nCol;
225             if ( !SeekTwips(rD.nTwips, &nCol) )
226                 aColTwips.insert( rD.nTwips );
227         }
228     }
229     pDefMerge = nullptr;
230     pActDefault = maDefaultList.empty() ? nullptr : maDefaultList[0].get();
231     mnCurPos = 0;
232     OSL_ENSURE( pActDefault, "NewCellRow: pActDefault==0" );
233 }
234 
235 /*
236     SW:
237     ~~~
238     [\par]
239     \trowd \cellx \cellx ...
240     \intbl \cell \cell ...
241     \row
242     [\par]
243     [\trowd \cellx \cellx ...]
244     \intbl \cell \cell ...
245     \row
246     [\par]
247 
248     M$-Word:
249     ~~~~~~~~
250     [\par]
251     \trowd \cellx \cellx ...
252     \intbl \cell \cell ...
253     \intbl \row
254     [\par]
255     [\trowd \cellx \cellx ...]
256     \intbl \cell \cell ...
257     \intbl \row
258     [\par]
259 
260  */
261 
262 void ScRTFParser::ProcToken( RtfImportInfo* pInfo )
263 {
264     switch ( pInfo->nToken )
265     {
266         case RTF_TROWD:         // denotes table row default, before RTF_CELLX
267         {
268             if (!maDefaultList.empty())
269                 nLastWidth = maDefaultList.back()->nTwips;
270 
271             nColCnt = 0;
272             if (pActDefault != pInsDefault.get())
273                 pActDefault = nullptr;
274             maDefaultList.clear();
275             pDefMerge = nullptr;
276             nRtfLastToken = pInfo->nToken;
277             mnCurPos = 0;
278         }
279         break;
280         case RTF_CLMGF:         // The first cell of cells to be merged
281         {
282             pDefMerge = pInsDefault.get();
283             nRtfLastToken = pInfo->nToken;
284         }
285         break;
286         case RTF_CLMRG:         // A cell to be merged with the preceding cell
287         {
288             if (!pDefMerge && !maDefaultList.empty())
289             {
290                 pDefMerge = maDefaultList.back().get();
291                 mnCurPos = maDefaultList.size() - 1;
292             }
293             OSL_ENSURE( pDefMerge, "RTF_CLMRG: pDefMerge==0" );
294             if ( pDefMerge ) // Else broken RTF
295                 pDefMerge->nColOverlap++;   // multiple successive ones possible
296             pInsDefault->nColOverlap = 0;   // Flag: ignore these
297             nRtfLastToken = pInfo->nToken;
298         }
299         break;
300         case RTF_CELLX:         // closes cell default
301         {
302             bNewDef = true;
303             pInsDefault->nCol = nColCnt;
304             pInsDefault->nTwips = pInfo->nTokenValue; // Right cell border
305             maDefaultList.push_back( std::move(pInsDefault) );
306             // New free-flying pInsDefault
307             pInsDefault.reset( new ScRTFCellDefault( pPool ) );
308             if ( ++nColCnt > nColMax )
309                 nColMax = nColCnt;
310             nRtfLastToken = pInfo->nToken;
311         }
312         break;
313         case RTF_INTBL:         // before the first RTF_CELL
314         {
315             // Once over NextToken and once over UnknownAttrToken
316             // or e.g. \intbl ... \cell \pard \intbl ... \cell
317             if ( nRtfLastToken != RTF_INTBL && nRtfLastToken != RTF_CELL && nRtfLastToken != RTF_PAR )
318             {
319                 NewCellRow();
320                 nRtfLastToken = pInfo->nToken;
321             }
322         }
323         break;
324         case RTF_CELL:          // denotes the end of a cell.
325         {
326             OSL_ENSURE( pActDefault, "RTF_CELL: pActDefault==0" );
327             if ( bNewDef || !pActDefault )
328                 NewCellRow();    // before was no \intbl, bad behavior
329             // Broken RTF? Let's save what we can
330             if ( !pActDefault )
331                 pActDefault = pInsDefault.get();
332             if ( pActDefault->nColOverlap > 0 )
333             {   // Not merged with preceding
334                 mxActEntry->nCol = pActDefault->nCol;
335                 mxActEntry->nColOverlap = pActDefault->nColOverlap;
336                 mxActEntry->nTwips = pActDefault->nTwips;
337                 mxActEntry->nRow = nRowCnt;
338                 mxActEntry->aItemSet.Set(pActDefault->aItemSet);
339                 EntryEnd(mxActEntry.get(), pInfo->aSelection);
340 
341                 if ( nStartAdjust == sal_uLong(~0) )
342                     nStartAdjust = maList.size();
343                 maList.push_back(mxActEntry);
344                 NewActEntry(mxActEntry.get()); // New free-flying mxActEntry
345             }
346             else
347             {   // Assign current Twips to MergeCell
348                 if ( !maList.empty() )
349                 {
350                     auto& pE = maList.back();
351                     pE->nTwips = pActDefault->nTwips;
352                 }
353                 // Adjust selection of free-flying mxActEntry
354                 // Paragraph -1 due to separated text in EditEngine during parsing
355                 mxActEntry->aSel.nStartPara = pInfo->aSelection.nEndPara - 1;
356             }
357 
358             pActDefault = nullptr;
359             if (!maDefaultList.empty() && (mnCurPos+1) < maDefaultList.size())
360                 pActDefault = maDefaultList[++mnCurPos].get();
361 
362             nRtfLastToken = pInfo->nToken;
363         }
364         break;
365         case RTF_ROW:           // denotes the end of a row
366         {
367             NextRow();
368             nRtfLastToken = pInfo->nToken;
369         }
370         break;
371         case RTF_PAR:           // Paragraph
372         {
373             if ( !pActDefault )
374             {   // text not in table
375                 ColAdjust();    // close the processing table
376                 mxActEntry->nCol = 0;
377                 mxActEntry->nRow = nRowCnt;
378                 EntryEnd(mxActEntry.get(), pInfo->aSelection);
379                 maList.push_back(mxActEntry);
380                 NewActEntry(mxActEntry.get());   // new mxActEntry
381                 NextRow();
382             }
383             nRtfLastToken = pInfo->nToken;
384         }
385         break;
386         default:
387         {   // do not set nRtfLastToken
388             switch ( pInfo->nToken & ~(0xff | RTF_TABLEDEF) )
389             {
390                 case RTF_SHADINGDEF:
391                     static_cast<SvxRTFParser*>(pInfo->pParser)->ReadBackgroundAttr(
392                         pInfo->nToken, pInsDefault->aItemSet, true );
393                 break;
394                 case RTF_BRDRDEF:
395                     static_cast<SvxRTFParser*>(pInfo->pParser)->ReadBorderAttr(
396                         pInfo->nToken, pInsDefault->aItemSet, true );
397                 break;
398             }
399         }
400     }
401 }
402 
403 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
404