xref: /core/starmath/source/parse.cxx (revision 8b550895)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <com/sun/star/i18n/UnicodeType.hpp>
22 #include <com/sun/star/i18n/KParseTokens.hpp>
23 #include <com/sun/star/i18n/KParseType.hpp>
24 #include <i18nlangtag/lang.h>
25 #include <tools/lineend.hxx>
26 #include <unotools/configmgr.hxx>
27 #include <unotools/syslocale.hxx>
28 #include <sal/log.hxx>
29 #include <osl/diagnose.h>
30 #include <rtl/character.hxx>
31 #include <node.hxx>
32 #include <parse.hxx>
33 #include <strings.hrc>
34 #include <smmod.hxx>
35 #include "cfgitem.hxx"
36 #include <cassert>
37 #include <stack>
38 
39 using namespace ::com::sun::star::i18n;
40 
41 
42 SmToken::SmToken()
43     : eType(TUNKNOWN)
44     , cMathChar('\0')
45     , nGroup(TG::NONE)
46     , nLevel(0)
47     , nRow(0)
48     , nCol(0)
49 {
50 }
51 
52 SmToken::SmToken(SmTokenType eTokenType,
53                  sal_Unicode cMath,
54                  const char* pText,
55                  TG nTokenGroup,
56                  sal_uInt16 nTokenLevel)
57     : aText(OUString::createFromAscii(pText))
58     , eType(eTokenType)
59     , cMathChar(cMath)
60     , nGroup(nTokenGroup)
61     , nLevel(nTokenLevel)
62     , nRow(0)
63     , nCol(0)
64 {
65 }
66 
67 
68 static const SmTokenTableEntry aTokenTable[] =
69 {
70     { "abs", TABS, '\0', TG::UnOper, 13 },
71     { "acute", TACUTE, MS_ACUTE, TG::Attribute, 5 },
72     { "aleph" , TALEPH, MS_ALEPH, TG::Standalone, 5 },
73     { "alignb", TALIGNC, '\0', TG::Align, 0},
74     { "alignc", TALIGNC, '\0', TG::Align, 0},
75     { "alignl", TALIGNL, '\0', TG::Align, 0},
76     { "alignm", TALIGNC, '\0', TG::Align, 0},
77     { "alignr", TALIGNR, '\0', TG::Align, 0},
78     { "alignt", TALIGNC, '\0', TG::Align, 0},
79     { "and", TAND, MS_AND, TG::Product, 0},
80     { "approx", TAPPROX, MS_APPROX, TG::Relation, 0},
81     { "aqua", TAQUA, '\0', TG::Color, 0},
82     { "arccos", TACOS, '\0', TG::Function, 5},
83     { "arccot", TACOT, '\0', TG::Function, 5},
84     { "arcosh", TACOSH, '\0', TG::Function, 5 },
85     { "arcoth", TACOTH, '\0', TG::Function, 5 },
86     { "arcsin", TASIN, '\0', TG::Function, 5},
87     { "arctan", TATAN, '\0', TG::Function, 5},
88     { "arsinh", TASINH, '\0', TG::Function, 5},
89     { "artanh", TATANH, '\0', TG::Function, 5},
90     { "backepsilon" , TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5},
91     { "bar", TBAR, MS_BAR, TG::Attribute, 5},
92     { "binom", TBINOM, '\0', TG::NONE, 5 },
93     { "black", TBLACK, '\0', TG::Color, 0},
94     { "blue", TBLUE, '\0', TG::Color, 0},
95     { "bold", TBOLD, '\0', TG::FontAttr, 5},
96     { "boper", TBOPER, '\0', TG::Product, 0},
97     { "breve", TBREVE, MS_BREVE, TG::Attribute, 5},
98     { "bslash", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
99     { "cdot", TCDOT, MS_CDOT, TG::Product, 0},
100     { "check", TCHECK, MS_CHECK, TG::Attribute, 5},
101     { "circ" , TCIRC, MS_CIRC, TG::Standalone, 5},
102     { "circle", TCIRCLE, MS_CIRCLE, TG::Attribute, 5},
103     { "color", TCOLOR, '\0', TG::FontAttr, 5},
104     { "coprod", TCOPROD, MS_COPROD, TG::Oper, 5},
105     { "cos", TCOS, '\0', TG::Function, 5},
106     { "cosh", TCOSH, '\0', TG::Function, 5},
107     { "cot", TCOT, '\0', TG::Function, 5},
108     { "coth", TCOTH, '\0', TG::Function, 5},
109     { "csub", TCSUB, '\0', TG::Power, 0},
110     { "csup", TCSUP, '\0', TG::Power, 0},
111     { "cyan", TCYAN, '\0', TG::Color, 0},
112     { "dddot", TDDDOT, MS_DDDOT, TG::Attribute, 5},
113     { "ddot", TDDOT, MS_DDOT, TG::Attribute, 5},
114     { "def", TDEF, MS_DEF, TG::Relation, 0},
115     { "div", TDIV, MS_DIV, TG::Product, 0},
116     { "divides", TDIVIDES, MS_LINE, TG::Relation, 0},
117     { "dlarrow" , TDLARROW, MS_DLARROW, TG::Standalone, 5},
118     { "dlrarrow" , TDLRARROW, MS_DLRARROW, TG::Standalone, 5},
119     { "dot", TDOT, MS_DOT, TG::Attribute, 5},
120     { "dotsaxis", TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5}, // 5 to continue expression
121     { "dotsdiag", TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5},
122     { "dotsdown", TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5},
123     { "dotslow", TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5},
124     { "dotsup", TDOTSUP, MS_DOTSUP, TG::Standalone, 5},
125     { "dotsvert", TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5},
126     { "downarrow" , TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5},
127     { "drarrow" , TDRARROW, MS_DRARROW, TG::Standalone, 5},
128     { "emptyset" , TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5},
129     { "equiv", TEQUIV, MS_EQUIV, TG::Relation, 0},
130     { "exists", TEXISTS, MS_EXISTS, TG::Standalone, 5},
131     { "exp", TEXP, '\0', TG::Function, 5},
132     { "fact", TFACT, MS_FACT, TG::UnOper, 5},
133     { "fixed", TFIXED, '\0', TG::Font, 0},
134     { "font", TFONT, '\0', TG::FontAttr, 5},
135     { "forall", TFORALL, MS_FORALL, TG::Standalone, 5},
136     { "from", TFROM, '\0', TG::Limit, 0},
137     { "fuchsia", TFUCHSIA, '\0', TG::Color, 0},
138     { "func", TFUNC, '\0', TG::Function, 5},
139     { "ge", TGE, MS_GE, TG::Relation, 0},
140     { "geslant", TGESLANT, MS_GESLANT, TG::Relation, 0 },
141     { "gg", TGG, MS_GG, TG::Relation, 0},
142     { "grave", TGRAVE, MS_GRAVE, TG::Attribute, 5},
143     { "gray", TGRAY, '\0', TG::Color, 0},
144     { "green", TGREEN, '\0', TG::Color, 0},
145     { "gt", TGT, MS_GT, TG::Relation, 0},
146     { "harpoon", THARPOON, MS_HARPOON, TG::Attribute, 5},
147     { "hat", THAT, MS_HAT, TG::Attribute, 5},
148     { "hbar" , THBAR, MS_HBAR, TG::Standalone, 5},
149     { "iiint", TIIINT, MS_IIINT, TG::Oper, 5},
150     { "iint", TIINT, MS_IINT, TG::Oper, 5},
151     { "im" , TIM, MS_IM, TG::Standalone, 5 },
152     { "in", TIN, MS_IN, TG::Relation, 0},
153     { "infinity" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
154     { "infty" , TINFINITY, MS_INFINITY, TG::Standalone, 5},
155     { "int", TINT, MS_INT, TG::Oper, 5},
156     { "intd", TINTD, MS_INT, TG::Oper, 5},
157     { "intersection", TINTERSECT, MS_INTERSECT, TG::Product, 0},
158     { "ital", TITALIC, '\0', TG::FontAttr, 5},
159     { "italic", TITALIC, '\0', TG::FontAttr, 5},
160     { "lambdabar" , TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5},
161     { "langle", TLANGLE, MS_LMATHANGLE, TG::LBrace, 5},
162     { "lbrace", TLBRACE, MS_LBRACE, TG::LBrace, 5},
163     { "lceil", TLCEIL, MS_LCEIL, TG::LBrace, 5},
164     { "ldbracket", TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5},
165     { "ldline", TLDLINE, MS_DVERTLINE, TG::LBrace, 5},
166     { "le", TLE, MS_LE, TG::Relation, 0},
167     { "left", TLEFT, '\0', TG::NONE, 5},
168     { "leftarrow" , TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5},
169     { "leslant", TLESLANT, MS_LESLANT, TG::Relation, 0 },
170     { "lfloor", TLFLOOR, MS_LFLOOR, TG::LBrace, 5},
171     { "lim", TLIM, '\0', TG::Oper, 5},
172     { "lime", TLIME, '\0', TG::Color, 0},
173     { "liminf", TLIMINF, '\0', TG::Oper, 5},
174     { "limsup", TLIMSUP, '\0', TG::Oper, 5},
175     { "lint", TLINT, MS_LINT, TG::Oper, 5},
176     { "ll", TLL, MS_LL, TG::Relation, 0},
177     { "lline", TLLINE, MS_VERTLINE, TG::LBrace, 5},
178     { "llint", TLLINT, MS_LLINT, TG::Oper, 5},
179     { "lllint", TLLLINT, MS_LLLINT, TG::Oper, 5},
180     { "ln", TLN, '\0', TG::Function, 5},
181     { "log", TLOG, '\0', TG::Function, 5},
182     { "lsub", TLSUB, '\0', TG::Power, 0},
183     { "lsup", TLSUP, '\0', TG::Power, 0},
184     { "lt", TLT, MS_LT, TG::Relation, 0},
185     { "magenta", TMAGENTA, '\0', TG::Color, 0},
186     { "maroon", TMAROON, '\0', TG::Color, 0},
187     { "matrix", TMATRIX, '\0', TG::NONE, 5},
188     { "minusplus", TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5},
189     { "mline", TMLINE, MS_VERTLINE, TG::NONE, 0},      //! not in TG::RBrace, Level 0
190     { "nabla", TNABLA, MS_NABLA, TG::Standalone, 5},
191     { "navy", TNAVY, '\0', TG::Color, 0},
192     { "nbold", TNBOLD, '\0', TG::FontAttr, 5},
193     { "ndivides", TNDIVIDES, MS_NDIVIDES, TG::Relation, 0},
194     { "neg", TNEG, MS_NEG, TG::UnOper, 5 },
195     { "neq", TNEQ, MS_NEQ, TG::Relation, 0},
196     { "newline", TNEWLINE, '\0', TG::NONE, 0},
197     { "ni", TNI, MS_NI, TG::Relation, 0},
198     { "nitalic", TNITALIC, '\0', TG::FontAttr, 5},
199     { "none", TNONE, '\0', TG::LBrace | TG::RBrace, 0},
200     { "nospace", TNOSPACE, '\0', TG::Standalone, 5},
201     { "notexists", TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5},
202     { "notin", TNOTIN, MS_NOTIN, TG::Relation, 0},
203     { "nprec", TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
204     { "nroot", TNROOT, MS_SQRT, TG::UnOper, 5},
205     { "nsubset", TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
206     { "nsubseteq", TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
207     { "nsucc", TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
208     { "nsupset", TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
209     { "nsupseteq", TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
210     { "odivide", TODIVIDE, MS_ODIVIDE, TG::Product, 0},
211     { "odot", TODOT, MS_ODOT, TG::Product, 0},
212     { "olive", TOLIVE, '\0', TG::Color, 0},
213     { "ominus", TOMINUS, MS_OMINUS, TG::Sum, 0},
214     { "oper", TOPER, '\0', TG::Oper, 5},
215     { "oplus", TOPLUS, MS_OPLUS, TG::Sum, 0},
216     { "or", TOR, MS_OR, TG::Sum, 0},
217     { "ortho", TORTHO, MS_ORTHO, TG::Relation, 0},
218     { "otimes", TOTIMES, MS_OTIMES, TG::Product, 0},
219     { "over", TOVER, '\0', TG::Product, 0},
220     { "overbrace", TOVERBRACE, MS_OVERBRACE, TG::Product, 5},
221     { "overline", TOVERLINE, '\0', TG::Attribute, 5},
222     { "overstrike", TOVERSTRIKE, '\0', TG::Attribute, 5},
223     { "owns", TNI, MS_NI, TG::Relation, 0},
224     { "parallel", TPARALLEL, MS_DLINE, TG::Relation, 0},
225     { "partial", TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
226     { "phantom", TPHANTOM, '\0', TG::FontAttr, 5},
227     { "plusminus", TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5},
228     { "prec", TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
229     { "preccurlyeq", TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
230     { "precsim", TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
231     { "prod", TPROD, MS_PROD, TG::Oper, 5},
232     { "prop", TPROP, MS_PROP, TG::Relation, 0},
233     { "purple", TPURPLE, '\0', TG::Color, 0},
234     { "rangle", TRANGLE, MS_RMATHANGLE, TG::RBrace, 0},  //! 0 to terminate expression
235     { "rbrace", TRBRACE, MS_RBRACE, TG::RBrace, 0},
236     { "rceil", TRCEIL, MS_RCEIL, TG::RBrace, 0},
237     { "rdbracket", TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0},
238     { "rdline", TRDLINE, MS_DVERTLINE, TG::RBrace, 0},
239     { "re" , TRE, MS_RE, TG::Standalone, 5 },
240     { "red", TRED, '\0', TG::Color, 0},
241     { "rfloor", TRFLOOR, MS_RFLOOR, TG::RBrace, 0},  //! 0 to terminate expression
242     { "right", TRIGHT, '\0', TG::NONE, 0},
243     { "rightarrow" , TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5},
244     { "rline", TRLINE, MS_VERTLINE, TG::RBrace, 0},  //! 0 to terminate expression
245     { "rsub", TRSUB, '\0', TG::Power, 0},
246     { "rsup", TRSUP, '\0', TG::Power, 0},
247     { "sans", TSANS, '\0', TG::Font, 0},
248     { "serif", TSERIF, '\0', TG::Font, 0},
249     { "setC" , TSETC, MS_SETC, TG::Standalone, 5},
250     { "setminus", TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
251     { "setN" , TSETN, MS_SETN, TG::Standalone, 5},
252     { "setQ" , TSETQ, MS_SETQ, TG::Standalone, 5},
253     { "setR" , TSETR, MS_SETR, TG::Standalone, 5},
254     { "setZ" , TSETZ, MS_SETZ, TG::Standalone, 5},
255     { "silver", TSILVER, '\0', TG::Color, 0},
256     { "sim", TSIM, MS_SIM, TG::Relation, 0},
257     { "simeq", TSIMEQ, MS_SIMEQ, TG::Relation, 0},
258     { "sin", TSIN, '\0', TG::Function, 5},
259     { "sinh", TSINH, '\0', TG::Function, 5},
260     { "size", TSIZE, '\0', TG::FontAttr, 5},
261     { "slash", TSLASH, MS_SLASH, TG::Product, 0 },
262     { "sqrt", TSQRT, MS_SQRT, TG::UnOper, 5},
263     { "stack", TSTACK, '\0', TG::NONE, 5},
264     { "sub", TRSUB, '\0', TG::Power, 0},
265     { "subset", TSUBSET, MS_SUBSET, TG::Relation, 0},
266     { "subseteq", TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0},
267     { "succ", TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
268     { "succcurlyeq", TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
269     { "succsim", TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
270     { "sum", TSUM, MS_SUM, TG::Oper, 5},
271     { "sup", TRSUP, '\0', TG::Power, 0},
272     { "supset", TSUPSET, MS_SUPSET, TG::Relation, 0},
273     { "supseteq", TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0},
274     { "tan", TTAN, '\0', TG::Function, 5},
275     { "tanh", TTANH, '\0', TG::Function, 5},
276     { "teal", TTEAL, '\0', TG::Color, 0},
277     { "tilde", TTILDE, MS_TILDE, TG::Attribute, 5},
278     { "times", TTIMES, MS_TIMES, TG::Product, 0},
279     { "to", TTO, '\0', TG::Limit, 0},
280     { "toward", TTOWARD, MS_RIGHTARROW, TG::Relation, 0},
281     { "transl", TTRANSL, MS_TRANSL, TG::Relation, 0},
282     { "transr", TTRANSR, MS_TRANSR, TG::Relation, 0},
283     { "underbrace", TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5},
284     { "underline", TUNDERLINE, '\0', TG::Attribute, 5},
285     { "union", TUNION, MS_UNION, TG::Sum, 0},
286     { "uoper", TUOPER, '\0', TG::UnOper, 5},
287     { "uparrow" , TUPARROW, MS_UPARROW, TG::Standalone, 5},
288     { "vec", TVEC, MS_VEC, TG::Attribute, 5},
289     { "white", TWHITE, '\0', TG::Color, 0},
290     { "widebslash", TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
291     { "wideharpoon", TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5},
292     { "widehat", TWIDEHAT, MS_HAT, TG::Attribute, 5},
293     { "wideslash", TWIDESLASH, MS_SLASH, TG::Product, 0 },
294     { "widetilde", TWIDETILDE, MS_TILDE, TG::Attribute, 5},
295     { "widevec", TWIDEVEC, MS_VEC, TG::Attribute, 5},
296     { "wp" , TWP, MS_WP, TG::Standalone, 5},
297     { "yellow", TYELLOW, '\0', TG::Color, 0}
298 };
299 
300 #if !defined NDEBUG
301 static bool sortCompare(const SmTokenTableEntry & lhs, const SmTokenTableEntry & rhs)
302 {
303     return OUString::createFromAscii(lhs.pIdent).compareToIgnoreAsciiCase(OUString::createFromAscii(rhs.pIdent)) < 0;
304 }
305 #endif
306 static bool findCompare(const SmTokenTableEntry & lhs, const OUString & s)
307 {
308     return s.compareToIgnoreAsciiCaseAscii(lhs.pIdent) > 0;
309 }
310 const SmTokenTableEntry * SmParser::GetTokenTableEntry( const OUString &rName )
311 {
312     static bool bSortKeyWords = false;
313     if( !bSortKeyWords )
314     {
315         assert( std::is_sorted( std::begin(aTokenTable), std::end(aTokenTable), sortCompare ) );
316         bSortKeyWords = true;
317     }
318 
319     if (rName.isEmpty())
320         return nullptr;
321 
322     auto findIter = std::lower_bound( std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare );
323     if ( findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCaseAscii( findIter->pIdent ))
324         return &*findIter;
325 
326     return nullptr;
327 }
328 
329 namespace {
330 
331 bool IsDelimiter( const OUString &rTxt, sal_Int32 nPos )
332     // returns 'true' iff cChar is '\0' or a delimiter
333 {
334     assert(nPos <= rTxt.getLength()); //index out of range
335 
336     if (nPos == rTxt.getLength())
337         return true;
338 
339     sal_Unicode cChar = rTxt[nPos];
340 
341     // check if 'cChar' is in the delimiter table
342     static const sal_Unicode aDelimiterTable[] =
343     {
344         ' ',  '\t', '\n', '\r', '+',  '-',  '*',  '/',  '=',  '#',
345         '%',  '\\', '"',  '~',  '`',  '>',  '<',  '&',  '|',  '(',
346         ')',  '{',  '}',  '[',  ']',  '^',  '_'
347     };
348     for (auto const &cDelimiter : aDelimiterTable)
349     {
350         if (cDelimiter == cChar)
351             return true;
352     }
353 
354     sal_Int16 nTypJp = SM_MOD()->GetSysLocale().GetCharClass().getType( rTxt, nPos );
355     return ( nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR ||
356              nTypJp == css::i18n::UnicodeType::CONTROL);
357 }
358 
359 }
360 
361 void SmParser::Replace( sal_Int32 nPos, sal_Int32 nLen, const OUString &rText )
362 {
363     assert( nPos + nLen <= m_aBufferString.getLength() );
364 
365     m_aBufferString = m_aBufferString.replaceAt( nPos, nLen, rText );
366     sal_Int32 nChg = rText.getLength() - nLen;
367     m_nBufferIndex = m_nBufferIndex + nChg;
368     m_nTokenIndex = m_nTokenIndex + nChg;
369 }
370 
371 void SmParser::NextToken()
372 {
373     // First character may be any alphabetic
374     static const sal_Int32 coStartFlags =
375         KParseTokens::ANY_LETTER |
376         KParseTokens::IGNORE_LEADING_WS;
377 
378     // Continuing characters may be any alphabetic
379     static const sal_Int32 coContFlags =
380         (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
381         | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
382 
383     // user-defined char continuing characters may be any alphanumeric or dot.
384     static const sal_Int32 coUserDefinedCharContFlags =
385         KParseTokens::ANY_LETTER_OR_NUMBER |
386         KParseTokens::ASC_DOT |
387         KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
388 
389     // First character for numbers, may be any numeric or dot
390     static const sal_Int32 coNumStartFlags =
391         KParseTokens::ASC_DIGIT |
392         KParseTokens::ASC_DOT |
393         KParseTokens::IGNORE_LEADING_WS;
394 
395     // Continuing characters for numbers, may be any numeric or dot.
396     // tdf#127873: additionally accept ',' comma group separator as too many
397     // existing documents unwittingly may have used that as decimal separator
398     // in such locales (though it never was as this is always the en-US locale
399     // and the group separator is only parsed away).
400     static const sal_Int32 coNumContFlags =
401         (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS) |
402         KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
403 
404     sal_Int32   nBufLen = m_aBufferString.getLength();
405     ParseResult aRes;
406     sal_Int32   nRealStart;
407     bool        bCont;
408     do
409     {
410         // skip white spaces
411         while (UnicodeType::SPACE_SEPARATOR ==
412                         m_pSysCC->getType( m_aBufferString, m_nBufferIndex ))
413            ++m_nBufferIndex;
414 
415         // Try to parse a number in a locale-independent manner using
416         // '.' as decimal separator.
417         // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
418         aRes = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER,
419                                         m_aBufferString, m_nBufferIndex,
420                                         coNumStartFlags, "",
421                                         coNumContFlags, "");
422 
423         if (aRes.TokenType == 0)
424         {
425             // Try again with the default token parsing.
426             aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex,
427                                      coStartFlags, "",
428                                      coContFlags, "");
429         }
430 
431         nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
432         m_nBufferIndex = nRealStart;
433 
434         bCont = false;
435         if ( aRes.TokenType == 0  &&
436                 nRealStart < nBufLen &&
437                 '\n' == m_aBufferString[ nRealStart ] )
438         {
439             // keep data needed for tokens row and col entry up to date
440             ++m_nRow;
441             m_nBufferIndex = m_nColOff = nRealStart + 1;
442             bCont = true;
443         }
444         else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
445         {
446             if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
447             {
448                 //SkipComment
449                 m_nBufferIndex = nRealStart + 2;
450                 while (m_nBufferIndex < nBufLen  &&
451                     '\n' != m_aBufferString[ m_nBufferIndex ])
452                     ++m_nBufferIndex;
453                 bCont = true;
454             }
455         }
456 
457     } while (bCont);
458 
459     // set index of current token
460     m_nTokenIndex = m_nBufferIndex;
461 
462     m_aCurToken.nRow   = m_nRow;
463     m_aCurToken.nCol   = nRealStart - m_nColOff + 1;
464 
465     bool bHandled = true;
466     if (nRealStart >= nBufLen)
467     {
468         m_aCurToken.eType    = TEND;
469         m_aCurToken.cMathChar = '\0';
470         m_aCurToken.nGroup       = TG::NONE;
471         m_aCurToken.nLevel       = 0;
472         m_aCurToken.aText.clear();
473     }
474     else if (aRes.TokenType & KParseType::ANY_NUMBER)
475     {
476         assert(aRes.EndPos > 0);
477         if ( m_aBufferString[aRes.EndPos-1] == ',' &&
478              aRes.EndPos < nBufLen &&
479              m_pSysCC->getType( m_aBufferString, aRes.EndPos ) != UnicodeType::SPACE_SEPARATOR )
480         {
481             // Comma followed by a non-space char is unlikely for decimal/thousands separator.
482             --aRes.EndPos;
483         }
484         sal_Int32 n = aRes.EndPos - nRealStart;
485         assert(n >= 0);
486         m_aCurToken.eType      = TNUMBER;
487         m_aCurToken.cMathChar  = '\0';
488         m_aCurToken.nGroup     = TG::NONE;
489         m_aCurToken.nLevel     = 5;
490         m_aCurToken.aText      = m_aBufferString.copy( nRealStart, n );
491 
492         SAL_WARN_IF( !IsDelimiter( m_aBufferString, aRes.EndPos ), "starmath", "identifier really finished? (compatibility!)" );
493     }
494     else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
495     {
496         m_aCurToken.eType      = TTEXT;
497         m_aCurToken.cMathChar  = '\0';
498         m_aCurToken.nGroup     = TG::NONE;
499         m_aCurToken.nLevel     = 5;
500         m_aCurToken.aText     = aRes.DequotedNameOrString;
501         m_aCurToken.nRow       = m_nRow;
502         m_aCurToken.nCol       = nRealStart - m_nColOff + 2;
503     }
504     else if (aRes.TokenType & KParseType::IDENTNAME)
505     {
506         sal_Int32 n = aRes.EndPos - nRealStart;
507         assert(n >= 0);
508         OUString aName( m_aBufferString.copy( nRealStart, n ) );
509         const SmTokenTableEntry *pEntry = GetTokenTableEntry( aName );
510 
511         if (pEntry)
512         {
513             m_aCurToken.eType      = pEntry->eType;
514             m_aCurToken.cMathChar  = pEntry->cMathChar;
515             m_aCurToken.nGroup     = pEntry->nGroup;
516             m_aCurToken.nLevel     = pEntry->nLevel;
517             m_aCurToken.aText      = OUString::createFromAscii( pEntry->pIdent );
518         }
519         else
520         {
521             m_aCurToken.eType      = TIDENT;
522             m_aCurToken.cMathChar  = '\0';
523             m_aCurToken.nGroup     = TG::NONE;
524             m_aCurToken.nLevel     = 5;
525             m_aCurToken.aText      = aName;
526 
527             SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos),"starmath", "identifier really finished? (compatibility!)");
528         }
529     }
530     else if (aRes.TokenType == 0  &&  '_' == m_aBufferString[ nRealStart ])
531     {
532         m_aCurToken.eType    = TRSUB;
533         m_aCurToken.cMathChar = '\0';
534         m_aCurToken.nGroup       = TG::Power;
535         m_aCurToken.nLevel       = 0;
536         m_aCurToken.aText = "_";
537 
538         aRes.EndPos = nRealStart + 1;
539     }
540     else if (aRes.TokenType & KParseType::BOOLEAN)
541     {
542         sal_Int32   &rnEndPos = aRes.EndPos;
543         if (rnEndPos - nRealStart <= 2)
544         {
545             sal_Unicode ch = m_aBufferString[ nRealStart ];
546             switch (ch)
547             {
548                 case '<':
549                     {
550                         if (m_aBufferString.match("<<", nRealStart))
551                         {
552                             m_aCurToken.eType    = TLL;
553                             m_aCurToken.cMathChar = MS_LL;
554                             m_aCurToken.nGroup       = TG::Relation;
555                             m_aCurToken.nLevel       = 0;
556                             m_aCurToken.aText = "<<";
557 
558                             rnEndPos = nRealStart + 2;
559                         }
560                         else if (m_aBufferString.match("<=", nRealStart))
561                         {
562                             m_aCurToken.eType    = TLE;
563                             m_aCurToken.cMathChar = MS_LE;
564                             m_aCurToken.nGroup       = TG::Relation;
565                             m_aCurToken.nLevel       = 0;
566                             m_aCurToken.aText = "<=";
567 
568                             rnEndPos = nRealStart + 2;
569                         }
570                         else if (m_aBufferString.match("<-", nRealStart))
571                         {
572                             m_aCurToken.eType    = TLEFTARROW;
573                             m_aCurToken.cMathChar = MS_LEFTARROW;
574                             m_aCurToken.nGroup       = TG::Standalone;
575                             m_aCurToken.nLevel       = 5;
576                             m_aCurToken.aText = "<-";
577 
578                             rnEndPos = nRealStart + 2;
579                         }
580                         else if (m_aBufferString.match("<>", nRealStart))
581                         {
582                             m_aCurToken.eType    = TNEQ;
583                             m_aCurToken.cMathChar = MS_NEQ;
584                             m_aCurToken.nGroup       = TG::Relation;
585                             m_aCurToken.nLevel       = 0;
586                             m_aCurToken.aText = "<>";
587 
588                             rnEndPos = nRealStart + 2;
589                         }
590                         else if (m_aBufferString.match("<?>", nRealStart))
591                         {
592                             m_aCurToken.eType    = TPLACE;
593                             m_aCurToken.cMathChar = MS_PLACE;
594                             m_aCurToken.nGroup       = TG::NONE;
595                             m_aCurToken.nLevel       = 5;
596                             m_aCurToken.aText = "<?>";
597 
598                             rnEndPos = nRealStart + 3;
599                         }
600                         else
601                         {
602                             m_aCurToken.eType    = TLT;
603                             m_aCurToken.cMathChar = MS_LT;
604                             m_aCurToken.nGroup       = TG::Relation;
605                             m_aCurToken.nLevel       = 0;
606                             m_aCurToken.aText = "<";
607                         }
608                     }
609                     break;
610                 case '>':
611                     {
612                         if (m_aBufferString.match(">=", nRealStart))
613                         {
614                             m_aCurToken.eType    = TGE;
615                             m_aCurToken.cMathChar = MS_GE;
616                             m_aCurToken.nGroup       = TG::Relation;
617                             m_aCurToken.nLevel       = 0;
618                             m_aCurToken.aText = ">=";
619 
620                             rnEndPos = nRealStart + 2;
621                         }
622                         else if (m_aBufferString.match(">>", nRealStart))
623                         {
624                             m_aCurToken.eType    = TGG;
625                             m_aCurToken.cMathChar = MS_GG;
626                             m_aCurToken.nGroup       = TG::Relation;
627                             m_aCurToken.nLevel       = 0;
628                             m_aCurToken.aText = ">>";
629 
630                             rnEndPos = nRealStart + 2;
631                         }
632                         else
633                         {
634                             m_aCurToken.eType    = TGT;
635                             m_aCurToken.cMathChar = MS_GT;
636                             m_aCurToken.nGroup       = TG::Relation;
637                             m_aCurToken.nLevel       = 0;
638                             m_aCurToken.aText = ">";
639                         }
640                     }
641                     break;
642                 default:
643                     bHandled = false;
644             }
645         }
646     }
647     else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
648     {
649         sal_Int32   &rnEndPos = aRes.EndPos;
650         if (rnEndPos - nRealStart == 1)
651         {
652             sal_Unicode ch = m_aBufferString[ nRealStart ];
653             switch (ch)
654             {
655                 case '%':
656                     {
657                         //! modifies aRes.EndPos
658 
659                         OSL_ENSURE( rnEndPos >= nBufLen  ||
660                                     '%' != m_aBufferString[ rnEndPos ],
661                                 "unexpected comment start" );
662 
663                         // get identifier of user-defined character
664                         ParseResult aTmpRes = m_pSysCC->parseAnyToken(
665                                 m_aBufferString, rnEndPos,
666                                 KParseTokens::ANY_LETTER,
667                                 "",
668                                 coUserDefinedCharContFlags,
669                                 "" );
670 
671                         sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;
672 
673                         // default setting for the case that no identifier
674                         // i.e. a valid symbol-name is following the '%'
675                         // character
676                         m_aCurToken.eType      = TTEXT;
677                         m_aCurToken.cMathChar  = '\0';
678                         m_aCurToken.nGroup     = TG::NONE;
679                         m_aCurToken.nLevel     = 5;
680                         m_aCurToken.aText      ="%";
681                         m_aCurToken.nRow       = m_nRow;
682                         m_aCurToken.nCol       = nTmpStart - m_nColOff;
683 
684                         if (aTmpRes.TokenType & KParseType::IDENTNAME)
685                         {
686 
687                             sal_Int32 n = aTmpRes.EndPos - nTmpStart;
688                             m_aCurToken.eType      = TSPECIAL;
689                             m_aCurToken.aText      = m_aBufferString.copy( nTmpStart-1, n+1 );
690 
691                             OSL_ENSURE( aTmpRes.EndPos > rnEndPos,
692                                     "empty identifier" );
693                             if (aTmpRes.EndPos > rnEndPos)
694                                 rnEndPos = aTmpRes.EndPos;
695                             else
696                                 ++rnEndPos;
697                         }
698 
699                         // if no symbol-name was found we start-over with
700                         // finding the next token right after the '%' sign.
701                         // I.e. we leave rnEndPos unmodified.
702                     }
703                     break;
704                 case '[':
705                     {
706                         m_aCurToken.eType    = TLBRACKET;
707                         m_aCurToken.cMathChar = MS_LBRACKET;
708                         m_aCurToken.nGroup       = TG::LBrace;
709                         m_aCurToken.nLevel       = 5;
710                         m_aCurToken.aText = "[";
711                     }
712                     break;
713                 case '\\':
714                     {
715                         m_aCurToken.eType    = TESCAPE;
716                         m_aCurToken.cMathChar = '\0';
717                         m_aCurToken.nGroup       = TG::NONE;
718                         m_aCurToken.nLevel       = 5;
719                         m_aCurToken.aText = "\\";
720                     }
721                     break;
722                 case ']':
723                     {
724                         m_aCurToken.eType    = TRBRACKET;
725                         m_aCurToken.cMathChar = MS_RBRACKET;
726                         m_aCurToken.nGroup       = TG::RBrace;
727                         m_aCurToken.nLevel       = 0;
728                         m_aCurToken.aText = "]";
729                     }
730                     break;
731                 case '^':
732                     {
733                         m_aCurToken.eType    = TRSUP;
734                         m_aCurToken.cMathChar = '\0';
735                         m_aCurToken.nGroup       = TG::Power;
736                         m_aCurToken.nLevel       = 0;
737                         m_aCurToken.aText = "^";
738                     }
739                     break;
740                 case '`':
741                     {
742                         m_aCurToken.eType    = TSBLANK;
743                         m_aCurToken.cMathChar = '\0';
744                         m_aCurToken.nGroup       = TG::Blank;
745                         m_aCurToken.nLevel       = 5;
746                         m_aCurToken.aText = "`";
747                     }
748                     break;
749                 case '{':
750                     {
751                         m_aCurToken.eType    = TLGROUP;
752                         m_aCurToken.cMathChar = MS_LBRACE;
753                         m_aCurToken.nGroup       = TG::NONE;
754                         m_aCurToken.nLevel       = 5;
755                         m_aCurToken.aText = "{";
756                     }
757                     break;
758                 case '|':
759                     {
760                         m_aCurToken.eType    = TOR;
761                         m_aCurToken.cMathChar = MS_OR;
762                         m_aCurToken.nGroup       = TG::Sum;
763                         m_aCurToken.nLevel       = 0;
764                         m_aCurToken.aText = "|";
765                     }
766                     break;
767                 case '}':
768                     {
769                         m_aCurToken.eType    = TRGROUP;
770                         m_aCurToken.cMathChar = MS_RBRACE;
771                         m_aCurToken.nGroup       = TG::NONE;
772                         m_aCurToken.nLevel       = 0;
773                         m_aCurToken.aText = "}";
774                     }
775                     break;
776                 case '~':
777                     {
778                         m_aCurToken.eType    = TBLANK;
779                         m_aCurToken.cMathChar = '\0';
780                         m_aCurToken.nGroup       = TG::Blank;
781                         m_aCurToken.nLevel       = 5;
782                         m_aCurToken.aText = "~";
783                     }
784                     break;
785                 case '#':
786                     {
787                         if (m_aBufferString.match("##", nRealStart))
788                         {
789                             m_aCurToken.eType    = TDPOUND;
790                             m_aCurToken.cMathChar = '\0';
791                             m_aCurToken.nGroup       = TG::NONE;
792                             m_aCurToken.nLevel       = 0;
793                             m_aCurToken.aText = "##";
794 
795                             rnEndPos = nRealStart + 2;
796                         }
797                         else
798                         {
799                             m_aCurToken.eType    = TPOUND;
800                             m_aCurToken.cMathChar = '\0';
801                             m_aCurToken.nGroup       = TG::NONE;
802                             m_aCurToken.nLevel       = 0;
803                             m_aCurToken.aText = "#";
804                         }
805                     }
806                     break;
807                 case '&':
808                     {
809                         m_aCurToken.eType    = TAND;
810                         m_aCurToken.cMathChar = MS_AND;
811                         m_aCurToken.nGroup       = TG::Product;
812                         m_aCurToken.nLevel       = 0;
813                         m_aCurToken.aText = "&";
814                     }
815                     break;
816                 case '(':
817                     {
818                         m_aCurToken.eType    = TLPARENT;
819                         m_aCurToken.cMathChar = MS_LPARENT;
820                         m_aCurToken.nGroup       = TG::LBrace;
821                         m_aCurToken.nLevel       = 5;     //! 0 to continue expression
822                         m_aCurToken.aText = "(";
823                     }
824                     break;
825                 case ')':
826                     {
827                         m_aCurToken.eType    = TRPARENT;
828                         m_aCurToken.cMathChar = MS_RPARENT;
829                         m_aCurToken.nGroup       = TG::RBrace;
830                         m_aCurToken.nLevel       = 0;     //! 0 to terminate expression
831                         m_aCurToken.aText = ")";
832                     }
833                     break;
834                 case '*':
835                     {
836                         m_aCurToken.eType    = TMULTIPLY;
837                         m_aCurToken.cMathChar = MS_MULTIPLY;
838                         m_aCurToken.nGroup       = TG::Product;
839                         m_aCurToken.nLevel       = 0;
840                         m_aCurToken.aText = "*";
841                     }
842                     break;
843                 case '+':
844                     {
845                         if (m_aBufferString.match("+-", nRealStart))
846                         {
847                             m_aCurToken.eType    = TPLUSMINUS;
848                             m_aCurToken.cMathChar = MS_PLUSMINUS;
849                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
850                             m_aCurToken.nLevel       = 5;
851                             m_aCurToken.aText = "+-";
852 
853                             rnEndPos = nRealStart + 2;
854                         }
855                         else
856                         {
857                             m_aCurToken.eType    = TPLUS;
858                             m_aCurToken.cMathChar = MS_PLUS;
859                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
860                             m_aCurToken.nLevel       = 5;
861                             m_aCurToken.aText = "+";
862                         }
863                     }
864                     break;
865                 case '-':
866                     {
867                         if (m_aBufferString.match("-+", nRealStart))
868                         {
869                             m_aCurToken.eType    = TMINUSPLUS;
870                             m_aCurToken.cMathChar = MS_MINUSPLUS;
871                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
872                             m_aCurToken.nLevel       = 5;
873                             m_aCurToken.aText = "-+";
874 
875                             rnEndPos = nRealStart + 2;
876                         }
877                         else if (m_aBufferString.match("->", nRealStart))
878                         {
879                             m_aCurToken.eType    = TRIGHTARROW;
880                             m_aCurToken.cMathChar = MS_RIGHTARROW;
881                             m_aCurToken.nGroup       = TG::Standalone;
882                             m_aCurToken.nLevel       = 5;
883                             m_aCurToken.aText = "->";
884 
885                             rnEndPos = nRealStart + 2;
886                         }
887                         else
888                         {
889                             m_aCurToken.eType    = TMINUS;
890                             m_aCurToken.cMathChar = MS_MINUS;
891                             m_aCurToken.nGroup       = TG::UnOper | TG::Sum;
892                             m_aCurToken.nLevel       = 5;
893                             m_aCurToken.aText = "-";
894                         }
895                     }
896                     break;
897                 case '.':
898                     {
899                         // Only one character? Then it can't be a number.
900                         if (m_nBufferIndex < m_aBufferString.getLength() - 1)
901                         {
902                             // for compatibility with SO5.2
903                             // texts like .34 ...56 ... h ...78..90
904                             // will be treated as numbers
905                             m_aCurToken.eType     = TNUMBER;
906                             m_aCurToken.cMathChar = '\0';
907                             m_aCurToken.nGroup    = TG::NONE;
908                             m_aCurToken.nLevel    = 5;
909 
910                             sal_Int32 nTxtStart = m_nBufferIndex;
911                             sal_Unicode cChar;
912                             // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
913                             do
914                             {
915                                 cChar = m_aBufferString[ ++m_nBufferIndex ];
916                             }
917                             while ( (cChar == '.' || rtl::isAsciiDigit( cChar )) &&
918                                      ( m_nBufferIndex < m_aBufferString.getLength() - 1 ) );
919 
920                             m_aCurToken.aText = m_aBufferString.copy( nTxtStart, m_nBufferIndex - nTxtStart );
921                             aRes.EndPos = m_nBufferIndex;
922                         }
923                         else
924                             bHandled = false;
925                     }
926                     break;
927                 case '/':
928                     {
929                         m_aCurToken.eType    = TDIVIDEBY;
930                         m_aCurToken.cMathChar = MS_SLASH;
931                         m_aCurToken.nGroup       = TG::Product;
932                         m_aCurToken.nLevel       = 0;
933                         m_aCurToken.aText = "/";
934                     }
935                     break;
936                 case '=':
937                     {
938                         m_aCurToken.eType    = TASSIGN;
939                         m_aCurToken.cMathChar = MS_ASSIGN;
940                         m_aCurToken.nGroup       = TG::Relation;
941                         m_aCurToken.nLevel       = 0;
942                         m_aCurToken.aText = "=";
943                     }
944                     break;
945                 default:
946                     bHandled = false;
947             }
948         }
949     }
950     else
951         bHandled = false;
952 
953     if (!bHandled)
954     {
955         m_aCurToken.eType      = TCHARACTER;
956         m_aCurToken.cMathChar  = '\0';
957         m_aCurToken.nGroup     = TG::NONE;
958         m_aCurToken.nLevel     = 5;
959         m_aCurToken.aText      = m_aBufferString.copy( nRealStart, 1 );
960 
961         aRes.EndPos = nRealStart + 1;
962     }
963 
964     if (TEND != m_aCurToken.eType)
965         m_nBufferIndex = aRes.EndPos;
966 }
967 
968 namespace
969 {
970     SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
971     {
972         SmNodeArray aSubArray(rSubNodes.size());
973         for (size_t i = 0; i < rSubNodes.size(); ++i)
974             aSubArray[i] = rSubNodes[i].release();
975         return aSubArray;
976     }
977 }
978 
979 // grammar
980 
981 std::unique_ptr<SmTableNode> SmParser::DoTable()
982 {
983     DepthProtect aDepthGuard(m_nParseDepth);
984     if (aDepthGuard.TooDeep())
985         throw std::range_error("parser depth limit");
986 
987     std::vector<std::unique_ptr<SmNode>> aLineArray;
988     aLineArray.push_back(DoLine());
989     while (m_aCurToken.eType == TNEWLINE)
990     {
991         NextToken();
992         aLineArray.push_back(DoLine());
993     }
994     assert(m_aCurToken.eType == TEND);
995     std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
996     xSNode->SetSubNodes(buildNodeArray(aLineArray));
997     return xSNode;
998 }
999 
1000 std::unique_ptr<SmNode> SmParser::DoAlign(bool bUseExtraSpaces)
1001     // parse alignment info (if any), then go on with rest of expression
1002 {
1003     DepthProtect aDepthGuard(m_nParseDepth);
1004     if (aDepthGuard.TooDeep())
1005         throw std::range_error("parser depth limit");
1006 
1007     std::unique_ptr<SmStructureNode> xSNode;
1008 
1009     if (TokenInGroup(TG::Align))
1010     {
1011         xSNode.reset(new SmAlignNode(m_aCurToken));
1012 
1013         NextToken();
1014 
1015         // allow for just one align statement in 5.0
1016         if (TokenInGroup(TG::Align))
1017             return DoError(SmParseError::DoubleAlign);
1018     }
1019 
1020     auto pNode = DoExpression(bUseExtraSpaces);
1021 
1022     if (xSNode)
1023     {
1024         xSNode->SetSubNode(0, pNode.release());
1025         return xSNode;
1026     }
1027     return pNode;
1028 }
1029 
1030 // Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
1031 std::unique_ptr<SmNode> SmParser::DoLine()
1032 {
1033     DepthProtect aDepthGuard(m_nParseDepth);
1034     if (aDepthGuard.TooDeep())
1035         throw std::range_error("parser depth limit");
1036 
1037     std::vector<std::unique_ptr<SmNode>> ExpressionArray;
1038 
1039     // start with single expression that may have an alignment statement
1040     // (and go on with expressions that must not have alignment
1041     // statements in 'while' loop below. See also 'Expression()'.)
1042     if (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TNEWLINE)
1043         ExpressionArray.push_back(DoAlign());
1044 
1045     while (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TNEWLINE)
1046         ExpressionArray.push_back(DoExpression());
1047 
1048     //If there's no expression, add an empty one.
1049     //this is to avoid a formula tree without any caret
1050     //positions, in visual formula editor.
1051     if(ExpressionArray.empty())
1052     {
1053         SmToken aTok;
1054         aTok.eType = TNEWLINE;
1055         ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
1056     }
1057 
1058     auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
1059     xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
1060     return xSNode;
1061 }
1062 
1063 std::unique_ptr<SmNode> SmParser::DoExpression(bool bUseExtraSpaces)
1064 {
1065     DepthProtect aDepthGuard(m_nParseDepth);
1066     if (aDepthGuard.TooDeep())
1067         throw std::range_error("parser depth limit");
1068 
1069     std::vector<std::unique_ptr<SmNode>> RelationArray;
1070     RelationArray.push_back(DoRelation());
1071     while (m_aCurToken.nLevel >= 4)
1072         RelationArray.push_back(DoRelation());
1073 
1074     if (RelationArray.size() > 1)
1075     {
1076         std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
1077         xSNode->SetSubNodes(buildNodeArray(RelationArray));
1078         xSNode->SetUseExtraSpaces(bUseExtraSpaces);
1079         return xSNode;
1080     }
1081     else
1082     {
1083         // This expression has only one node so just push this node.
1084         return std::move(RelationArray[0]);
1085     }
1086 }
1087 
1088 std::unique_ptr<SmNode> SmParser::DoRelation()
1089 {
1090     DepthProtect aDepthGuard(m_nParseDepth);
1091     if (aDepthGuard.TooDeep())
1092         throw std::range_error("parser depth limit");
1093 
1094     auto xFirst = DoSum();
1095     while (TokenInGroup(TG::Relation))
1096     {
1097         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1098         auto xSecond = DoOpSubSup();
1099         auto xThird = DoSum();
1100         xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1101         xFirst = std::move(xSNode);
1102     }
1103     return xFirst;
1104 }
1105 
1106 std::unique_ptr<SmNode> SmParser::DoSum()
1107 {
1108     DepthProtect aDepthGuard(m_nParseDepth);
1109     if (aDepthGuard.TooDeep())
1110         throw std::range_error("parser depth limit");
1111 
1112     auto xFirst = DoProduct();
1113     while (TokenInGroup(TG::Sum))
1114     {
1115         std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
1116         auto xSecond = DoOpSubSup();
1117         auto xThird = DoProduct();
1118         xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
1119         xFirst = std::move(xSNode);
1120     }
1121     return xFirst;
1122 }
1123 
1124 std::unique_ptr<SmNode> SmParser::DoProduct()
1125 {
1126     DepthProtect aDepthGuard(m_nParseDepth);
1127     if (aDepthGuard.TooDeep())
1128         throw std::range_error("parser depth limit");
1129 
1130     auto xFirst = DoPower();
1131 
1132     int nDepthLimit = 0;
1133 
1134     while (TokenInGroup(TG::Product))
1135     {
1136         //this linear loop builds a recursive structure, if it gets
1137         //too deep then later processing, e.g. releasing the tree,
1138         //can exhaust stack
1139         if (nDepthLimit > DEPTH_LIMIT)
1140             throw std::range_error("parser depth limit");
1141 
1142         std::unique_ptr<SmStructureNode> xSNode;
1143         std::unique_ptr<SmNode> xOper;
1144         bool bSwitchArgs = false;
1145 
1146         SmTokenType eType = m_aCurToken.eType;
1147         switch (eType)
1148         {
1149             case TOVER:
1150                 xSNode.reset(new SmBinVerNode(m_aCurToken));
1151                 xOper.reset(new SmRectangleNode(m_aCurToken));
1152                 NextToken();
1153                 break;
1154 
1155             case TBOPER:
1156                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1157 
1158                 NextToken();
1159 
1160                 //Let the glyph node know it's a binary operation
1161                 m_aCurToken.eType = TBOPER;
1162                 m_aCurToken.nGroup = TG::Product;
1163                 xOper = DoGlyphSpecial();
1164                 break;
1165 
1166             case TOVERBRACE :
1167             case TUNDERBRACE :
1168                 xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
1169                 xOper.reset(new SmMathSymbolNode(m_aCurToken));
1170 
1171                 NextToken();
1172                 break;
1173 
1174             case TWIDEBACKSLASH:
1175             case TWIDESLASH:
1176             {
1177                 SmBinDiagonalNode *pSTmp = new SmBinDiagonalNode(m_aCurToken);
1178                 pSTmp->SetAscending(eType == TWIDESLASH);
1179                 xSNode.reset(pSTmp);
1180 
1181                 xOper.reset(new SmPolyLineNode(m_aCurToken));
1182                 NextToken();
1183 
1184                 bSwitchArgs = true;
1185                 break;
1186             }
1187 
1188             default:
1189                 xSNode.reset(new SmBinHorNode(m_aCurToken));
1190 
1191                 xOper = DoOpSubSup();
1192         }
1193 
1194         auto xArg = DoPower();
1195 
1196         if (bSwitchArgs)
1197         {
1198             //! vgl siehe SmBinDiagonalNode::Arrange
1199             xSNode->SetSubNodes(std::move(xFirst), std::move(xArg), std::move(xOper));
1200         }
1201         else
1202         {
1203             xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xArg));
1204         }
1205         xFirst = std::move(xSNode);
1206         ++nDepthLimit;
1207     }
1208     return xFirst;
1209 }
1210 
1211 std::unique_ptr<SmNode> SmParser::DoSubSup(TG nActiveGroup, SmNode *pGivenNode)
1212 {
1213     std::unique_ptr<SmNode> xGivenNode(pGivenNode);
1214     DepthProtect aDepthGuard(m_nParseDepth);
1215     if (aDepthGuard.TooDeep())
1216         throw std::range_error("parser depth limit");
1217 
1218     assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
1219     assert(m_aCurToken.nGroup == nActiveGroup);
1220 
1221     std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
1222     //! Of course 'm_aCurToken' is just the first sub-/supscript token.
1223     //! It should be of no further interest. The positions of the
1224     //! sub-/supscripts will be identified by the corresponding subnodes
1225     //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
1226 
1227     pNode->SetUseLimits(nActiveGroup == TG::Limit);
1228 
1229     // initialize subnodes array
1230     std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
1231     aSubNodes[0] = std::move(xGivenNode);
1232 
1233     // process all sub-/supscripts
1234     int  nIndex = 0;
1235     while (TokenInGroup(nActiveGroup))
1236     {
1237         SmTokenType  eType (m_aCurToken.eType);
1238 
1239         switch (eType)
1240         {
1241             case TRSUB :    nIndex = static_cast<int>(RSUB);    break;
1242             case TRSUP :    nIndex = static_cast<int>(RSUP);    break;
1243             case TFROM :
1244             case TCSUB :    nIndex = static_cast<int>(CSUB);    break;
1245             case TTO :
1246             case TCSUP :    nIndex = static_cast<int>(CSUP);    break;
1247             case TLSUB :    nIndex = static_cast<int>(LSUB);    break;
1248             case TLSUP :    nIndex = static_cast<int>(LSUP);    break;
1249             default :
1250                 SAL_WARN( "starmath", "unknown case");
1251         }
1252         nIndex++;
1253         assert(1 <= nIndex  &&  nIndex <= SUBSUP_NUM_ENTRIES);
1254 
1255         std::unique_ptr<SmNode> xENode;
1256         if (aSubNodes[nIndex]) // if already occupied at earlier iteration
1257         {
1258             // forget the earlier one, remember an error instead
1259             aSubNodes[nIndex].reset();
1260             xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
1261         }
1262         else
1263         {
1264             // skip sub-/supscript token
1265             NextToken();
1266         }
1267 
1268         // get sub-/supscript node
1269         // (even when we saw a double-sub/supscript error in the above
1270         // in order to minimize mess and continue parsing.)
1271         std::unique_ptr<SmNode> xSNode;
1272         if (eType == TFROM  ||  eType == TTO)
1273         {
1274             // parse limits in old 4.0 and 5.0 style
1275             xSNode = DoRelation();
1276         }
1277         else
1278             xSNode = DoTerm(true);
1279 
1280         aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
1281     }
1282 
1283     pNode->SetSubNodes(buildNodeArray(aSubNodes));
1284     return pNode;
1285 }
1286 
1287 std::unique_ptr<SmNode> SmParser::DoOpSubSup()
1288 {
1289     DepthProtect aDepthGuard(m_nParseDepth);
1290     if (aDepthGuard.TooDeep())
1291         throw std::range_error("parser depth limit");
1292 
1293     // get operator symbol
1294     auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1295     // skip operator token
1296     NextToken();
1297     // get sub- supscripts if any
1298     if (m_aCurToken.nGroup == TG::Power)
1299         return DoSubSup(TG::Power, pNode.release());
1300     return pNode;
1301 }
1302 
1303 std::unique_ptr<SmNode> SmParser::DoPower()
1304 {
1305     DepthProtect aDepthGuard(m_nParseDepth);
1306     if (aDepthGuard.TooDeep())
1307         throw std::range_error("parser depth limit");
1308 
1309     // get body for sub- supscripts on top of stack
1310     std::unique_ptr<SmNode> xNode(DoTerm(false));
1311 
1312     if (m_aCurToken.nGroup == TG::Power)
1313         return DoSubSup(TG::Power, xNode.release());
1314     return xNode;
1315 }
1316 
1317 std::unique_ptr<SmBlankNode> SmParser::DoBlank()
1318 {
1319     DepthProtect aDepthGuard(m_nParseDepth);
1320     if (aDepthGuard.TooDeep())
1321         throw std::range_error("parser depth limit");
1322 
1323     assert(TokenInGroup(TG::Blank));
1324     std::unique_ptr<SmBlankNode> pBlankNode(new SmBlankNode(m_aCurToken));
1325 
1326     do
1327     {
1328         pBlankNode->IncreaseBy(m_aCurToken);
1329         NextToken();
1330     }
1331     while (TokenInGroup(TG::Blank));
1332 
1333     // Ignore trailing spaces, if corresponding option is set
1334     if ( m_aCurToken.eType == TNEWLINE ||
1335              (m_aCurToken.eType == TEND && !utl::ConfigManager::IsFuzzing() && SM_MOD()->GetConfig()->IsIgnoreSpacesRight()) )
1336     {
1337         pBlankNode->Clear();
1338     }
1339     return pBlankNode;
1340 }
1341 
1342 std::unique_ptr<SmNode> SmParser::DoTerm(bool bGroupNumberIdent)
1343 {
1344     DepthProtect aDepthGuard(m_nParseDepth);
1345     if (aDepthGuard.TooDeep())
1346         throw std::range_error("parser depth limit");
1347 
1348     switch (m_aCurToken.eType)
1349     {
1350         case TESCAPE :
1351             return DoEscape();
1352 
1353         case TNOSPACE :
1354         case TLGROUP :
1355         {
1356             bool bNoSpace = m_aCurToken.eType == TNOSPACE;
1357             if (bNoSpace)
1358                 NextToken();
1359             if (m_aCurToken.eType != TLGROUP)
1360                 return DoTerm(false); // nospace is no longer concerned
1361 
1362             NextToken();
1363 
1364             // allow for empty group
1365             if (m_aCurToken.eType == TRGROUP)
1366             {
1367                 std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
1368                 xSNode->SetSubNodes(nullptr, nullptr);
1369 
1370                 NextToken();
1371                 return std::unique_ptr<SmNode>(xSNode.release());
1372             }
1373 
1374             auto pNode = DoAlign(!bNoSpace);
1375             if (m_aCurToken.eType == TRGROUP) {
1376                 NextToken();
1377                 return pNode;
1378             }
1379             auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
1380             std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
1381             xSNode->SetSubNodes(std::move(pNode), std::move(xError));
1382             return std::unique_ptr<SmNode>(xSNode.release());
1383         }
1384 
1385         case TLEFT :
1386             return DoBrace();
1387 
1388         case TBLANK :
1389         case TSBLANK :
1390             return DoBlank();
1391 
1392         case TTEXT :
1393             {
1394                 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
1395                 NextToken();
1396                 return std::unique_ptr<SmNode>(pNode.release());
1397             }
1398         case TCHARACTER :
1399             {
1400                 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
1401                 NextToken();
1402                 return std::unique_ptr<SmNode>(pNode.release());
1403             }
1404         case TIDENT :
1405         case TNUMBER :
1406         {
1407             auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken,
1408                                              m_aCurToken.eType == TNUMBER ?
1409                                              FNT_NUMBER :
1410                                              FNT_VARIABLE);
1411             if (!bGroupNumberIdent)
1412             {
1413                 NextToken();
1414                 return std::unique_ptr<SmNode>(pTextNode.release());
1415             }
1416             std::vector<std::unique_ptr<SmNode>> aNodes;
1417             // Some people want to be able to write "x_2n" for "x_{2n}"
1418             // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
1419             // The tokenizer skips whitespaces so we need some additional
1420             // work to distinguish from "x_2 n".
1421             // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
1422             // https://bugs.libreoffice.org/show_bug.cgi?id=55853
1423             sal_Int32 nBufLen = m_aBufferString.getLength();
1424 
1425             // We need to be careful to call NextToken() only after having
1426             // tested for a whitespace separator (otherwise it will be
1427             // skipped!)
1428             bool moveToNextToken = true;
1429             while (m_nBufferIndex < nBufLen &&
1430                    m_pSysCC->getType(m_aBufferString, m_nBufferIndex) !=
1431                    UnicodeType::SPACE_SEPARATOR)
1432             {
1433                 NextToken();
1434                 if (m_aCurToken.eType != TNUMBER &&
1435                     m_aCurToken.eType != TIDENT)
1436                 {
1437                     // Neither a number nor an identifier. We just moved to
1438                     // the next token, so no need to do that again.
1439                     moveToNextToken = false;
1440                     break;
1441                 }
1442                 aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(m_aCurToken,
1443                                                 m_aCurToken.eType ==
1444                                                 TNUMBER ?
1445                                                 FNT_NUMBER :
1446                                                 FNT_VARIABLE)));
1447             }
1448             if (moveToNextToken)
1449                 NextToken();
1450             if (aNodes.empty())
1451                 return std::unique_ptr<SmNode>(pTextNode.release());
1452             // We have several concatenated identifiers and numbers.
1453             // Let's group them into one SmExpressionNode.
1454             aNodes.insert(aNodes.begin(), std::move(pTextNode));
1455             std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
1456             xNode->SetSubNodes(buildNodeArray(aNodes));
1457             return std::unique_ptr<SmNode>(xNode.release());
1458         }
1459         case TLEFTARROW :
1460         case TRIGHTARROW :
1461         case TUPARROW :
1462         case TDOWNARROW :
1463         case TCIRC :
1464         case TDRARROW :
1465         case TDLARROW :
1466         case TDLRARROW :
1467         case TEXISTS :
1468         case TNOTEXISTS :
1469         case TFORALL :
1470         case TPARTIAL :
1471         case TNABLA :
1472         case TTOWARD :
1473         case TDOTSAXIS :
1474         case TDOTSDIAG :
1475         case TDOTSDOWN :
1476         case TDOTSLOW :
1477         case TDOTSUP :
1478         case TDOTSVERT :
1479             {
1480                 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1481                 NextToken();
1482                 return std::unique_ptr<SmNode>(pNode.release());
1483             }
1484 
1485         case TSETN :
1486         case TSETZ :
1487         case TSETQ :
1488         case TSETR :
1489         case TSETC :
1490         case THBAR :
1491         case TLAMBDABAR :
1492         case TBACKEPSILON :
1493         case TALEPH :
1494         case TIM :
1495         case TRE :
1496         case TWP :
1497         case TEMPTYSET :
1498         case TINFINITY :
1499             {
1500                 auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken);
1501                 NextToken();
1502                 return std::unique_ptr<SmNode>(pNode.release());
1503             }
1504 
1505         case TPLACE:
1506             {
1507                 auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken);
1508                 NextToken();
1509                 return std::unique_ptr<SmNode>(pNode.release());
1510             }
1511 
1512         case TSPECIAL:
1513             return DoSpecial();
1514 
1515         case TBINOM:
1516             return DoBinom();
1517 
1518         case TSTACK:
1519             return DoStack();
1520 
1521         case TMATRIX:
1522             return DoMatrix();
1523 
1524         default:
1525             if (TokenInGroup(TG::LBrace))
1526                 return DoBrace();
1527             if (TokenInGroup(TG::Oper))
1528                 return DoOperator();
1529             if (TokenInGroup(TG::UnOper))
1530                 return DoUnOper();
1531             if ( TokenInGroup(TG::Attribute) ||
1532                  TokenInGroup(TG::FontAttr) )
1533             {
1534                 std::stack<std::unique_ptr<SmStructureNode>> aStack;
1535                 bool    bIsAttr;
1536                 while ( (bIsAttr = TokenInGroup(TG::Attribute))
1537                        ||  TokenInGroup(TG::FontAttr))
1538                     aStack.push(bIsAttr ? DoAttribut() : DoFontAttribut());
1539 
1540                 auto xFirstNode = DoPower();
1541                 while (!aStack.empty())
1542                 {
1543                     std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
1544                     aStack.pop();
1545                     xNode->SetSubNodes(nullptr, std::move(xFirstNode));
1546                     xFirstNode = std::move(xNode);
1547                 }
1548                 return xFirstNode;
1549             }
1550             if (TokenInGroup(TG::Function))
1551                 return DoFunction();
1552             return DoError(SmParseError::UnexpectedChar);
1553     }
1554 }
1555 
1556 std::unique_ptr<SmNode> SmParser::DoEscape()
1557 {
1558     DepthProtect aDepthGuard(m_nParseDepth);
1559     if (aDepthGuard.TooDeep())
1560         throw std::range_error("parser depth limit");
1561 
1562     NextToken();
1563 
1564     switch (m_aCurToken.eType)
1565     {
1566         case TLPARENT :
1567         case TRPARENT :
1568         case TLBRACKET :
1569         case TRBRACKET :
1570         case TLDBRACKET :
1571         case TRDBRACKET :
1572         case TLBRACE :
1573         case TLGROUP :
1574         case TRBRACE :
1575         case TRGROUP :
1576         case TLANGLE :
1577         case TRANGLE :
1578         case TLCEIL :
1579         case TRCEIL :
1580         case TLFLOOR :
1581         case TRFLOOR :
1582         case TLLINE :
1583         case TRLINE :
1584         case TLDLINE :
1585         case TRDLINE :
1586             {
1587                 auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
1588                 NextToken();
1589                 return std::unique_ptr<SmNode>(pNode.release());
1590             }
1591         default:
1592             return DoError(SmParseError::UnexpectedToken);
1593     }
1594 }
1595 
1596 std::unique_ptr<SmOperNode> SmParser::DoOperator()
1597 {
1598     DepthProtect aDepthGuard(m_nParseDepth);
1599     if (aDepthGuard.TooDeep())
1600         throw std::range_error("parser depth limit");
1601 
1602     assert(TokenInGroup(TG::Oper));
1603 
1604     auto xSNode = std::make_unique<SmOperNode>(m_aCurToken);
1605 
1606     // get operator
1607     auto xOperator = DoOper();
1608 
1609     if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
1610         xOperator = DoSubSup(m_aCurToken.nGroup, xOperator.release());
1611 
1612     // get argument
1613     auto xArg = DoPower();
1614 
1615     xSNode->SetSubNodes(std::move(xOperator), std::move(xArg));
1616     return xSNode;
1617 }
1618 
1619 std::unique_ptr<SmNode> SmParser::DoOper()
1620 {
1621     DepthProtect aDepthGuard(m_nParseDepth);
1622     if (aDepthGuard.TooDeep())
1623         throw std::range_error("parser depth limit");
1624 
1625     SmTokenType  eType (m_aCurToken.eType);
1626     std::unique_ptr<SmNode> pNode;
1627 
1628     switch (eType)
1629     {
1630         case TSUM :
1631         case TPROD :
1632         case TCOPROD :
1633         case TINT :
1634         case TINTD :
1635         case TIINT :
1636         case TIIINT :
1637         case TLINT :
1638         case TLLINT :
1639         case TLLLINT :
1640             pNode.reset(new SmMathSymbolNode(m_aCurToken));
1641             break;
1642 
1643         case TLIM :
1644         case TLIMSUP :
1645         case TLIMINF :
1646             {
1647                 const char* pLim = nullptr;
1648                 switch (eType)
1649                 {
1650                     case TLIM :     pLim = "lim";       break;
1651                     case TLIMSUP :  pLim = "lim sup";   break;
1652                     case TLIMINF :  pLim = "lim inf";   break;
1653                     default:
1654                         break;
1655                 }
1656                 if( pLim )
1657                     m_aCurToken.aText = OUString::createFromAscii(pLim);
1658                 pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
1659             }
1660             break;
1661 
1662         case TOPER :
1663             NextToken();
1664 
1665             OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
1666             pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
1667             break;
1668 
1669         default :
1670             assert(false && "unknown case");
1671     }
1672 
1673     NextToken();
1674     return pNode;
1675 }
1676 
1677 std::unique_ptr<SmStructureNode> SmParser::DoUnOper()
1678 {
1679     DepthProtect aDepthGuard(m_nParseDepth);
1680     if (aDepthGuard.TooDeep())
1681         throw std::range_error("parser depth limit");
1682 
1683     assert(TokenInGroup(TG::UnOper));
1684 
1685     SmToken      aNodeToken = m_aCurToken;
1686     SmTokenType  eType      = m_aCurToken.eType;
1687     bool         bIsPostfix = eType == TFACT;
1688 
1689     std::unique_ptr<SmStructureNode> xSNode;
1690     std::unique_ptr<SmNode> xOper;
1691     std::unique_ptr<SmNode> xExtra;
1692     std::unique_ptr<SmNode> xArg;
1693 
1694     switch (eType)
1695     {
1696         case TABS :
1697         case TSQRT :
1698             NextToken();
1699             break;
1700 
1701         case TNROOT :
1702             NextToken();
1703             xExtra = DoPower();
1704             break;
1705 
1706         case TUOPER :
1707             NextToken();
1708             //Let the glyph know what it is...
1709             m_aCurToken.eType = TUOPER;
1710             m_aCurToken.nGroup = TG::UnOper;
1711             xOper = DoGlyphSpecial();
1712             break;
1713 
1714         case TPLUS :
1715         case TMINUS :
1716         case TPLUSMINUS :
1717         case TMINUSPLUS :
1718         case TNEG :
1719         case TFACT :
1720             xOper = DoOpSubSup();
1721             break;
1722 
1723         default :
1724             assert(false);
1725     }
1726 
1727     // get argument
1728     xArg = DoPower();
1729 
1730     if (eType == TABS)
1731     {
1732         xSNode.reset(new SmBraceNode(aNodeToken));
1733         xSNode->SetScaleMode(SmScaleMode::Height);
1734 
1735         // build nodes for left & right lines
1736         // (text, group, level of the used token are of no interest here)
1737         // we'll use row & column of the keyword for abs
1738         aNodeToken.eType = TABS;
1739 
1740         aNodeToken.cMathChar = MS_VERTLINE;
1741         std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
1742         std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
1743 
1744         xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight));
1745     }
1746     else if (eType == TSQRT  ||  eType == TNROOT)
1747     {
1748         xSNode.reset(new SmRootNode(aNodeToken));
1749         xOper.reset(new SmRootSymbolNode(aNodeToken));
1750         xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg));
1751     }
1752     else
1753     {
1754         xSNode.reset(new SmUnHorNode(aNodeToken));
1755         if (bIsPostfix)
1756             xSNode->SetSubNodes(std::move(xArg), std::move(xOper));
1757         else
1758         {
1759             // prefix operator
1760             xSNode->SetSubNodes(std::move(xOper), std::move(xArg));
1761         }
1762     }
1763     return xSNode;
1764 }
1765 
1766 std::unique_ptr<SmStructureNode> SmParser::DoAttribut()
1767 {
1768     DepthProtect aDepthGuard(m_nParseDepth);
1769     if (aDepthGuard.TooDeep())
1770         throw std::range_error("parser depth limit");
1771 
1772     assert(TokenInGroup(TG::Attribute));
1773 
1774     auto xSNode = std::make_unique<SmAttributNode>(m_aCurToken);
1775     std::unique_ptr<SmNode> xAttr;
1776     SmScaleMode  eScaleMode = SmScaleMode::None;
1777 
1778     // get appropriate node for the attribute itself
1779     switch (m_aCurToken.eType)
1780     {   case TUNDERLINE :
1781         case TOVERLINE :
1782         case TOVERSTRIKE :
1783             xAttr.reset(new SmRectangleNode(m_aCurToken));
1784             eScaleMode = SmScaleMode::Width;
1785             break;
1786 
1787         case TWIDEVEC :
1788         case TWIDEHARPOON :
1789         case TWIDEHAT :
1790         case TWIDETILDE :
1791             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
1792             eScaleMode = SmScaleMode::Width;
1793             break;
1794 
1795         default :
1796             xAttr.reset(new SmMathSymbolNode(m_aCurToken));
1797     }
1798 
1799     NextToken();
1800 
1801     xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later
1802     xSNode->SetScaleMode(eScaleMode);
1803     return xSNode;
1804 }
1805 
1806 std::unique_ptr<SmStructureNode> SmParser::DoFontAttribut()
1807 {
1808     DepthProtect aDepthGuard(m_nParseDepth);
1809     if (aDepthGuard.TooDeep())
1810         throw std::range_error("parser depth limit");
1811 
1812     assert(TokenInGroup(TG::FontAttr));
1813 
1814     switch (m_aCurToken.eType)
1815     {
1816         case TITALIC :
1817         case TNITALIC :
1818         case TBOLD :
1819         case TNBOLD :
1820         case TPHANTOM :
1821             {
1822                 auto pNode = std::make_unique<SmFontNode>(m_aCurToken);
1823                 NextToken();
1824                 return pNode;
1825             }
1826 
1827         case TSIZE :
1828             return DoFontSize();
1829 
1830         case TFONT :
1831             return DoFont();
1832 
1833         case TCOLOR :
1834             return DoColor();
1835 
1836         default :
1837             assert(false);
1838             return {};
1839     }
1840 }
1841 
1842 std::unique_ptr<SmStructureNode> SmParser::DoColor()
1843 {
1844     DepthProtect aDepthGuard(m_nParseDepth);
1845     if (aDepthGuard.TooDeep())
1846         throw std::range_error("parser depth limit");
1847 
1848     assert(m_aCurToken.eType == TCOLOR);
1849 
1850     std::unique_ptr<SmStructureNode> xNode;
1851     // last color rules, get that one
1852     SmToken  aToken;
1853     do
1854     {   NextToken();
1855 
1856         if (TokenInGroup(TG::Color))
1857         {   aToken = m_aCurToken;
1858             NextToken();
1859         }
1860         else
1861         {
1862             return DoError(SmParseError::ColorExpected);
1863         }
1864     } while (m_aCurToken.eType == TCOLOR);
1865 
1866     xNode.reset(new SmFontNode(aToken));
1867     return xNode;
1868 }
1869 
1870 std::unique_ptr<SmStructureNode> SmParser::DoFont()
1871 {
1872     DepthProtect aDepthGuard(m_nParseDepth);
1873     if (aDepthGuard.TooDeep())
1874         throw std::range_error("parser depth limit");
1875 
1876     assert(m_aCurToken.eType == TFONT);
1877 
1878     std::unique_ptr<SmStructureNode> xNode;
1879     // last font rules, get that one
1880     SmToken  aToken;
1881     do
1882     {   NextToken();
1883 
1884         if (TokenInGroup(TG::Font))
1885         {   aToken = m_aCurToken;
1886             NextToken();
1887         }
1888         else
1889         {
1890             return DoError(SmParseError::FontExpected);
1891         }
1892     } while (m_aCurToken.eType == TFONT);
1893 
1894     xNode.reset(new SmFontNode(aToken));
1895     return xNode;
1896 }
1897 
1898 
1899 // gets number used as arguments in Math formulas (e.g. 'size' command)
1900 // Format: no negative numbers, must start with a digit, no exponent notation, ...
1901 static bool lcl_IsNumber(const OUString& rText)
1902 {
1903     bool bPoint = false;
1904     const sal_Unicode* pBuffer = rText.getStr();
1905     for(sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
1906     {
1907         const sal_Unicode cChar = *pBuffer;
1908         if(cChar == '.')
1909         {
1910             if(bPoint)
1911                 return false;
1912             else
1913                 bPoint = true;
1914         }
1915         else if ( !rtl::isAsciiDigit( cChar ) )
1916             return false;
1917     }
1918     return true;
1919 }
1920 
1921 std::unique_ptr<SmStructureNode> SmParser::DoFontSize()
1922 {
1923     DepthProtect aDepthGuard(m_nParseDepth);
1924     if (aDepthGuard.TooDeep())
1925         throw std::range_error("parser depth limit");
1926 
1927     assert(m_aCurToken.eType == TSIZE);
1928 
1929     FontSizeType   Type;
1930     std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
1931 
1932     NextToken();
1933 
1934     switch (m_aCurToken.eType)
1935     {
1936         case TNUMBER:   Type = FontSizeType::ABSOLUT;  break;
1937         case TPLUS:     Type = FontSizeType::PLUS;     break;
1938         case TMINUS:    Type = FontSizeType::MINUS;    break;
1939         case TMULTIPLY: Type = FontSizeType::MULTIPLY; break;
1940         case TDIVIDEBY: Type = FontSizeType::DIVIDE;   break;
1941 
1942         default:
1943             return DoError(SmParseError::SizeExpected);
1944     }
1945 
1946     if (Type != FontSizeType::ABSOLUT)
1947     {
1948         NextToken();
1949         if (m_aCurToken.eType != TNUMBER)
1950             return DoError(SmParseError::SizeExpected);
1951     }
1952 
1953     // get number argument
1954     Fraction  aValue( 1 );
1955     if (lcl_IsNumber( m_aCurToken.aText ))
1956     {
1957         double fTmp = m_aCurToken.aText.toDouble();
1958         if (fTmp != 0.0)
1959         {
1960             aValue = fTmp;
1961 
1962             //!! keep the numerator and denominator from being too large
1963             //!! otherwise ongoing multiplications may result in overflows
1964             //!! (for example in SmNode::SetFontSize the font size calculated
1965             //!! may become 0 because of this!!! Happens e.g. for ftmp = 2.9 with Linux
1966             //!! or ftmp = 1.11111111111111111... (11/9) on every platform.)
1967             if (aValue.GetDenominator() > 1000)
1968             {
1969                 long nNum   = aValue.GetNumerator();
1970                 long nDenom = aValue.GetDenominator();
1971                 while (nDenom > 1000)
1972                 {
1973                     nNum    /= 10;
1974                     nDenom  /= 10;
1975                 }
1976                 aValue = Fraction( nNum, nDenom );
1977             }
1978         }
1979     }
1980 
1981     NextToken();
1982 
1983     pFontNode->SetSizeParameter(aValue, Type);
1984     return pFontNode;
1985 }
1986 
1987 std::unique_ptr<SmStructureNode> SmParser::DoBrace()
1988 {
1989     DepthProtect aDepthGuard(m_nParseDepth);
1990     if (aDepthGuard.TooDeep())
1991         throw std::range_error("parser depth limit");
1992 
1993     assert(m_aCurToken.eType == TLEFT  ||  TokenInGroup(TG::LBrace));
1994 
1995     std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
1996     std::unique_ptr<SmNode> pBody, pLeft, pRight;
1997     SmScaleMode   eScaleMode = SmScaleMode::None;
1998     SmParseError  eError     = SmParseError::None;
1999 
2000     if (m_aCurToken.eType == TLEFT)
2001     {   NextToken();
2002 
2003         eScaleMode = SmScaleMode::Height;
2004 
2005         // check for left bracket
2006         if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2007         {
2008             pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2009 
2010             NextToken();
2011             pBody = DoBracebody(true);
2012 
2013             if (m_aCurToken.eType == TRIGHT)
2014             {   NextToken();
2015 
2016                 // check for right bracket
2017                 if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
2018                 {
2019                     pRight.reset(new SmMathSymbolNode(m_aCurToken));
2020                     NextToken();
2021                 }
2022                 else
2023                     eError = SmParseError::RbraceExpected;
2024             }
2025             else
2026                 eError = SmParseError::RightExpected;
2027         }
2028         else
2029             eError = SmParseError::LbraceExpected;
2030     }
2031     else
2032     {
2033         assert(TokenInGroup(TG::LBrace));
2034 
2035         pLeft.reset(new SmMathSymbolNode(m_aCurToken));
2036 
2037         NextToken();
2038         pBody = DoBracebody(false);
2039 
2040         SmTokenType  eExpectedType = TUNKNOWN;
2041         switch (pLeft->GetToken().eType)
2042         {   case TLPARENT :     eExpectedType = TRPARENT;   break;
2043             case TLBRACKET :    eExpectedType = TRBRACKET;  break;
2044             case TLBRACE :      eExpectedType = TRBRACE;    break;
2045             case TLDBRACKET :   eExpectedType = TRDBRACKET; break;
2046             case TLLINE :       eExpectedType = TRLINE;     break;
2047             case TLDLINE :      eExpectedType = TRDLINE;    break;
2048             case TLANGLE :      eExpectedType = TRANGLE;    break;
2049             case TLFLOOR :      eExpectedType = TRFLOOR;    break;
2050             case TLCEIL :       eExpectedType = TRCEIL;     break;
2051             default :
2052                 SAL_WARN("starmath", "unknown case");
2053             }
2054 
2055         if (m_aCurToken.eType == eExpectedType)
2056         {
2057             pRight.reset(new SmMathSymbolNode(m_aCurToken));
2058             NextToken();
2059         }
2060         else
2061             eError = SmParseError::ParentMismatch;
2062     }
2063 
2064     if (eError == SmParseError::None)
2065     {
2066         assert(pLeft);
2067         assert(pRight);
2068         xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
2069         xSNode->SetScaleMode(eScaleMode);
2070         return xSNode;
2071     }
2072     return DoError(eError);
2073 }
2074 
2075 std::unique_ptr<SmBracebodyNode> SmParser::DoBracebody(bool bIsLeftRight)
2076 {
2077     DepthProtect aDepthGuard(m_nParseDepth);
2078     if (aDepthGuard.TooDeep())
2079         throw std::range_error("parser depth limit");
2080 
2081     auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken);
2082 
2083     std::vector<std::unique_ptr<SmNode>> aNodes;
2084     // get body if any
2085     if (bIsLeftRight)
2086     {
2087         do
2088         {
2089             if (m_aCurToken.eType == TMLINE)
2090             {
2091                 aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
2092                 NextToken();
2093             }
2094             else if (m_aCurToken.eType != TRIGHT)
2095             {
2096                 aNodes.push_back(DoAlign());
2097                 if (m_aCurToken.eType != TMLINE  &&  m_aCurToken.eType != TRIGHT)
2098                     aNodes.emplace_back(DoError(SmParseError::RightExpected));
2099             }
2100         } while (m_aCurToken.eType != TEND  &&  m_aCurToken.eType != TRIGHT);
2101     }
2102     else
2103     {
2104         do
2105         {
2106             if (m_aCurToken.eType == TMLINE)
2107             {
2108                 aNodes.emplace_back(std::make_unique<SmMathSymbolNode>(m_aCurToken));
2109                 NextToken();
2110             }
2111             else if (!TokenInGroup(TG::RBrace))
2112             {
2113                 aNodes.push_back(DoAlign());
2114                 if (m_aCurToken.eType != TMLINE  &&  !TokenInGroup(TG::RBrace))
2115                     aNodes.emplace_back(DoError(SmParseError::RbraceExpected));
2116             }
2117         } while (m_aCurToken.eType != TEND  &&  !TokenInGroup(TG::RBrace));
2118     }
2119 
2120     pBody->SetSubNodes(buildNodeArray(aNodes));
2121     pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
2122     return pBody;
2123 }
2124 
2125 std::unique_ptr<SmTextNode> SmParser::DoFunction()
2126 {
2127     DepthProtect aDepthGuard(m_nParseDepth);
2128     if (aDepthGuard.TooDeep())
2129         throw std::range_error("parser depth limit");
2130 
2131     switch (m_aCurToken.eType)
2132     {
2133         case TFUNC:
2134             NextToken();    // skip "FUNC"-statement
2135             [[fallthrough]];
2136 
2137         case TSIN :
2138         case TCOS :
2139         case TTAN :
2140         case TCOT :
2141         case TASIN :
2142         case TACOS :
2143         case TATAN :
2144         case TACOT :
2145         case TSINH :
2146         case TCOSH :
2147         case TTANH :
2148         case TCOTH :
2149         case TASINH :
2150         case TACOSH :
2151         case TATANH :
2152         case TACOTH :
2153         case TLN :
2154         case TLOG :
2155         case TEXP :
2156             {
2157                 auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
2158                 NextToken();
2159                 return pNode;
2160             }
2161 
2162         default:
2163             assert(false);
2164             return nullptr;
2165     }
2166 }
2167 
2168 std::unique_ptr<SmTableNode> SmParser::DoBinom()
2169 {
2170     DepthProtect aDepthGuard(m_nParseDepth);
2171     if (aDepthGuard.TooDeep())
2172         throw std::range_error("parser depth limit");
2173 
2174     auto xSNode = std::make_unique<SmTableNode>(m_aCurToken);
2175 
2176     NextToken();
2177 
2178     auto xFirst = DoSum();
2179     auto xSecond = DoSum();
2180     xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond));
2181     return xSNode;
2182 }
2183 
2184 std::unique_ptr<SmStructureNode> SmParser::DoStack()
2185 {
2186     DepthProtect aDepthGuard(m_nParseDepth);
2187     if (aDepthGuard.TooDeep())
2188         throw std::range_error("parser depth limit");
2189 
2190     std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
2191     NextToken();
2192     if (m_aCurToken.eType != TLGROUP)
2193         return DoError(SmParseError::LgroupExpected);
2194     std::vector<std::unique_ptr<SmNode>> aExprArr;
2195     do
2196     {
2197         NextToken();
2198         aExprArr.push_back(DoAlign());
2199     }
2200     while (m_aCurToken.eType == TPOUND);
2201 
2202     if (m_aCurToken.eType == TRGROUP)
2203         NextToken();
2204     else
2205         aExprArr.emplace_back(DoError(SmParseError::RgroupExpected));
2206 
2207     xSNode->SetSubNodes(buildNodeArray(aExprArr));
2208     return xSNode;
2209 }
2210 
2211 std::unique_ptr<SmStructureNode> SmParser::DoMatrix()
2212 {
2213     DepthProtect aDepthGuard(m_nParseDepth);
2214     if (aDepthGuard.TooDeep())
2215         throw std::range_error("parser depth limit");
2216 
2217     std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
2218     NextToken();
2219     if (m_aCurToken.eType != TLGROUP)
2220         return DoError(SmParseError::LgroupExpected);
2221 
2222     std::vector<std::unique_ptr<SmNode>> aExprArr;
2223     do
2224     {
2225         NextToken();
2226         aExprArr.push_back(DoAlign());
2227     }
2228     while (m_aCurToken.eType == TPOUND);
2229 
2230     size_t nCol = aExprArr.size();
2231     size_t nRow = 1;
2232     while (m_aCurToken.eType == TDPOUND)
2233     {
2234         NextToken();
2235         for (size_t i = 0; i < nCol; i++)
2236         {
2237             auto xNode = DoAlign();
2238             if (i < (nCol - 1))
2239             {
2240                 if (m_aCurToken.eType == TPOUND)
2241                     NextToken();
2242                 else
2243                     xNode = DoError(SmParseError::PoundExpected);
2244             }
2245             aExprArr.emplace_back(std::move(xNode));
2246         }
2247         ++nRow;
2248     }
2249 
2250     if (m_aCurToken.eType == TRGROUP)
2251         NextToken();
2252     else
2253     {
2254         std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
2255         if (aExprArr.empty())
2256             nRow = nCol = 1;
2257         else
2258             aExprArr.pop_back();
2259         aExprArr.emplace_back(std::move(xENode));
2260     }
2261 
2262     xMNode->SetSubNodes(buildNodeArray(aExprArr));
2263     xMNode->SetRowCol(static_cast<sal_uInt16>(nRow),
2264                       static_cast<sal_uInt16>(nCol));
2265     return std::unique_ptr<SmStructureNode>(xMNode.release());
2266 }
2267 
2268 std::unique_ptr<SmSpecialNode> SmParser::DoSpecial()
2269 {
2270     DepthProtect aDepthGuard(m_nParseDepth);
2271     if (aDepthGuard.TooDeep())
2272         throw std::range_error("parser depth limit");
2273 
2274     bool bReplace = false;
2275     OUString &rName = m_aCurToken.aText;
2276     OUString aNewName;
2277 
2278     // conversion of symbol names for 6.0 (XML) file format
2279     // (name change on import / export.
2280     // UI uses localized names XML file format does not.)
2281     if( rName.startsWith("%") )
2282     {
2283         if (IsImportSymbolNames())
2284         {
2285             aNewName = SmLocalizedSymbolData::GetUiSymbolName(rName.copy(1));
2286             bReplace = true;
2287         }
2288         else if (IsExportSymbolNames())
2289         {
2290             aNewName = SmLocalizedSymbolData::GetExportSymbolName(rName.copy(1));
2291             bReplace = true;
2292         }
2293     }
2294     if (!aNewName.isEmpty())
2295         aNewName = "%" + aNewName;
2296 
2297 
2298     if (bReplace && !aNewName.isEmpty() && rName != aNewName)
2299     {
2300         Replace(GetTokenIndex(), rName.getLength(), aNewName);
2301         rName = aNewName;
2302     }
2303 
2304     // add symbol name to list of used symbols
2305     const OUString aSymbolName(m_aCurToken.aText.copy(1));
2306     if (!aSymbolName.isEmpty())
2307         m_aUsedSymbols.insert( aSymbolName );
2308 
2309     auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken);
2310     NextToken();
2311     return pNode;
2312 }
2313 
2314 std::unique_ptr<SmGlyphSpecialNode> SmParser::DoGlyphSpecial()
2315 {
2316     DepthProtect aDepthGuard(m_nParseDepth);
2317     if (aDepthGuard.TooDeep())
2318         throw std::range_error("parser depth limit");
2319 
2320     auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken);
2321     NextToken();
2322     return pNode;
2323 }
2324 
2325 std::unique_ptr<SmExpressionNode> SmParser::DoError(SmParseError eError)
2326 {
2327     DepthProtect aDepthGuard(m_nParseDepth);
2328     if (aDepthGuard.TooDeep())
2329         throw std::range_error("parser depth limit");
2330 
2331     auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
2332     std::unique_ptr<SmErrorNode> pErr(new SmErrorNode(m_aCurToken));
2333     xSNode->SetSubNodes(std::move(pErr), nullptr);
2334 
2335     AddError(eError, xSNode.get());
2336 
2337     NextToken();
2338 
2339     return xSNode;
2340 }
2341 
2342 // end grammar
2343 
2344 
2345 SmParser::SmParser()
2346     : m_nCurError( 0 )
2347     , m_nBufferIndex( 0 )
2348     , m_nTokenIndex( 0 )
2349     , m_nRow( 0 )
2350     , m_nColOff( 0 )
2351     , m_bImportSymNames( false )
2352     , m_bExportSymNames( false )
2353     , m_nParseDepth(0)
2354     , m_aNumCC( LanguageTag( LANGUAGE_ENGLISH_US ) )
2355     , m_pSysCC( SM_MOD()->GetSysLocale().GetCharClassPtr() )
2356 {
2357 }
2358 
2359 std::unique_ptr<SmTableNode> SmParser::Parse(const OUString &rBuffer)
2360 {
2361     m_aUsedSymbols.clear();
2362 
2363     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2364     m_nBufferIndex  = 0;
2365     m_nTokenIndex   = 0;
2366     m_nRow          = 1;
2367     m_nColOff       = 0;
2368     m_nCurError     = -1;
2369 
2370     m_aErrDescList.clear();
2371 
2372     NextToken();
2373     return DoTable();
2374 }
2375 
2376 std::unique_ptr<SmNode> SmParser::ParseExpression(const OUString &rBuffer)
2377 {
2378     m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);
2379     m_nBufferIndex  = 0;
2380     m_nTokenIndex   = 0;
2381     m_nRow          = 1;
2382     m_nColOff       = 0;
2383     m_nCurError     = -1;
2384 
2385     m_aErrDescList.clear();
2386 
2387     NextToken();
2388     return DoExpression();
2389 }
2390 
2391 
2392 void SmParser::AddError(SmParseError Type, SmNode *pNode)
2393 {
2394     std::unique_ptr<SmErrorDesc> pErrDesc(new SmErrorDesc);
2395 
2396     pErrDesc->m_eType = Type;
2397     pErrDesc->m_pNode = pNode;
2398     pErrDesc->m_aText = SmResId(RID_ERR_IDENT);
2399 
2400     const char* pRID;
2401     switch (Type)
2402     {
2403         case SmParseError::UnexpectedChar:     pRID = RID_ERR_UNEXPECTEDCHARACTER; break;
2404         case SmParseError::UnexpectedToken:    pRID = RID_ERR_UNEXPECTEDTOKEN;     break;
2405         case SmParseError::PoundExpected:      pRID = RID_ERR_POUNDEXPECTED;       break;
2406         case SmParseError::ColorExpected:      pRID = RID_ERR_COLOREXPECTED;       break;
2407         case SmParseError::LgroupExpected:     pRID = RID_ERR_LGROUPEXPECTED;      break;
2408         case SmParseError::RgroupExpected:     pRID = RID_ERR_RGROUPEXPECTED;      break;
2409         case SmParseError::LbraceExpected:     pRID = RID_ERR_LBRACEEXPECTED;      break;
2410         case SmParseError::RbraceExpected:     pRID = RID_ERR_RBRACEEXPECTED;      break;
2411         case SmParseError::ParentMismatch:     pRID = RID_ERR_PARENTMISMATCH;      break;
2412         case SmParseError::RightExpected:      pRID = RID_ERR_RIGHTEXPECTED;       break;
2413         case SmParseError::FontExpected:       pRID = RID_ERR_FONTEXPECTED;        break;
2414         case SmParseError::SizeExpected:       pRID = RID_ERR_SIZEEXPECTED;        break;
2415         case SmParseError::DoubleAlign:        pRID = RID_ERR_DOUBLEALIGN;         break;
2416         case SmParseError::DoubleSubsupscript: pRID = RID_ERR_DOUBLESUBSUPSCRIPT;  break;
2417         default:
2418             assert(false);
2419             return;
2420     }
2421     pErrDesc->m_aText += SmResId(pRID);
2422 
2423     m_aErrDescList.push_back(std::move(pErrDesc));
2424 }
2425 
2426 
2427 const SmErrorDesc *SmParser::NextError()
2428 {
2429     if ( !m_aErrDescList.empty() )
2430         if (m_nCurError > 0) return m_aErrDescList[ --m_nCurError ].get();
2431         else
2432         {
2433             m_nCurError = 0;
2434             return m_aErrDescList[ m_nCurError ].get();
2435         }
2436     else return nullptr;
2437 }
2438 
2439 
2440 const SmErrorDesc *SmParser::PrevError()
2441 {
2442     if ( !m_aErrDescList.empty() )
2443         if (m_nCurError < static_cast<int>(m_aErrDescList.size() - 1)) return m_aErrDescList[ ++m_nCurError ].get();
2444         else
2445         {
2446             m_nCurError = static_cast<int>(m_aErrDescList.size() - 1);
2447             return m_aErrDescList[ m_nCurError ].get();
2448         }
2449     else return nullptr;
2450 }
2451 
2452 
2453 const SmErrorDesc *SmParser::GetError()
2454 {
2455     if ( !m_aErrDescList.empty() )
2456         return m_aErrDescList.front().get();
2457     return nullptr;
2458 }
2459 
2460 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2461