xref: /core/basic/source/comp/token.cxx (revision e6ab9174)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
#include <algorithm>
#include <array>
#include <iterator>

#include <basic/sberrors.hxx>
#include <sal/macros.h>
#include <basiccharclass.hxx>
#include <token.hxx>
27 
28 struct TokenTable { SbiToken t; const char *s; };
29 
30 static const TokenTable aTokTable_Basic [] = {
31     { CAT,      "&" },
32     { MUL,      "*" },
33     { PLUS,     "+" },
34     { MINUS,    "-" },
35     { DIV,      "/" },
36     { EOS,      ":" },
37     { ASSIGN,   ":=" },
38     { LT,       "<" },
39     { LE,       "<=" },
40     { NE,       "<>" },
41     { EQ,       "=" },
42     { GT,       ">" },
43     { GE,       ">=" },
44     { ACCESS,   "Access" },
45     { ALIAS,    "Alias" },
46     { AND,      "And" },
47     { ANY,      "Any" },
48     { APPEND,   "Append" },
49     { AS,       "As" },
50     { ATTRIBUTE,"Attribute" },
51     { BASE,     "Base" },
52     { BINARY,   "Binary" },
53     { TBOOLEAN, "Boolean" },
54     { BYREF,    "ByRef", },
55     { TBYTE,    "Byte", },
56     { BYVAL,    "ByVal", },
57     { CALL,     "Call" },
58     { CASE,     "Case" },
59     { CDECL_,   "Cdecl" },
60     { CLASSMODULE, "ClassModule" },
61     { CLOSE,    "Close" },
62     { COMPARE,  "Compare" },
63     { COMPATIBLE,"Compatible" },
64     { CONST_,   "Const" },
65     { TCURRENCY,"Currency" },
66     { TDATE,    "Date" },
67     { DECLARE,  "Declare" },
68     { DEFBOOL,  "DefBool" },
69     { DEFCUR,   "DefCur" },
70     { DEFDATE,  "DefDate" },
71     { DEFDBL,   "DefDbl" },
72     { DEFERR,   "DefErr" },
73     { DEFINT,   "DefInt" },
74     { DEFLNG,   "DefLng" },
75     { DEFOBJ,   "DefObj" },
76     { DEFSNG,   "DefSng" },
77     { DEFSTR,   "DefStr" },
78     { DEFVAR,   "DefVar" },
79     { DIM,      "Dim" },
80     { DO,       "Do" },
81     { TDOUBLE,  "Double" },
82     { EACH,     "Each" },
83     { ELSE,     "Else" },
84     { ELSEIF,   "ElseIf" },
85     { END,      "End" },
86     { ENDENUM,  "End Enum" },
87     { ENDFUNC,  "End Function" },
88     { ENDIF,    "End If" },
89     { ENDPROPERTY, "End Property" },
90     { ENDSELECT,"End Select" },
91     { ENDSUB,   "End Sub" },
92     { ENDTYPE,  "End Type" },
93     { ENDIF,    "EndIf" },
94     { ENUM,     "Enum" },
95     { EQV,      "Eqv" },
96     { ERASE,    "Erase" },
97     { ERROR_,   "Error" },
98     { EXIT,     "Exit" },
99     { BASIC_EXPLICIT, "Explicit" },
100     { FOR,      "For" },
101     { FUNCTION, "Function" },
102     { GET,      "Get" },
103     { GLOBAL,   "Global" },
104     { GOSUB,    "GoSub" },
105     { GOTO,     "GoTo" },
106     { IF,       "If" },
107     { IMP,      "Imp" },
108     { IMPLEMENTS, "Implements" },
109     { IN_,      "In" },
110     { INPUT,    "Input" },              // also INPUT #
111     { TINTEGER, "Integer" },
112     { IS,       "Is" },
113     { LET,      "Let" },
114     { LIB,      "Lib" },
115     { LIKE,     "Like" },
116     { LINE,     "Line" },
117     { LINEINPUT,"Line Input" },
118     { LOCAL,    "Local" },
119     { LOCK,     "Lock" },
120     { TLONG,    "Long" },
121     { LOOP,     "Loop" },
122     { LPRINT,   "LPrint" },
123     { LSET,     "LSet" }, // JSM
124     { MOD,      "Mod" },
125     { NAME,     "Name" },
126     { NEW,      "New" },
127     { NEXT,     "Next" },
128     { NOT,      "Not" },
129     { TOBJECT,  "Object" },
130     { ON,       "On" },
131     { OPEN,     "Open" },
132     { OPTION,   "Option" },
133     { OPTIONAL_, "Optional" },
134     { OR,       "Or" },
135     { OUTPUT,   "Output" },
136     { PARAMARRAY,   "ParamArray" },
137     { PRESERVE, "Preserve" },
138     { PRINT,    "Print" },
139     { PRIVATE,  "Private" },
140     { PROPERTY, "Property" },
141     { PTRSAFE,  "PtrSafe" },
142     { PUBLIC,   "Public" },
143     { RANDOM,   "Random" },
144     { READ,     "Read" },
145     { REDIM,    "ReDim" },
146     { REM,      "Rem" },
147     { RESUME,   "Resume" },
148     { RETURN,   "Return" },
149     { RSET,     "RSet" }, // JSM
150     { SELECT,   "Select" },
151     { SET,      "Set" },
152     { SHARED,   "Shared" },
153     { TSINGLE,  "Single" },
154     { STATIC,   "Static" },
155     { STEP,     "Step" },
156     { STOP,     "Stop" },
157     { TSTRING,  "String" },
158     { SUB,      "Sub" },
159     { STOP,     "System" },
160     { TEXT,     "Text" },
161     { THEN,     "Then" },
162     { TO,       "To", },
163     { TYPE,     "Type" },
164     { TYPEOF,   "TypeOf" },
165     { UNTIL,    "Until" },
166     { TVARIANT, "Variant" },
167     { VBASUPPORT,   "VbaSupport" },
168     { WEND,     "Wend" },
169     { WHILE,    "While" },
170     { WITH,     "With" },
171     { WITHEVENTS,   "WithEvents" },
172     { WRITE,    "Write" },              // also WRITE #
173     { XOR,      "Xor" },
174 };
175 
176 // #i109076
177 class TokenLabelInfo
178 {
179     std::array<bool,VBASUPPORT+1> m_pTokenCanBeLabelTab;
180 
181 public:
182     TokenLabelInfo();
183 
184     bool canTokenBeLabel( SbiToken eTok )
185         { return m_pTokenCanBeLabelTab[eTok]; }
186 };
187 
188 class StaticTokenLabelInfo: public ::rtl::Static< TokenLabelInfo, StaticTokenLabelInfo >{};
189 
190 // #i109076
191 TokenLabelInfo::TokenLabelInfo()
192 {
193     m_pTokenCanBeLabelTab.fill(false);
194 
195     // Token accepted as label by VBA
196     static const SbiToken eLabelToken[] = { ACCESS, ALIAS, APPEND, BASE, BINARY, CLASSMODULE,
197                                COMPARE, COMPATIBLE, DEFERR, ERROR_, BASIC_EXPLICIT, LIB, LINE, LPRINT, NAME,
198                                TOBJECT, OUTPUT, PROPERTY, RANDOM, READ, STEP, STOP, TEXT, VBASUPPORT };
199     for( SbiToken eTok : eLabelToken )
200     {
201         m_pTokenCanBeLabelTab[eTok] = true;
202     }
203 }
204 
205 
206 SbiTokenizer::SbiTokenizer( const OUString& rSrc, StarBASIC* pb )
207     : SbiScanner(rSrc, pb)
208     , eCurTok(NIL)
209     , ePush(NIL)
210     , nPLine(0)
211     , nPCol1(0)
212     , nPCol2(0)
213     , bEof(false)
214     , bEos(true)
215     , bAs(false)
216     , bErrorIsSymbol(true)
217 {
218 }
219 
220 void SbiTokenizer::Push( SbiToken t )
221 {
222     if( ePush != NIL )
223         Error( ERRCODE_BASIC_INTERNAL_ERROR, "PUSH" );
224     else ePush = t;
225 }
226 
227 void SbiTokenizer::Error( ErrCode code, const OUString &aMsg )
228 {
229     aError = aMsg;
230     Error( code );
231 }
232 
233 void SbiTokenizer::Error( ErrCode code, SbiToken tok )
234 {
235     aError = Symbol( tok );
236     Error( code );
237 }
238 
239 // reading in the next token without absorbing it
240 
241 SbiToken SbiTokenizer::Peek()
242 {
243     if( ePush == NIL )
244     {
245         sal_uInt16 nOldLine = nLine;
246         sal_uInt16 nOldCol1 = nCol1;
247         sal_uInt16 nOldCol2 = nCol2;
248         ePush = Next();
249         nPLine = nLine; nLine = nOldLine;
250         nPCol1 = nCol1; nCol1 = nOldCol1;
251         nPCol2 = nCol2; nCol2 = nOldCol2;
252     }
253     return eCurTok = ePush;
254 }
255 
256 // For decompilation. Numbers and symbols return an empty string.
257 
258 const OUString& SbiTokenizer::Symbol( SbiToken t )
259 {
260     // character token?
261     if( t < FIRSTKWD )
262     {
263         aSym = OUString(sal::static_int_cast<sal_Unicode>(t));
264         return aSym;
265     }
266     switch( t )
267     {
268     case NEG   :
269         aSym = "-";
270         return aSym;
271     case EOS   :
272         aSym = ":/CRLF";
273         return aSym;
274     case EOLN  :
275         aSym = "CRLF";
276         return aSym;
277     default:
278         break;
279     }
280     for( auto& rTok : aTokTable_Basic )
281     {
282         if( rTok.t == t )
283         {
284             aSym = OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
285             return aSym;
286         }
287     }
288     const sal_Unicode *p = aSym.getStr();
289     if (*p <= ' ')
290     {
291         aSym = "???";
292     }
293     return aSym;
294 }
295 
296 // Reading in the next token and put it down.
297 // Tokens that don't appear in the token table
298 // are directly returned as a character.
299 // Some words are treated in a special way.
300 
301 SbiToken SbiTokenizer::Next()
302 {
303     if (bEof)
304     {
305         return EOLN;
306     }
307     // have read in one already?
308     if( ePush != NIL )
309     {
310         eCurTok = ePush;
311         ePush = NIL;
312         nLine = nPLine;
313         nCol1 = nPCol1;
314         nCol2 = nPCol2;
315         bEos = IsEoln( eCurTok );
316         return eCurTok;
317     }
318     const TokenTable *tp;
319 
320     if( !NextSym() )
321     {
322         bEof = bEos = true;
323         return eCurTok = EOLN;
324     }
325 
326     if( aSym.startsWith("\n") )
327     {
328         bEos = true;
329         return eCurTok = EOLN;
330     }
331     bEos = false;
332 
333     if( bNumber )
334     {
335         return eCurTok = NUMBER;
336     }
337     else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol )
338     {
339         return eCurTok = FIXSTRING;
340     }
341     else if( aSym.isEmpty() )
342     {
343         //something went wrong
344         bEof = bEos = true;
345         return eCurTok = EOLN;
346     }
347     // Special cases of characters that are between "Z" and "a". ICompare()
348     // evaluates the position of these characters in different ways.
349     else if( aSym[0] == '^' )
350     {
351         return eCurTok = EXPON;
352     }
353     else if( aSym[0] == '\\' )
354     {
355         return eCurTok = IDIV;
356     }
357     else
358     {
359         if( eScanType != SbxVARIANT )
360             return eCurTok = SYMBOL;
361         // valid token?
362         short lb = 0;
363         short ub = SAL_N_ELEMENTS(aTokTable_Basic)-1;
364         short delta;
365         do
366         {
367             delta = (ub - lb) >> 1;
368             tp = &aTokTable_Basic[ lb + delta ];
369             sal_Int32 res = aSym.compareToIgnoreAsciiCaseAscii( tp->s );
370 
371             if( res == 0 )
372             {
373                 goto special;
374             }
375             if( res < 0 )
376             {
377                 if ((ub - lb) == 2)
378                 {
379                     ub = lb;
380                 }
381                 else
382                 {
383                     ub = ub - delta;
384                 }
385             }
386             else
387             {
388                 if ((ub -lb) == 2)
389                 {
390                     lb = ub;
391                 }
392                 else
393                 {
394                     lb = lb + delta;
395                 }
396             }
397         }
398         while( delta );
399         // Symbol? if not >= token
400         sal_Unicode ch = aSym[0];
401         if( !BasicCharClass::isAlpha( ch, bCompatible ) && !bSymbol )
402         {
403             return eCurTok = static_cast<SbiToken>(ch & 0x00FF);
404         }
405         return eCurTok = SYMBOL;
406     }
407 special:
408     // #i92642
409     bool bStartOfLine = (eCurTok == NIL || eCurTok == REM || eCurTok == EOLN ||
410             eCurTok == THEN || eCurTok == ELSE); // single line If
411     if( !bStartOfLine && (tp->t == NAME || tp->t == LINE) )
412     {
413         return eCurTok = SYMBOL;
414     }
415     else if( tp->t == TEXT )
416     {
417         return eCurTok = SYMBOL;
418     }
419     // maybe we can expand this for other statements that have parameters
420     // that are keywords ( and those keywords are only used within such
421     // statements )
422     // what's happening here is that if we come across 'append' ( and we are
423     // not in the middle of parsing a special statement ( like 'Open')
424     // we just treat keyword 'append' as a normal 'SYMBOL'.
425     // Also we accept Dim APPEND
426     else if ( ( !bInStatement || eCurTok == DIM ) && tp->t == APPEND )
427     {
428         return eCurTok = SYMBOL;
429     }
430     // #i92642: Special LINE token handling -> SbiParser::Line()
431 
432     // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH
433     if( tp->t == END )
434     {
435         // from 15.3.96, special treatment for END, at Peek() the current
436         // time is lost, so memorize everything and restore after
437         sal_uInt16 nOldLine = nLine;
438         sal_uInt16 nOldCol  = nCol;
439         sal_uInt16 nOldCol1 = nCol1;
440         sal_uInt16 nOldCol2 = nCol2;
441         OUString aOldSym = aSym;
442         SaveLine();             // save pLine in the scanner
443 
444         eCurTok = Peek();
445         switch( eCurTok )
446         {
447         case IF:       Next(); eCurTok = ENDIF; break;
448         case SELECT:   Next(); eCurTok = ENDSELECT; break;
449         case SUB:      Next(); eCurTok = ENDSUB; break;
450         case FUNCTION: Next(); eCurTok = ENDFUNC; break;
451         case PROPERTY: Next(); eCurTok = ENDPROPERTY; break;
452         case TYPE:     Next(); eCurTok = ENDTYPE; break;
453         case ENUM:     Next(); eCurTok = ENDENUM; break;
454         case WITH:     Next(); eCurTok = ENDWITH; break;
455         default :      eCurTok = END; break;
456         }
457         nCol1 = nOldCol1;
458         if( eCurTok == END )
459         {
460             // reset everything so that token is read completely newly after END
461             ePush = NIL;
462             nLine = nOldLine;
463             nCol  = nOldCol;
464             nCol2 = nOldCol2;
465             aSym = aOldSym;
466             RestoreLine();
467         }
468         return eCurTok;
469     }
470     // are data types keywords?
471     // there is ERROR(), DATA(), STRING() etc.
472     eCurTok = tp->t;
473     // AS: data types are keywords
474     if( tp->t == AS )
475     {
476         bAs = true;
477     }
478     else
479     {
480         if( bAs )
481         {
482             bAs = false;
483         }
484         else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != ERROR_) )
485         {
486             eCurTok = SYMBOL;
487         }
488     }
489 
490     // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode
491     SbiToken eTok = tp->t;
492     if( bCompatible )
493     {
494         // #129904 Suppress system
495         if( eTok == STOP && aSym.equalsIgnoreAsciiCase("system") )
496         {
497             eCurTok = SYMBOL;
498         }
499         if( eTok == GET && bStartOfLine )
500         {
501             eCurTok = SYMBOL;
502         }
503     }
504     else
505     {
506         if( eTok == CLASSMODULE ||
507             eTok == IMPLEMENTS ||
508             eTok == PARAMARRAY ||
509             eTok == ENUM ||
510             eTok == PROPERTY ||
511             eTok == GET ||
512             eTok == TYPEOF )
513         {
514             eCurTok = SYMBOL;
515         }
516     }
517 
518     bEos = IsEoln( eCurTok );
519     return eCurTok;
520 }
521 
522 bool SbiTokenizer::MayBeLabel( bool bNeedsColon )
523 {
524     if( eCurTok == SYMBOL || StaticTokenLabelInfo::get().canTokenBeLabel( eCurTok ) )
525     {
526         return !bNeedsColon || DoesColonFollow();
527     }
528     else
529     {
530         return ( eCurTok == NUMBER
531                   && eScanType == SbxINTEGER
532                   && nVal >= 0 );
533     }
534 }
535 
536 
537 OUString SbiTokenizer::GetKeywordCase( const OUString& sKeyword )
538 {
539     for( auto& rTok : aTokTable_Basic )
540     {
541         if( sKeyword.equalsIgnoreAsciiCaseAscii(rTok.s) )
542             return OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US);
543     }
544     return OUString();
545 }
546 
547 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
548