1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 21 #include <array> 22 23 #include <basic/sberrors.hxx> 24 #include <sal/macros.h> 25 #include <basiccharclass.hxx> 26 #include <token.hxx> 27 28 struct TokenTable { SbiToken t; const char *s; }; 29 30 static const TokenTable aTokTable_Basic [] = { 31 { CAT, "&" }, 32 { MUL, "*" }, 33 { PLUS, "+" }, 34 { MINUS, "-" }, 35 { DIV, "/" }, 36 { EOS, ":" }, 37 { ASSIGN, ":=" }, 38 { LT, "<" }, 39 { LE, "<=" }, 40 { NE, "<>" }, 41 { EQ, "=" }, 42 { GT, ">" }, 43 { GE, ">=" }, 44 { ACCESS, "Access" }, 45 { ALIAS, "Alias" }, 46 { AND, "And" }, 47 { ANY, "Any" }, 48 { APPEND, "Append" }, 49 { AS, "As" }, 50 { ATTRIBUTE,"Attribute" }, 51 { BASE, "Base" }, 52 { BINARY, "Binary" }, 53 { TBOOLEAN, "Boolean" }, 54 { BYREF, "ByRef", }, 55 { TBYTE, "Byte", }, 56 { BYVAL, "ByVal", }, 57 { CALL, "Call" }, 58 { CASE, "Case" }, 59 { CDECL_, "Cdecl" }, 60 { CLASSMODULE, "ClassModule" }, 61 { CLOSE, "Close" }, 62 { COMPARE, "Compare" }, 63 { COMPATIBLE,"Compatible" }, 64 { CONST_, "Const" }, 65 { TCURRENCY,"Currency" }, 66 { TDATE, "Date" }, 67 { DECLARE, "Declare" }, 68 { DEFBOOL, "DefBool" }, 69 { DEFCUR, "DefCur" }, 70 { DEFDATE, "DefDate" }, 71 { DEFDBL, "DefDbl" }, 72 { DEFERR, "DefErr" }, 73 { DEFINT, "DefInt" }, 74 { DEFLNG, "DefLng" }, 75 { DEFOBJ, "DefObj" }, 76 { DEFSNG, "DefSng" }, 77 { DEFSTR, "DefStr" }, 78 { DEFVAR, "DefVar" }, 79 { DIM, "Dim" }, 80 { DO, "Do" }, 81 { TDOUBLE, "Double" }, 82 { EACH, "Each" }, 83 { ELSE, "Else" }, 84 { ELSEIF, "ElseIf" }, 85 { END, "End" }, 86 { ENDENUM, "End Enum" }, 87 { ENDFUNC, "End Function" }, 88 { ENDIF, "End If" }, 89 { ENDPROPERTY, "End Property" }, 90 { ENDSELECT,"End Select" }, 91 { ENDSUB, "End Sub" }, 92 { ENDTYPE, "End Type" }, 93 { ENDIF, "EndIf" }, 94 { ENUM, "Enum" }, 95 { EQV, "Eqv" }, 96 { ERASE, "Erase" }, 97 { ERROR_, "Error" }, 98 { EXIT, "Exit" }, 99 { BASIC_EXPLICIT, "Explicit" }, 100 { FOR, "For" }, 101 { FUNCTION, "Function" }, 102 { GET, "Get" }, 103 { GLOBAL, "Global" }, 104 { GOSUB, "GoSub" }, 105 { GOTO, "GoTo" }, 106 { IF, "If" }, 107 { IMP, "Imp" }, 108 { IMPLEMENTS, "Implements" }, 109 { IN_, "In" }, 110 { INPUT, "Input" }, // also INPUT # 111 { TINTEGER, "Integer" }, 112 { IS, "Is" }, 113 { LET, "Let" }, 114 { LIB, "Lib" }, 115 { LIKE, "Like" }, 116 { LINE, "Line" }, 117 { LINEINPUT,"Line Input" }, 118 { LOCAL, "Local" }, 119 { LOCK, "Lock" }, 120 { TLONG, "Long" }, 121 { LOOP, "Loop" }, 122 { LPRINT, "LPrint" }, 123 { LSET, "LSet" }, // JSM 124 { MOD, "Mod" }, 125 { NAME, "Name" }, 126 { NEW, "New" }, 127 { NEXT, "Next" }, 128 { NOT, "Not" }, 129 { TOBJECT, "Object" }, 130 { ON, "On" }, 131 { OPEN, "Open" }, 132 { OPTION, "Option" }, 133 { OPTIONAL_, "Optional" }, 134 { OR, "Or" }, 135 { OUTPUT, "Output" }, 136 { PARAMARRAY, "ParamArray" }, 137 { PRESERVE, "Preserve" }, 138 { PRINT, "Print" }, 139 { PRIVATE, "Private" }, 140 { PROPERTY, "Property" }, 141 { PTRSAFE, "PtrSafe" }, 142 { PUBLIC, "Public" }, 143 { RANDOM, "Random" }, 144 { READ, "Read" }, 145 { REDIM, "ReDim" }, 146 { REM, "Rem" }, 147 { RESUME, "Resume" }, 148 { RETURN, "Return" }, 149 { RSET, "RSet" }, // JSM 150 { SELECT, "Select" }, 151 { SET, "Set" }, 152 { SHARED, "Shared" }, 153 { TSINGLE, "Single" }, 154 { STATIC, "Static" }, 155 { STEP, "Step" }, 156 { STOP, "Stop" }, 157 { TSTRING, "String" }, 158 { SUB, "Sub" }, 159 { STOP, "System" }, 160 { TEXT, "Text" }, 161 { THEN, "Then" }, 162 { TO, "To", }, 163 { TYPE, "Type" }, 164 { TYPEOF, "TypeOf" }, 165 { UNTIL, "Until" }, 166 { TVARIANT, "Variant" }, 167 { VBASUPPORT, "VbaSupport" }, 168 { WEND, "Wend" }, 169 { WHILE, "While" }, 170 { WITH, "With" }, 171 { WITHEVENTS, "WithEvents" }, 172 { WRITE, "Write" }, // also WRITE # 173 { XOR, "Xor" }, 174 }; 175 176 // #i109076 177 class TokenLabelInfo 178 { 179 std::array<bool,VBASUPPORT+1> m_pTokenCanBeLabelTab; 180 181 public: 182 TokenLabelInfo(); 183 184 bool canTokenBeLabel( SbiToken eTok ) 185 { return m_pTokenCanBeLabelTab[eTok]; } 186 }; 187 188 class StaticTokenLabelInfo: public ::rtl::Static< TokenLabelInfo, StaticTokenLabelInfo >{}; 189 190 // #i109076 191 TokenLabelInfo::TokenLabelInfo() 192 { 193 m_pTokenCanBeLabelTab.fill(false); 194 195 // Token accepted as label by VBA 196 static const SbiToken eLabelToken[] = { ACCESS, ALIAS, APPEND, BASE, BINARY, CLASSMODULE, 197 COMPARE, COMPATIBLE, DEFERR, ERROR_, BASIC_EXPLICIT, LIB, LINE, LPRINT, NAME, 198 TOBJECT, OUTPUT, PROPERTY, RANDOM, READ, STEP, STOP, TEXT, VBASUPPORT }; 199 for( SbiToken eTok : eLabelToken ) 200 { 201 m_pTokenCanBeLabelTab[eTok] = true; 202 } 203 } 204 205 206 SbiTokenizer::SbiTokenizer( const OUString& rSrc, StarBASIC* pb ) 207 : SbiScanner(rSrc, pb) 208 , eCurTok(NIL) 209 , ePush(NIL) 210 , nPLine(0) 211 , nPCol1(0) 212 , nPCol2(0) 213 , bEof(false) 214 , bEos(true) 215 , bAs(false) 216 , bErrorIsSymbol(true) 217 { 218 } 219 220 void SbiTokenizer::Push( SbiToken t ) 221 { 222 if( ePush != NIL ) 223 Error( ERRCODE_BASIC_INTERNAL_ERROR, "PUSH" ); 224 else ePush = t; 225 } 226 227 void SbiTokenizer::Error( ErrCode code, const OUString &aMsg ) 228 { 229 aError = aMsg; 230 Error( code ); 231 } 232 233 void SbiTokenizer::Error( ErrCode code, SbiToken tok ) 234 { 235 aError = Symbol( tok ); 236 Error( code ); 237 } 238 239 // reading in the next token without absorbing it 240 241 SbiToken SbiTokenizer::Peek() 242 { 243 if( ePush == NIL ) 244 { 245 sal_uInt16 nOldLine = nLine; 246 sal_uInt16 nOldCol1 = nCol1; 247 sal_uInt16 nOldCol2 = nCol2; 248 ePush = Next(); 249 nPLine = nLine; nLine = nOldLine; 250 nPCol1 = nCol1; nCol1 = nOldCol1; 251 nPCol2 = nCol2; nCol2 = nOldCol2; 252 } 253 return eCurTok = ePush; 254 } 255 256 // For decompilation. Numbers and symbols return an empty string. 257 258 const OUString& SbiTokenizer::Symbol( SbiToken t ) 259 { 260 // character token? 261 if( t < FIRSTKWD ) 262 { 263 aSym = OUString(sal::static_int_cast<sal_Unicode>(t)); 264 return aSym; 265 } 266 switch( t ) 267 { 268 case NEG : 269 aSym = "-"; 270 return aSym; 271 case EOS : 272 aSym = ":/CRLF"; 273 return aSym; 274 case EOLN : 275 aSym = "CRLF"; 276 return aSym; 277 default: 278 break; 279 } 280 for( auto& rTok : aTokTable_Basic ) 281 { 282 if( rTok.t == t ) 283 { 284 aSym = OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US); 285 return aSym; 286 } 287 } 288 const sal_Unicode *p = aSym.getStr(); 289 if (*p <= ' ') 290 { 291 aSym = "???"; 292 } 293 return aSym; 294 } 295 296 // Reading in the next token and put it down. 297 // Tokens that don't appear in the token table 298 // are directly returned as a character. 299 // Some words are treated in a special way. 300 301 SbiToken SbiTokenizer::Next() 302 { 303 if (bEof) 304 { 305 return EOLN; 306 } 307 // have read in one already? 308 if( ePush != NIL ) 309 { 310 eCurTok = ePush; 311 ePush = NIL; 312 nLine = nPLine; 313 nCol1 = nPCol1; 314 nCol2 = nPCol2; 315 bEos = IsEoln( eCurTok ); 316 return eCurTok; 317 } 318 const TokenTable *tp; 319 320 if( !NextSym() ) 321 { 322 bEof = bEos = true; 323 return eCurTok = EOLN; 324 } 325 326 if( aSym.startsWith("\n") ) 327 { 328 bEos = true; 329 return eCurTok = EOLN; 330 } 331 bEos = false; 332 333 if( bNumber ) 334 { 335 return eCurTok = NUMBER; 336 } 337 else if( ( eScanType == SbxDATE || eScanType == SbxSTRING ) && !bSymbol ) 338 { 339 return eCurTok = FIXSTRING; 340 } 341 else if( aSym.isEmpty() ) 342 { 343 //something went wrong 344 bEof = bEos = true; 345 return eCurTok = EOLN; 346 } 347 // Special cases of characters that are between "Z" and "a". ICompare() 348 // evaluates the position of these characters in different ways. 349 else if( aSym[0] == '^' ) 350 { 351 return eCurTok = EXPON; 352 } 353 else if( aSym[0] == '\\' ) 354 { 355 return eCurTok = IDIV; 356 } 357 else 358 { 359 if( eScanType != SbxVARIANT ) 360 return eCurTok = SYMBOL; 361 // valid token? 362 short lb = 0; 363 short ub = SAL_N_ELEMENTS(aTokTable_Basic)-1; 364 short delta; 365 do 366 { 367 delta = (ub - lb) >> 1; 368 tp = &aTokTable_Basic[ lb + delta ]; 369 sal_Int32 res = aSym.compareToIgnoreAsciiCaseAscii( tp->s ); 370 371 if( res == 0 ) 372 { 373 goto special; 374 } 375 if( res < 0 ) 376 { 377 if ((ub - lb) == 2) 378 { 379 ub = lb; 380 } 381 else 382 { 383 ub = ub - delta; 384 } 385 } 386 else 387 { 388 if ((ub -lb) == 2) 389 { 390 lb = ub; 391 } 392 else 393 { 394 lb = lb + delta; 395 } 396 } 397 } 398 while( delta ); 399 // Symbol? if not >= token 400 sal_Unicode ch = aSym[0]; 401 if( !BasicCharClass::isAlpha( ch, bCompatible ) && !bSymbol ) 402 { 403 return eCurTok = static_cast<SbiToken>(ch & 0x00FF); 404 } 405 return eCurTok = SYMBOL; 406 } 407 special: 408 // #i92642 409 bool bStartOfLine = (eCurTok == NIL || eCurTok == REM || eCurTok == EOLN || 410 eCurTok == THEN || eCurTok == ELSE); // single line If 411 if( !bStartOfLine && (tp->t == NAME || tp->t == LINE) ) 412 { 413 return eCurTok = SYMBOL; 414 } 415 else if( tp->t == TEXT ) 416 { 417 return eCurTok = SYMBOL; 418 } 419 // maybe we can expand this for other statements that have parameters 420 // that are keywords ( and those keywords are only used within such 421 // statements ) 422 // what's happening here is that if we come across 'append' ( and we are 423 // not in the middle of parsing a special statement ( like 'Open') 424 // we just treat keyword 'append' as a normal 'SYMBOL'. 425 // Also we accept Dim APPEND 426 else if ( ( !bInStatement || eCurTok == DIM ) && tp->t == APPEND ) 427 { 428 return eCurTok = SYMBOL; 429 } 430 // #i92642: Special LINE token handling -> SbiParser::Line() 431 432 // END IF, CASE, SUB, DEF, FUNCTION, TYPE, CLASS, WITH 433 if( tp->t == END ) 434 { 435 // from 15.3.96, special treatment for END, at Peek() the current 436 // time is lost, so memorize everything and restore after 437 sal_uInt16 nOldLine = nLine; 438 sal_uInt16 nOldCol = nCol; 439 sal_uInt16 nOldCol1 = nCol1; 440 sal_uInt16 nOldCol2 = nCol2; 441 OUString aOldSym = aSym; 442 SaveLine(); // save pLine in the scanner 443 444 eCurTok = Peek(); 445 switch( eCurTok ) 446 { 447 case IF: Next(); eCurTok = ENDIF; break; 448 case SELECT: Next(); eCurTok = ENDSELECT; break; 449 case SUB: Next(); eCurTok = ENDSUB; break; 450 case FUNCTION: Next(); eCurTok = ENDFUNC; break; 451 case PROPERTY: Next(); eCurTok = ENDPROPERTY; break; 452 case TYPE: Next(); eCurTok = ENDTYPE; break; 453 case ENUM: Next(); eCurTok = ENDENUM; break; 454 case WITH: Next(); eCurTok = ENDWITH; break; 455 default : eCurTok = END; break; 456 } 457 nCol1 = nOldCol1; 458 if( eCurTok == END ) 459 { 460 // reset everything so that token is read completely newly after END 461 ePush = NIL; 462 nLine = nOldLine; 463 nCol = nOldCol; 464 nCol2 = nOldCol2; 465 aSym = aOldSym; 466 RestoreLine(); 467 } 468 return eCurTok; 469 } 470 // are data types keywords? 471 // there is ERROR(), DATA(), STRING() etc. 472 eCurTok = tp->t; 473 // AS: data types are keywords 474 if( tp->t == AS ) 475 { 476 bAs = true; 477 } 478 else 479 { 480 if( bAs ) 481 { 482 bAs = false; 483 } 484 else if( eCurTok >= DATATYPE1 && eCurTok <= DATATYPE2 && (bErrorIsSymbol || eCurTok != ERROR_) ) 485 { 486 eCurTok = SYMBOL; 487 } 488 } 489 490 // CLASSMODULE, PROPERTY, GET, ENUM token only visible in compatible mode 491 SbiToken eTok = tp->t; 492 if( bCompatible ) 493 { 494 // #129904 Suppress system 495 if( eTok == STOP && aSym.equalsIgnoreAsciiCase("system") ) 496 { 497 eCurTok = SYMBOL; 498 } 499 if( eTok == GET && bStartOfLine ) 500 { 501 eCurTok = SYMBOL; 502 } 503 } 504 else 505 { 506 if( eTok == CLASSMODULE || 507 eTok == IMPLEMENTS || 508 eTok == PARAMARRAY || 509 eTok == ENUM || 510 eTok == PROPERTY || 511 eTok == GET || 512 eTok == TYPEOF ) 513 { 514 eCurTok = SYMBOL; 515 } 516 } 517 518 bEos = IsEoln( eCurTok ); 519 return eCurTok; 520 } 521 522 bool SbiTokenizer::MayBeLabel( bool bNeedsColon ) 523 { 524 if( eCurTok == SYMBOL || StaticTokenLabelInfo::get().canTokenBeLabel( eCurTok ) ) 525 { 526 return !bNeedsColon || DoesColonFollow(); 527 } 528 else 529 { 530 return ( eCurTok == NUMBER 531 && eScanType == SbxINTEGER 532 && nVal >= 0 ); 533 } 534 } 535 536 537 OUString SbiTokenizer::GetKeywordCase( const OUString& sKeyword ) 538 { 539 for( auto& rTok : aTokTable_Basic ) 540 { 541 if( sKeyword.equalsIgnoreAsciiCaseAscii(rTok.s) ) 542 return OStringToOUString(rTok.s, RTL_TEXTENCODING_ASCII_US); 543 } 544 return OUString(); 545 } 546 547 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 548
