1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <basiccharclass.hxx> 21 #include <scanner.hxx> 22 #include <sbintern.hxx> 23 #include <runtime.hxx> 24 25 #include <basic/sberrors.hxx> 26 #include <i18nlangtag/lang.h> 27 #include <svl/zforlist.hxx> 28 #include <rtl/character.hxx> 29 30 SbiScanner::SbiScanner( const OUString& rBuf, StarBASIC* p ) : aBuf( rBuf ) 31 { 32 pBasic = p; 33 nLineIdx = -1; 34 nVal = 0; 35 eScanType = SbxVARIANT; 36 nErrors = 0; 37 nBufPos = 0; 38 nSavedCol1 = 0; 39 nColLock = 0; 40 nLine = 0; 41 nCol1 = 0; 42 nCol2 = 0; 43 nCol = 0; 44 bError = 45 bAbort = 46 bSpaces = 47 bNumber = 48 bSymbol = 49 bCompatible = 50 bVBASupportOn = 51 bInStatement = 52 bPrevLineExtentsComment = false; 53 bHash = true; 54 nSaveLineIdx = -1; 55 } 56 57 void SbiScanner::LockColumn() 58 { 59 if( !nColLock++ ) 60 nSavedCol1 = nCol1; 61 } 62 63 void SbiScanner::UnlockColumn() 64 { 65 if( nColLock ) 66 nColLock--; 67 } 68 69 void SbiScanner::GenError( ErrCode code ) 70 { 71 if( GetSbData()->bBlockCompilerError ) 72 { 73 bAbort = true; 74 return; 75 } 76 if( !bError ) 77 { 78 bool bRes = true; 79 // report only one error per statement 80 bError = true; 81 if( pBasic ) 82 { 83 // in case of EXPECTED or UNEXPECTED it always refers 84 // to the last token, so take the Col1 over 85 sal_Int32 nc = nColLock ? nSavedCol1 : nCol1; 86 if ( code.anyOf( 87 ERRCODE_BASIC_EXPECTED, 88 ERRCODE_BASIC_UNEXPECTED, 89 ERRCODE_BASIC_SYMBOL_EXPECTED, 90 ERRCODE_BASIC_LABEL_EXPECTED) ) 91 { 92 nc = nCol1; 93 if( nc > nCol2 ) nCol2 = nc; 94 } 95 bRes = pBasic->CError( code, aError, nLine, nc, nCol2 ); 96 } 97 bAbort = bAbort || !bRes || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE ); 98 } 99 nErrors++; 100 } 101 102 103 // used by SbiTokenizer::MayBeLabel() to detect a label 104 bool SbiScanner::DoesColonFollow() 105 { 106 if(nCol < aLine.getLength() && aLine[nCol] == ':') 107 { 108 ++nLineIdx; ++nCol; 109 return true; 110 } 111 else 112 return false; 113 } 114 115 // test for legal suffix 116 static SbxDataType GetSuffixType( sal_Unicode c ) 117 { 118 switch (c) 119 { 120 case '%': 121 return SbxINTEGER; 122 case '&': 123 return SbxLONG; 124 case '!': 125 return SbxSINGLE; 126 case '#': 127 return SbxDOUBLE; 128 case '@': 129 return SbxCURRENCY; 130 case '$': 131 return SbxSTRING; 132 default: 133 return SbxVARIANT; 134 } 135 } 136 137 // reading the next symbol into the variables aSym, nVal and eType 138 // return value is sal_False at EOF or errors 139 #define BUF_SIZE 80 140 141 void SbiScanner::scanAlphanumeric() 142 { 143 sal_Int32 n = nCol; 144 while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_')) 145 { 146 ++nLineIdx; 147 ++nCol; 148 } 149 aSym = aLine.copy(n, nCol - n); 150 } 151 152 void SbiScanner::scanGoto() 153 { 154 sal_Int32 n = nCol; 155 while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n])) 156 ++n; 157 158 if(n + 1 < aLine.getLength()) 159 { 160 OUString aTemp = aLine.copy(n, 2); 161 if(aTemp.equalsIgnoreAsciiCase("to")) 162 { 163 aSym = "goto"; 164 nLineIdx += n + 2 - nCol; 165 nCol = n + 2; 166 } 167 } 168 } 169 170 bool SbiScanner::readLine() 171 { 172 if(nBufPos >= aBuf.getLength()) 173 return false; 174 175 sal_Int32 n = nBufPos; 176 sal_Int32 nLen = aBuf.getLength(); 177 178 while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n') 179 ++n; 180 181 // Trim trailing whitespace 182 sal_Int32 nEnd = n; 183 while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1])) 184 --nEnd; 185 186 aLine = aBuf.copy(nBufPos, nEnd - nBufPos); 187 188 // Fast-forward past the line ending 189 if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n') 190 n += 2; 191 else if(n < nLen) 192 ++n; 193 194 nBufPos = n; 195 nLineIdx = 0; 196 197 ++nLine; 198 nCol = nCol1 = nCol2 = 0; 199 nColLock = 0; 200 201 return true; 202 } 203 204 bool SbiScanner::NextSym() 205 { 206 // memorize for the EOLN-case 207 sal_Int32 nOldLine = nLine; 208 sal_Int32 nOldCol1 = nCol1; 209 sal_Int32 nOldCol2 = nCol2; 210 sal_Unicode buf[ BUF_SIZE ], *p = buf; 211 212 eScanType = SbxVARIANT; 213 aSym.clear(); 214 bHash = bSymbol = bNumber = bSpaces = false; 215 bool bCompilerDirective = false; 216 217 // read in line? 218 if (nLineIdx == -1) 219 { 220 if(!readLine()) 221 return false; 222 223 nOldLine = nLine; 224 nOldCol1 = nOldCol2 = 0; 225 } 226 227 const sal_Int32 nLineIdxScanStart = nLineIdx; 228 229 if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol])) 230 { 231 bSpaces = true; 232 while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol])) 233 { 234 ++nLineIdx; 235 ++nCol; 236 } 237 } 238 239 nCol1 = nCol; 240 241 // only blank line? 242 if(nCol >= aLine.getLength()) 243 goto eoln; 244 245 if( bPrevLineExtentsComment ) 246 goto PrevLineCommentLbl; 247 248 if(nCol < aLine.getLength() && aLine[nCol] == '#') 249 { 250 sal_Int32 nLineTempIdx = nLineIdx; 251 do 252 { 253 nLineTempIdx++; 254 } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx]) && aLine[nLineTempIdx] != '#'); 255 // leave it if it is a date literal - it will be handled later 256 if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#') 257 { 258 ++nLineIdx; 259 ++nCol; 260 //ignore compiler directives (# is first non-space character) 261 if (nOldCol2 == 0) 262 bCompilerDirective = true; 263 else 264 bHash = true; 265 } 266 } 267 268 // copy character if symbol 269 if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_')) 270 { 271 // if there's nothing behind '_' , it's the end of a line! 272 if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_') 273 { 274 // Note that nCol is not incremented here... 275 ++nLineIdx; 276 goto eoln; 277 } 278 279 bSymbol = true; 280 281 scanAlphanumeric(); 282 283 // Special handling for "go to" 284 if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go")) 285 scanGoto(); 286 287 // replace closing '_' by space when end of line is following 288 // (wrong line continuation otherwise) 289 if (nCol == aLine.getLength() && aLine[nCol - 1] == '_') 290 { 291 // We are going to modify a potentially shared string, so force 292 // a copy, so that aSym is not modified by the following operation 293 OUString aSymCopy( aSym.getStr(), aSym.getLength() ); 294 aSym = aSymCopy; 295 296 // HACK: modifying a potentially shared string here! 297 const_cast<sal_Unicode*>(aLine.getStr())[nLineIdx - 1] = ' '; 298 } 299 300 // type recognition? 301 // don't test the exclamation mark 302 // if there's a symbol behind it 303 else if((nCol >= aLine.getLength() || aLine[nCol] != '!') || 304 (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible))) 305 { 306 if(nCol < aLine.getLength()) 307 { 308 SbxDataType t(GetSuffixType(aLine[nCol])); 309 if( t != SbxVARIANT ) 310 { 311 eScanType = t; 312 ++nLineIdx; 313 ++nCol; 314 } 315 } 316 } 317 } 318 319 // read in and convert if number 320 else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) || 321 (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1]))) 322 { 323 short exp = 0; 324 short dec = 0; 325 eScanType = SbxDOUBLE; 326 bool bScanError = false; 327 bool bBufOverflow = false; 328 // All this because of 'D' or 'd' floating point type, sigh... 329 while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol])) 330 { 331 // from 4.1.1996: buffer full? -> go on scanning empty 332 if( (p-buf) == (BUF_SIZE-1) ) 333 { 334 bBufOverflow = true; 335 ++nLineIdx; 336 ++nCol; 337 continue; 338 } 339 // point or exponent? 340 if(aLine[nCol] == '.') 341 { 342 if( ++dec > 1 ) 343 bScanError = true; 344 else 345 *p++ = '.'; 346 } 347 else if(strchr("DdEe", aLine[nCol])) 348 { 349 if (++exp > 1) 350 bScanError = true; 351 else 352 { 353 *p++ = 'E'; 354 if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-')) 355 { 356 ++nLineIdx; 357 ++nCol; 358 if( (p-buf) == (BUF_SIZE-1) ) 359 { 360 bBufOverflow = true; 361 continue; 362 } 363 *p++ = aLine[nCol]; 364 } 365 } 366 } 367 else 368 { 369 *p++ = aLine[nCol]; 370 } 371 ++nLineIdx; 372 ++nCol; 373 } 374 *p = 0; 375 aSym = p; bNumber = true; 376 377 // For bad characters, scan and parse errors generate only one error. 378 ErrCode nError = ERRCODE_NONE; 379 if (bScanError) 380 { 381 --nLineIdx; 382 --nCol; 383 aError = OUString( aLine[nCol]); 384 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER; 385 } 386 387 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok; 388 const sal_Unicode* pParseEnd = buf; 389 nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd ); 390 if (pParseEnd != buf+(p-buf)) 391 { 392 // e.g. "12e" or "12e+", or with bScanError "12d"+"E". 393 sal_Int32 nChars = buf+(p-buf) - pParseEnd; 394 nLineIdx -= nChars; 395 nCol -= nChars; 396 // For bScanError, nLineIdx and nCol were already decremented, just 397 // add that character to the parse end. 398 if (bScanError) 399 ++nChars; 400 // Copy error position from original string, not the buffer 401 // replacement where "12dE" => "12EE". 402 aError = aLine.copy( nCol, nChars); 403 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER; 404 } 405 else if (eStatus != rtl_math_ConversionStatus_Ok) 406 { 407 // Keep the scan error and character at position, if any. 408 if (!nError) 409 nError = ERRCODE_BASIC_MATH_OVERFLOW; 410 } 411 412 if (nError) 413 GenError( nError ); 414 415 if( !dec && !exp ) 416 { 417 if( nVal >= SbxMININT && nVal <= SbxMAXINT ) 418 eScanType = SbxINTEGER; 419 else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG ) 420 eScanType = SbxLONG; 421 } 422 423 if( bBufOverflow ) 424 GenError( ERRCODE_BASIC_MATH_OVERFLOW ); 425 426 // type recognition? 427 if( nCol < aLine.getLength() ) 428 { 429 SbxDataType t(GetSuffixType(aLine[nCol])); 430 if( t != SbxVARIANT ) 431 { 432 eScanType = t; 433 ++nLineIdx; 434 ++nCol; 435 } 436 } 437 } 438 439 // Hex/octal number? Read in and convert: 440 else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&') 441 { 442 ++nLineIdx; ++nCol; 443 sal_Unicode base = 16; 444 sal_Unicode xch = aLine[nCol]; 445 ++nLineIdx; ++nCol; 446 switch( rtl::toAsciiUpperCase( xch ) ) 447 { 448 case 'O': 449 base = 8; 450 break; 451 case 'H': 452 break; 453 default : 454 // treated as an operator 455 --nLineIdx; --nCol; nCol1 = nCol-1; 456 aSym = "&"; 457 return true; 458 } 459 bNumber = true; 460 // Hex literals are signed Integers ( as defined by basic 461 // e.g. -2,147,483,648 through 2,147,483,647 (signed) 462 sal_uInt64 lu = 0; 463 bool bOverflow = false; 464 while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false)) 465 { 466 sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]); 467 ++nLineIdx; ++nCol; 468 if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) || 469 ((base == 8) && rtl::isAsciiOctalDigit( ch ))) 470 { 471 int i = ch - '0'; 472 if( i > 9 ) i -= 7; 473 lu = ( lu * base ) + i; 474 if( lu > SAL_MAX_UINT32 ) 475 { 476 bOverflow = true; 477 } 478 } 479 else 480 { 481 aError = OUString(ch); 482 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER ); 483 } 484 } 485 if(nCol < aLine.getLength() && aLine[nCol] == '&') 486 { 487 ++nLineIdx; 488 ++nCol; 489 } 490 // tdf#62326 - If the value of the hex string lies within the range of 0x8000 (SbxMAXINT + 1) 491 // and 0xFFFF (SbxMAXUINT) inclusive, cast the value to 16 bit in order to get 492 // signed integers, e.g., SbxMININT through SbxMAXINT 493 sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu); 494 nVal = static_cast<double>(ls); 495 eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG; 496 if( bOverflow ) 497 GenError( ERRCODE_BASIC_MATH_OVERFLOW ); 498 } 499 500 // Strings: 501 else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '[')) 502 { 503 sal_Unicode cSep = aLine[nLineIdx]; 504 if( cSep == '[' ) 505 { 506 bSymbol = true; 507 cSep = ']'; 508 } 509 sal_Int32 n = nCol + 1; 510 while (nLineIdx < aLine.getLength()) 511 { 512 do 513 { 514 nLineIdx++; 515 nCol++; 516 } 517 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep)); 518 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep) 519 { 520 nLineIdx++; nCol++; 521 if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']') 522 { 523 // If VBA Interop then doesn't eat the [] chars 524 if ( cSep == ']' && bVBASupportOn ) 525 aSym = aLine.copy( n - 1, nCol - n + 1); 526 else 527 aSym = aLine.copy( n, nCol - n - 1 ); 528 // get out duplicate string delimiters 529 OUStringBuffer aSymBuf(aSym.getLength()); 530 for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i ) 531 { 532 aSymBuf.append( aSym[i] ); 533 if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep ) 534 ++i; 535 } 536 aSym = aSymBuf.makeStringAndClear(); 537 if( cSep != ']' ) 538 eScanType = SbxSTRING; 539 break; 540 } 541 } 542 else 543 { 544 aError = OUString(cSep); 545 GenError( ERRCODE_BASIC_EXPECTED ); 546 } 547 } 548 } 549 550 // Date: 551 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#') 552 { 553 sal_Int32 n = nCol + 1; 554 do 555 { 556 nLineIdx++; 557 nCol++; 558 } 559 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#')); 560 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#') 561 { 562 nLineIdx++; nCol++; 563 aSym = aLine.copy( n, nCol - n - 1 ); 564 565 // parse date literal 566 std::shared_ptr<SvNumberFormatter> pFormatter; 567 if (GetSbData()->pInst) 568 { 569 pFormatter = GetSbData()->pInst->GetNumberFormatter(); 570 } 571 else 572 { 573 sal_uInt32 nDummy; 574 pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy ); 575 } 576 sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US); 577 bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal); 578 if( bSuccess ) 579 { 580 SvNumFormatType nType_ = pFormatter->GetType(nIndex); 581 if( !(nType_ & SvNumFormatType::DATE) ) 582 bSuccess = false; 583 } 584 585 if (!bSuccess) 586 GenError( ERRCODE_BASIC_CONVERSION ); 587 588 bNumber = true; 589 eScanType = SbxDOUBLE; 590 } 591 else 592 { 593 aError = OUString('#'); 594 GenError( ERRCODE_BASIC_EXPECTED ); 595 } 596 } 597 // invalid characters: 598 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F) 599 { 600 GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++; 601 } 602 // other groups: 603 else 604 { 605 sal_Int32 n = 1; 606 auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0; 607 ++nLineIdx; 608 if (nLineIdx < aLine.getLength()) 609 { 610 switch (nChar) 611 { 612 case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break; 613 case '>': if( aLine[nLineIdx] == '=' ) n = 2; break; 614 case ':': if( aLine[nLineIdx] == '=' ) n = 2; break; 615 } 616 } 617 aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol)); 618 nLineIdx += n-1; nCol = nCol + n; 619 } 620 621 nCol2 = nCol-1; 622 623 PrevLineCommentLbl: 624 625 if( bPrevLineExtentsComment || (eScanType != SbxSTRING && 626 ( bCompilerDirective || 627 aSym.startsWith("'") || 628 aSym.equalsIgnoreAsciiCase( "REM" ) ) ) ) 629 { 630 bPrevLineExtentsComment = false; 631 aSym = "REM"; 632 sal_Int32 nLen = aLine.getLength() - nLineIdx; 633 if( bCompatible && aLine[nLineIdx + nLen - 1] == '_' && aLine[nLineIdx + nLen - 2] == ' ' ) 634 bPrevLineExtentsComment = true; 635 nCol2 = nCol2 + nLen; 636 nLineIdx = -1; 637 } 638 639 if (nLineIdx == nLineIdxScanStart) 640 { 641 GenError( ERRCODE_BASIC_SYMBOL_EXPECTED ); 642 return false; 643 } 644 645 return true; 646 647 648 eoln: 649 if( nCol && aLine[--nLineIdx] == '_' ) 650 { 651 nLineIdx = -1; 652 bool bRes = NextSym(); 653 if( aSym.startsWith(".") ) 654 { 655 // object _ 656 // .Method 657 // ^^^ <- spaces is legal in MSO VBA 658 bSpaces = false; 659 } 660 return bRes; 661 } 662 else 663 { 664 nLineIdx = -1; 665 nLine = nOldLine; 666 nCol1 = nOldCol1; 667 nCol2 = nOldCol2; 668 aSym = "\n"; 669 nColLock = 0; 670 return true; 671 } 672 } 673 674 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 675
