1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <basiccharclass.hxx> 21 #include <scanner.hxx> 22 #include <sbintern.hxx> 23 #include <runtime.hxx> 24 25 #include <basic/sberrors.hxx> 26 #include <i18nlangtag/lang.h> 27 #include <svl/numformat.hxx> 28 #include <svl/zforlist.hxx> 29 #include <rtl/character.hxx> 30 31 SbiScanner::SbiScanner(const OUString& rBuf, StarBASIC* p) 32 : aBuf(rBuf) 33 , nLineIdx(-1) 34 , nSaveLineIdx(-1) 35 , pBasic(p) 36 , eScanType(SbxVARIANT) 37 , nVal(0) 38 , nSavedCol1(0) 39 , nCol(0) 40 , nErrors(0) 41 , nColLock(0) 42 , nBufPos(0) 43 , nLine(0) 44 , nCol1(0) 45 , nCol2(0) 46 , bSymbol(false) 47 , bNumber(false) 48 , bSpaces(false) 49 , bAbort(false) 50 , bHash(true) 51 , bError(false) 52 , bCompatible(false) 53 , bVBASupportOn(false) 54 , bPrevLineExtentsComment(false) 55 , bInStatement(false) 56 { 57 } 58 59 void SbiScanner::LockColumn() 60 { 61 if( !nColLock++ ) 62 nSavedCol1 = nCol1; 63 } 64 65 void SbiScanner::UnlockColumn() 66 { 67 if( nColLock ) 68 nColLock--; 69 } 70 71 void SbiScanner::GenError( ErrCode code ) 72 { 73 if( GetSbData()->bBlockCompilerError ) 74 { 75 bAbort = true; 76 return; 77 } 78 if( !bError ) 79 { 80 bool bRes = true; 81 // report only one error per statement 82 bError = true; 83 if( pBasic ) 84 { 85 // in case of EXPECTED or UNEXPECTED it always refers 86 // to the last token, so take the Col1 over 87 sal_Int32 nc = nColLock ? nSavedCol1 : nCol1; 88 if ( code.anyOf( 89 ERRCODE_BASIC_EXPECTED, 90 ERRCODE_BASIC_UNEXPECTED, 91 ERRCODE_BASIC_SYMBOL_EXPECTED, 92 ERRCODE_BASIC_LABEL_EXPECTED) ) 93 { 94 nc = nCol1; 95 if( nc > nCol2 ) nCol2 = nc; 96 } 97 bRes = pBasic->CError( code, aError, nLine, nc, nCol2 ); 98 } 99 bAbort = bAbort || !bRes || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE ); 100 } 101 nErrors++; 102 } 103 104 105 // used by SbiTokenizer::MayBeLabel() to detect a label 106 bool SbiScanner::DoesColonFollow() 107 { 108 if(nCol < aLine.getLength() && aLine[nCol] == ':') 109 { 110 ++nLineIdx; ++nCol; 111 return true; 112 } 113 else 114 return false; 115 } 116 117 // test for legal suffix 118 static SbxDataType GetSuffixType( sal_Unicode c ) 119 { 120 switch (c) 121 { 122 case '%': 123 return SbxINTEGER; 124 case '&': 125 return SbxLONG; 126 case '!': 127 return SbxSINGLE; 128 case '#': 129 return SbxDOUBLE; 130 case '@': 131 return SbxCURRENCY; 132 case '$': 133 return SbxSTRING; 134 default: 135 return SbxVARIANT; 136 } 137 } 138 139 // reading the next symbol into the variables aSym, nVal and eType 140 // return value is sal_False at EOF or errors 141 #define BUF_SIZE 80 142 143 void SbiScanner::scanAlphanumeric() 144 { 145 sal_Int32 n = nCol; 146 while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_')) 147 { 148 ++nLineIdx; 149 ++nCol; 150 } 151 aSym = aLine.copy(n, nCol - n); 152 } 153 154 void SbiScanner::scanGoto() 155 { 156 sal_Int32 n = nCol; 157 while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n])) 158 ++n; 159 160 if(n + 1 < aLine.getLength()) 161 { 162 OUString aTemp = aLine.copy(n, 2); 163 if(aTemp.equalsIgnoreAsciiCase("to")) 164 { 165 aSym = "goto"; 166 nLineIdx += n + 2 - nCol; 167 nCol = n + 2; 168 } 169 } 170 } 171 172 bool SbiScanner::readLine() 173 { 174 if(nBufPos >= aBuf.getLength()) 175 return false; 176 177 sal_Int32 n = nBufPos; 178 sal_Int32 nLen = aBuf.getLength(); 179 180 while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n') 181 ++n; 182 183 // Trim trailing whitespace 184 sal_Int32 nEnd = n; 185 while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1])) 186 --nEnd; 187 188 aLine = aBuf.copy(nBufPos, nEnd - nBufPos); 189 190 // Fast-forward past the line ending 191 if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n') 192 n += 2; 193 else if(n < nLen) 194 ++n; 195 196 nBufPos = n; 197 nLineIdx = 0; 198 199 ++nLine; 200 nCol = nCol1 = nCol2 = 0; 201 nColLock = 0; 202 203 return true; 204 } 205 206 bool SbiScanner::NextSym() 207 { 208 // memorize for the EOLN-case 209 sal_Int32 nOldLine = nLine; 210 sal_Int32 nOldCol1 = nCol1; 211 sal_Int32 nOldCol2 = nCol2; 212 sal_Unicode buf[ BUF_SIZE ], *p = buf; 213 214 eScanType = SbxVARIANT; 215 aSym.clear(); 216 bHash = bSymbol = bNumber = bSpaces = false; 217 bool bCompilerDirective = false; 218 219 // read in line? 220 if (nLineIdx == -1) 221 { 222 if(!readLine()) 223 return false; 224 225 nOldLine = nLine; 226 nOldCol1 = nOldCol2 = 0; 227 } 228 229 const sal_Int32 nLineIdxScanStart = nLineIdx; 230 231 if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol])) 232 { 233 bSpaces = true; 234 while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol])) 235 { 236 ++nLineIdx; 237 ++nCol; 238 } 239 } 240 241 nCol1 = nCol; 242 243 // only blank line? 244 if(nCol >= aLine.getLength()) 245 goto eoln; 246 247 if( bPrevLineExtentsComment ) 248 goto PrevLineCommentLbl; 249 250 if(nCol < aLine.getLength() && aLine[nCol] == '#') 251 { 252 sal_Int32 nLineTempIdx = nLineIdx; 253 do 254 { 255 nLineTempIdx++; 256 } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx]) 257 && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ','); 258 // leave it if it is a date literal - it will be handled later 259 if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#') 260 { 261 ++nLineIdx; 262 ++nCol; 263 //ignore compiler directives (# is first non-space character) 264 if (nOldCol2 == 0) 265 bCompilerDirective = true; 266 else 267 bHash = true; 268 } 269 } 270 271 // copy character if symbol 272 if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_')) 273 { 274 // if there's nothing behind '_' , it's the end of a line! 275 if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_') 276 { 277 // Note that nCol is not incremented here... 278 ++nLineIdx; 279 goto eoln; 280 } 281 282 bSymbol = true; 283 284 scanAlphanumeric(); 285 286 // Special handling for "go to" 287 if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go")) 288 scanGoto(); 289 290 // replace closing '_' by space when end of line is following 291 // (wrong line continuation otherwise) 292 if (nCol == aLine.getLength() && aLine[nCol - 1] == '_') 293 { 294 // We are going to modify a potentially shared string, so force 295 // a copy, so that aSym is not modified by the following operation 296 OUString aSymCopy( aSym.getStr(), aSym.getLength() ); 297 aSym = aSymCopy; 298 299 // HACK: modifying a potentially shared string here! 300 const_cast<sal_Unicode*>(aLine.getStr())[nLineIdx - 1] = ' '; 301 } 302 303 // type recognition? 304 // don't test the exclamation mark 305 // if there's a symbol behind it 306 else if((nCol >= aLine.getLength() || aLine[nCol] != '!') || 307 (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible))) 308 { 309 if(nCol < aLine.getLength()) 310 { 311 SbxDataType t(GetSuffixType(aLine[nCol])); 312 if( t != SbxVARIANT ) 313 { 314 eScanType = t; 315 ++nLineIdx; 316 ++nCol; 317 } 318 } 319 } 320 } 321 322 // read in and convert if number 323 else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) || 324 (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1]))) 325 { 326 short exp = 0; 327 short dec = 0; 328 eScanType = SbxDOUBLE; 329 bool bScanError = false; 330 bool bBufOverflow = false; 331 // All this because of 'D' or 'd' floating point type, sigh... 332 while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol])) 333 { 334 // from 4.1.1996: buffer full? -> go on scanning empty 335 if( (p-buf) == (BUF_SIZE-1) ) 336 { 337 bBufOverflow = true; 338 ++nLineIdx; 339 ++nCol; 340 continue; 341 } 342 // point or exponent? 343 if(aLine[nCol] == '.') 344 { 345 if( ++dec > 1 ) 346 bScanError = true; 347 else 348 *p++ = '.'; 349 } 350 else if(strchr("DdEe", aLine[nCol])) 351 { 352 if (++exp > 1) 353 bScanError = true; 354 else 355 { 356 *p++ = 'E'; 357 if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-')) 358 { 359 ++nLineIdx; 360 ++nCol; 361 if( (p-buf) == (BUF_SIZE-1) ) 362 { 363 bBufOverflow = true; 364 continue; 365 } 366 *p++ = aLine[nCol]; 367 } 368 } 369 } 370 else 371 { 372 *p++ = aLine[nCol]; 373 } 374 ++nLineIdx; 375 ++nCol; 376 } 377 *p = 0; 378 aSym = p; bNumber = true; 379 380 // For bad characters, scan and parse errors generate only one error. 381 ErrCode nError = ERRCODE_NONE; 382 if (bScanError) 383 { 384 --nLineIdx; 385 --nCol; 386 aError = OUString( aLine[nCol]); 387 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER; 388 } 389 390 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok; 391 const sal_Unicode* pParseEnd = buf; 392 nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd ); 393 if (pParseEnd != buf+(p-buf)) 394 { 395 // e.g. "12e" or "12e+", or with bScanError "12d"+"E". 396 sal_Int32 nChars = buf+(p-buf) - pParseEnd; 397 nLineIdx -= nChars; 398 nCol -= nChars; 399 // For bScanError, nLineIdx and nCol were already decremented, just 400 // add that character to the parse end. 401 if (bScanError) 402 ++nChars; 403 // Copy error position from original string, not the buffer 404 // replacement where "12dE" => "12EE". 405 aError = aLine.copy( nCol, nChars); 406 nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER; 407 } 408 else if (eStatus != rtl_math_ConversionStatus_Ok) 409 { 410 // Keep the scan error and character at position, if any. 411 if (!nError) 412 nError = ERRCODE_BASIC_MATH_OVERFLOW; 413 } 414 415 if (nError) 416 GenError( nError ); 417 418 if( !dec && !exp ) 419 { 420 if( nVal >= SbxMININT && nVal <= SbxMAXINT ) 421 eScanType = SbxINTEGER; 422 else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG ) 423 eScanType = SbxLONG; 424 } 425 426 if( bBufOverflow ) 427 GenError( ERRCODE_BASIC_MATH_OVERFLOW ); 428 429 // type recognition? 430 if( nCol < aLine.getLength() ) 431 { 432 SbxDataType t(GetSuffixType(aLine[nCol])); 433 if( t != SbxVARIANT ) 434 { 435 eScanType = t; 436 ++nLineIdx; 437 ++nCol; 438 } 439 // tdf#130476 - don't allow String trailing data type character with numbers 440 if ( t == SbxSTRING ) 441 { 442 GenError( ERRCODE_BASIC_SYNTAX ); 443 } 444 } 445 } 446 447 // Hex/octal number? Read in and convert: 448 else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&') 449 { 450 ++nLineIdx; ++nCol; 451 sal_Unicode base = 16; 452 sal_Unicode xch = aLine[nCol]; 453 ++nLineIdx; ++nCol; 454 switch( rtl::toAsciiUpperCase( xch ) ) 455 { 456 case 'O': 457 base = 8; 458 break; 459 case 'H': 460 break; 461 default : 462 // treated as an operator 463 --nLineIdx; --nCol; nCol1 = nCol-1; 464 aSym = "&"; 465 return true; 466 } 467 bNumber = true; 468 // Hex literals are signed Integers ( as defined by basic 469 // e.g. -2,147,483,648 through 2,147,483,647 (signed) 470 sal_uInt64 lu = 0; 471 bool bOverflow = false; 472 while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false)) 473 { 474 sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]); 475 ++nLineIdx; ++nCol; 476 if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) || 477 ((base == 8) && rtl::isAsciiOctalDigit( ch ))) 478 { 479 int i = ch - '0'; 480 if( i > 9 ) i -= 7; 481 lu = ( lu * base ) + i; 482 if( lu > SAL_MAX_UINT32 ) 483 { 484 bOverflow = true; 485 } 486 } 487 else 488 { 489 aError = OUString(ch); 490 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER ); 491 } 492 } 493 494 // tdf#130476 - take into account trailing data type characters 495 if( nCol < aLine.getLength() ) 496 { 497 SbxDataType t(GetSuffixType(aLine[nCol])); 498 if( t != SbxVARIANT ) 499 { 500 eScanType = t; 501 ++nLineIdx; 502 ++nCol; 503 } 504 // tdf#130476 - don't allow String trailing data type character with numbers 505 if ( t == SbxSTRING ) 506 { 507 GenError( ERRCODE_BASIC_SYNTAX ); 508 } 509 } 510 511 // tdf#130476 - take into account trailing data type characters 512 switch ( eScanType ) 513 { 514 case SbxINTEGER: 515 nVal = static_cast<double>( static_cast<sal_Int16>(lu) ); 516 if ( lu > SbxMAXUINT ) 517 { 518 bOverflow = true; 519 } 520 break; 521 case SbxLONG: nVal = static_cast<double>( static_cast<sal_Int32>(lu) ); break; 522 case SbxVARIANT: 523 { 524 // tdf#62326 - If the value of the hex string without explicit type character lies within 525 // the range of 0x8000 (SbxMAXINT + 1) and 0xFFFF (SbxMAXUINT) inclusive, cast the value 526 // to 16 bit in order to get signed integers, e.g., SbxMININT through SbxMAXINT 527 sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu); 528 eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG; 529 nVal = static_cast<double>(ls); 530 break; 531 } 532 default: 533 nVal = static_cast<double>(lu); 534 break; 535 } 536 if( bOverflow ) 537 GenError( ERRCODE_BASIC_MATH_OVERFLOW ); 538 } 539 540 // Strings: 541 else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '[')) 542 { 543 sal_Unicode cSep = aLine[nLineIdx]; 544 if( cSep == '[' ) 545 { 546 bSymbol = true; 547 cSep = ']'; 548 } 549 sal_Int32 n = nCol + 1; 550 while (nLineIdx < aLine.getLength()) 551 { 552 do 553 { 554 nLineIdx++; 555 nCol++; 556 } 557 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep)); 558 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep) 559 { 560 nLineIdx++; nCol++; 561 if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']') 562 { 563 // If VBA Interop then doesn't eat the [] chars 564 if ( cSep == ']' && bVBASupportOn ) 565 aSym = aLine.copy( n - 1, nCol - n + 1); 566 else 567 aSym = aLine.copy( n, nCol - n - 1 ); 568 // get out duplicate string delimiters 569 OUStringBuffer aSymBuf(aSym.getLength()); 570 for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i ) 571 { 572 aSymBuf.append( aSym[i] ); 573 if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep ) 574 ++i; 575 } 576 aSym = aSymBuf.makeStringAndClear(); 577 if( cSep != ']' ) 578 eScanType = SbxSTRING; 579 break; 580 } 581 } 582 else 583 { 584 aError = OUString(cSep); 585 GenError( ERRCODE_BASIC_EXPECTED ); 586 } 587 } 588 } 589 590 // Date: 591 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#') 592 { 593 sal_Int32 n = nCol + 1; 594 do 595 { 596 nLineIdx++; 597 nCol++; 598 } 599 while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#')); 600 if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#') 601 { 602 nLineIdx++; nCol++; 603 aSym = aLine.copy( n, nCol - n - 1 ); 604 605 // parse date literal 606 std::shared_ptr<SvNumberFormatter> pFormatter; 607 if (GetSbData()->pInst) 608 { 609 pFormatter = GetSbData()->pInst->GetNumberFormatter(); 610 } 611 else 612 { 613 sal_uInt32 nDummy; 614 pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy ); 615 } 616 sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US); 617 bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal); 618 if( bSuccess ) 619 { 620 SvNumFormatType nType_ = pFormatter->GetType(nIndex); 621 if( !(nType_ & SvNumFormatType::DATE) ) 622 bSuccess = false; 623 } 624 625 if (!bSuccess) 626 GenError( ERRCODE_BASIC_CONVERSION ); 627 628 bNumber = true; 629 eScanType = SbxDOUBLE; 630 } 631 else 632 { 633 aError = OUString('#'); 634 GenError( ERRCODE_BASIC_EXPECTED ); 635 } 636 } 637 // invalid characters: 638 else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F) 639 { 640 GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++; 641 } 642 // other groups: 643 else 644 { 645 sal_Int32 n = 1; 646 auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0; 647 ++nLineIdx; 648 if (nLineIdx < aLine.getLength()) 649 { 650 switch (nChar) 651 { 652 case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break; 653 case '>': if( aLine[nLineIdx] == '=' ) n = 2; break; 654 case ':': if( aLine[nLineIdx] == '=' ) n = 2; break; 655 } 656 } 657 aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol)); 658 nLineIdx += n-1; nCol = nCol + n; 659 } 660 661 nCol2 = nCol-1; 662 663 PrevLineCommentLbl: 664 665 if( bPrevLineExtentsComment || (eScanType != SbxSTRING && 666 ( bCompilerDirective || 667 aSym.startsWith("'") || 668 aSym.equalsIgnoreAsciiCase( "REM" ) ) ) ) 669 { 670 bPrevLineExtentsComment = false; 671 aSym = "REM"; 672 sal_Int32 nLen = aLine.getLength() - nLineIdx; 673 if( bCompatible && aLine[nLineIdx + nLen - 1] == '_' && aLine[nLineIdx + nLen - 2] == ' ' ) 674 bPrevLineExtentsComment = true; 675 nCol2 = nCol2 + nLen; 676 nLineIdx = -1; 677 } 678 679 if (nLineIdx == nLineIdxScanStart) 680 { 681 GenError( ERRCODE_BASIC_SYMBOL_EXPECTED ); 682 return false; 683 } 684 685 return true; 686 687 688 eoln: 689 if( nCol && aLine[--nLineIdx] == '_' ) 690 { 691 nLineIdx = -1; 692 bool bRes = NextSym(); 693 if( aSym.startsWith(".") ) 694 { 695 // object _ 696 // .Method 697 // ^^^ <- spaces is legal in MSO VBA 698 bSpaces = false; 699 } 700 return bRes; 701 } 702 else 703 { 704 nLineIdx = -1; 705 nLine = nOldLine; 706 nCol1 = nOldCol1; 707 nCol2 = nOldCol2; 708 aSym = "\n"; 709 nColLock = 0; 710 return true; 711 } 712 } 713 714 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 715
