xref: /core/basic/source/comp/scanner.cxx (revision ef38b9af)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <basiccharclass.hxx>
21 #include <scanner.hxx>
22 #include <sbintern.hxx>
23 #include <runtime.hxx>
24 
25 #include <basic/sberrors.hxx>
26 #include <i18nlangtag/lang.h>
27 #include <svl/numformat.hxx>
28 #include <svl/zforlist.hxx>
29 #include <rtl/character.hxx>
30 
31 SbiScanner::SbiScanner(const OUString& rBuf, StarBASIC* p)
32     : aBuf(rBuf)
33     , nLineIdx(-1)
34     , nSaveLineIdx(-1)
35     , pBasic(p)
36     , eScanType(SbxVARIANT)
37     , nVal(0)
38     , nSavedCol1(0)
39     , nCol(0)
40     , nErrors(0)
41     , nColLock(0)
42     , nBufPos(0)
43     , nLine(0)
44     , nCol1(0)
45     , nCol2(0)
46     , bSymbol(false)
47     , bNumber(false)
48     , bSpaces(false)
49     , bAbort(false)
50     , bHash(true)
51     , bError(false)
52     , bCompatible(false)
53     , bVBASupportOn(false)
54     , bPrevLineExtentsComment(false)
55     , bInStatement(false)
56 {
57 }
58 
59 void SbiScanner::LockColumn()
60 {
61     if( !nColLock++ )
62         nSavedCol1 = nCol1;
63 }
64 
65 void SbiScanner::UnlockColumn()
66 {
67     if( nColLock )
68         nColLock--;
69 }
70 
71 void SbiScanner::GenError( ErrCode code )
72 {
73     if( GetSbData()->bBlockCompilerError )
74     {
75         bAbort = true;
76         return;
77     }
78     if( !bError )
79     {
80         bool bRes = true;
81         // report only one error per statement
82         bError = true;
83         if( pBasic )
84         {
85             // in case of EXPECTED or UNEXPECTED it always refers
86             // to the last token, so take the Col1 over
87             sal_Int32 nc = nColLock ? nSavedCol1 : nCol1;
88             if ( code.anyOf(
89                     ERRCODE_BASIC_EXPECTED,
90                     ERRCODE_BASIC_UNEXPECTED,
91                     ERRCODE_BASIC_SYMBOL_EXPECTED,
92                     ERRCODE_BASIC_LABEL_EXPECTED) )
93             {
94                     nc = nCol1;
95                     if( nc > nCol2 ) nCol2 = nc;
96             }
97             bRes = pBasic->CError( code, aError, nLine, nc, nCol2 );
98         }
99         bAbort = bAbort || !bRes  || ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE );
100     }
101     nErrors++;
102 }
103 
104 
105 // used by SbiTokenizer::MayBeLabel() to detect a label
106 bool SbiScanner::DoesColonFollow()
107 {
108     if(nCol < aLine.getLength() && aLine[nCol] == ':')
109     {
110         ++nLineIdx; ++nCol;
111         return true;
112     }
113     else
114         return false;
115 }
116 
117 // test for legal suffix
118 static SbxDataType GetSuffixType( sal_Unicode c )
119 {
120     switch (c)
121     {
122     case '%':
123         return SbxINTEGER;
124     case '&':
125         return SbxLONG;
126     case '!':
127         return SbxSINGLE;
128     case '#':
129         return SbxDOUBLE;
130     case '@':
131         return SbxCURRENCY;
132     case '$':
133         return SbxSTRING;
134     default:
135         return SbxVARIANT;
136     }
137 }
138 
// NextSym() reads the next symbol into the members aSym, nVal and eScanType;
// its return value is false at EOF or on errors.
//
// BUF_SIZE is the capacity, in sal_Unicode units, of the scratch buffer in
// which NextSym() collects the characters of a decimal number literal. One
// slot is kept free for the terminating NUL; longer literals are scanned to
// their end but reported as an overflow.
#define BUF_SIZE 80
142 
143 void SbiScanner::scanAlphanumeric()
144 {
145     sal_Int32 n = nCol;
146     while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
147     {
148         ++nLineIdx;
149         ++nCol;
150     }
151     aSym = aLine.copy(n, nCol - n);
152 }
153 
154 void SbiScanner::scanGoto()
155 {
156     sal_Int32 n = nCol;
157     while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n]))
158         ++n;
159 
160     if(n + 1 < aLine.getLength())
161     {
162         OUString aTemp = aLine.copy(n, 2);
163         if(aTemp.equalsIgnoreAsciiCase("to"))
164         {
165             aSym = "goto";
166             nLineIdx += n + 2 - nCol;
167             nCol = n + 2;
168         }
169     }
170 }
171 
172 bool SbiScanner::readLine()
173 {
174     if(nBufPos >= aBuf.getLength())
175         return false;
176 
177     sal_Int32 n = nBufPos;
178     sal_Int32 nLen = aBuf.getLength();
179 
180     while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n')
181         ++n;
182 
183     // Trim trailing whitespace
184     sal_Int32 nEnd = n;
185     while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1]))
186         --nEnd;
187 
188     aLine = aBuf.copy(nBufPos, nEnd - nBufPos);
189 
190     // Fast-forward past the line ending
191     if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n')
192         n += 2;
193     else if(n < nLen)
194         ++n;
195 
196     nBufPos = n;
197     nLineIdx = 0;
198 
199     ++nLine;
200     nCol = nCol1 = nCol2 = 0;
201     nColLock = 0;
202 
203     return true;
204 }
205 
206 bool SbiScanner::NextSym()
207 {
208     // memorize for the EOLN-case
209     sal_Int32 nOldLine = nLine;
210     sal_Int32 nOldCol1 = nCol1;
211     sal_Int32 nOldCol2 = nCol2;
212     sal_Unicode buf[ BUF_SIZE ], *p = buf;
213 
214     eScanType = SbxVARIANT;
215     aSym.clear();
216     bHash = bSymbol = bNumber = bSpaces = false;
217     bool bCompilerDirective = false;
218 
219     // read in line?
220     if (nLineIdx == -1)
221     {
222         if(!readLine())
223             return false;
224 
225         nOldLine = nLine;
226         nOldCol1 = nOldCol2 = 0;
227     }
228 
229     const sal_Int32 nLineIdxScanStart = nLineIdx;
230 
231     if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
232     {
233         bSpaces = true;
234         while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
235         {
236             ++nLineIdx;
237             ++nCol;
238         }
239     }
240 
241     nCol1 = nCol;
242 
243     // only blank line?
244     if(nCol >= aLine.getLength())
245         goto eoln;
246 
247     if( bPrevLineExtentsComment )
248         goto PrevLineCommentLbl;
249 
250     if(nCol < aLine.getLength() && aLine[nCol] == '#')
251     {
252         sal_Int32 nLineTempIdx = nLineIdx;
253         do
254         {
255             nLineTempIdx++;
256         } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx])
257             && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ',');
258         // leave it if it is a date literal - it will be handled later
259         if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#')
260         {
261             ++nLineIdx;
262             ++nCol;
263             //ignore compiler directives (# is first non-space character)
264             if (nOldCol2 == 0)
265                 bCompilerDirective = true;
266             else
267                 bHash = true;
268         }
269     }
270 
271     // copy character if symbol
272     if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
273     {
274         // if there's nothing behind '_' , it's the end of a line!
275         if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_')
276         {
277             // Note that nCol is not incremented here...
278             ++nLineIdx;
279             goto eoln;
280         }
281 
282         bSymbol = true;
283 
284         scanAlphanumeric();
285 
286         // Special handling for "go to"
287         if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go"))
288             scanGoto();
289 
290         // replace closing '_' by space when end of line is following
291         // (wrong line continuation otherwise)
292         if (nCol == aLine.getLength() && aLine[nCol - 1] == '_')
293         {
294             // We are going to modify a potentially shared string, so force
295             // a copy, so that aSym is not modified by the following operation
296             OUString aSymCopy( aSym.getStr(), aSym.getLength() );
297             aSym = aSymCopy;
298 
299             // HACK: modifying a potentially shared string here!
300             const_cast<sal_Unicode*>(aLine.getStr())[nLineIdx - 1] = ' ';
301         }
302 
303         // type recognition?
304         // don't test the exclamation mark
305         // if there's a symbol behind it
306         else if((nCol >= aLine.getLength() || aLine[nCol] != '!') ||
307                 (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible)))
308         {
309             if(nCol < aLine.getLength())
310             {
311                 SbxDataType t(GetSuffixType(aLine[nCol]));
312                 if( t != SbxVARIANT )
313                 {
314                     eScanType = t;
315                     ++nLineIdx;
316                     ++nCol;
317                 }
318             }
319         }
320     }
321 
322     // read in and convert if number
323     else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) ||
324             (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1])))
325     {
326         short exp = 0;
327         short dec = 0;
328         eScanType = SbxDOUBLE;
329         bool bScanError = false;
330         bool bBufOverflow = false;
331         // All this because of 'D' or 'd' floating point type, sigh...
332         while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol]))
333         {
334             // from 4.1.1996: buffer full? -> go on scanning empty
335             if( (p-buf) == (BUF_SIZE-1) )
336             {
337                 bBufOverflow = true;
338                 ++nLineIdx;
339                 ++nCol;
340                 continue;
341             }
342             // point or exponent?
343             if(aLine[nCol] == '.')
344             {
345                 if( ++dec > 1 )
346                     bScanError = true;
347                 else
348                     *p++ = '.';
349             }
350             else if(strchr("DdEe", aLine[nCol]))
351             {
352                 if (++exp > 1)
353                     bScanError = true;
354                 else
355                 {
356                     *p++ = 'E';
357                     if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-'))
358                     {
359                         ++nLineIdx;
360                         ++nCol;
361                         if( (p-buf) == (BUF_SIZE-1) )
362                         {
363                             bBufOverflow = true;
364                             continue;
365                         }
366                         *p++ = aLine[nCol];
367                     }
368                 }
369             }
370             else
371             {
372                 *p++ = aLine[nCol];
373             }
374             ++nLineIdx;
375             ++nCol;
376         }
377         *p = 0;
378         aSym = p; bNumber = true;
379 
380         // For bad characters, scan and parse errors generate only one error.
381         ErrCode nError = ERRCODE_NONE;
382         if (bScanError)
383         {
384             --nLineIdx;
385             --nCol;
386             aError = OUString( aLine[nCol]);
387             nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
388         }
389 
390         rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
391         const sal_Unicode* pParseEnd = buf;
392         nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd );
393         if (pParseEnd != buf+(p-buf))
394         {
395             // e.g. "12e" or "12e+", or with bScanError "12d"+"E".
396             sal_Int32 nChars = buf+(p-buf) - pParseEnd;
397             nLineIdx -= nChars;
398             nCol -= nChars;
399             // For bScanError, nLineIdx and nCol were already decremented, just
400             // add that character to the parse end.
401             if (bScanError)
402                 ++nChars;
403             // Copy error position from original string, not the buffer
404             // replacement where "12dE" => "12EE".
405             aError = aLine.copy( nCol, nChars);
406             nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
407         }
408         else if (eStatus != rtl_math_ConversionStatus_Ok)
409         {
410             // Keep the scan error and character at position, if any.
411             if (!nError)
412                 nError = ERRCODE_BASIC_MATH_OVERFLOW;
413         }
414 
415         if (nError)
416             GenError( nError );
417 
418         if( !dec && !exp )
419         {
420             if( nVal >= SbxMININT && nVal <= SbxMAXINT )
421                 eScanType = SbxINTEGER;
422             else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
423                     eScanType = SbxLONG;
424         }
425 
426         if( bBufOverflow )
427             GenError( ERRCODE_BASIC_MATH_OVERFLOW );
428 
429         // type recognition?
430         if( nCol < aLine.getLength() )
431         {
432             SbxDataType t(GetSuffixType(aLine[nCol]));
433             if( t != SbxVARIANT )
434             {
435                 eScanType = t;
436                 ++nLineIdx;
437                 ++nCol;
438             }
439             // tdf#130476 - don't allow String trailing data type character with numbers
440             if ( t == SbxSTRING )
441             {
442                 GenError( ERRCODE_BASIC_SYNTAX );
443             }
444         }
445     }
446 
447     // Hex/octal number? Read in and convert:
448     else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&')
449     {
450         ++nLineIdx; ++nCol;
451         sal_Unicode base = 16;
452         sal_Unicode xch  = aLine[nCol];
453         ++nLineIdx; ++nCol;
454         switch( rtl::toAsciiUpperCase( xch ) )
455         {
456             case 'O':
457                 base = 8;
458                 break;
459             case 'H':
460                 break;
461             default :
462                 // treated as an operator
463                 --nLineIdx; --nCol; nCol1 = nCol-1;
464                 aSym = "&";
465                 return true;
466         }
467         bNumber = true;
468         // Hex literals are signed Integers ( as defined by basic
469         // e.g. -2,147,483,648 through 2,147,483,647 (signed)
470         sal_uInt64 lu = 0;
471         bool bOverflow = false;
472         while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false))
473         {
474             sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]);
475             ++nLineIdx; ++nCol;
476             if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) ||
477                      ((base == 8) && rtl::isAsciiOctalDigit( ch )))
478             {
479                 int i = ch  - '0';
480                 if( i > 9 ) i -= 7;
481                 lu = ( lu * base ) + i;
482                 if( lu > SAL_MAX_UINT32 )
483                 {
484                     bOverflow = true;
485                 }
486             }
487             else
488             {
489                 aError = OUString(ch);
490                 GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER );
491             }
492         }
493 
494         // tdf#130476 - take into account trailing data type characters
495         if( nCol < aLine.getLength() )
496         {
497             SbxDataType t(GetSuffixType(aLine[nCol]));
498             if( t != SbxVARIANT )
499             {
500                 eScanType = t;
501                 ++nLineIdx;
502                 ++nCol;
503             }
504             // tdf#130476 - don't allow String trailing data type character with numbers
505             if ( t == SbxSTRING )
506             {
507                 GenError( ERRCODE_BASIC_SYNTAX );
508             }
509         }
510 
511         // tdf#130476 - take into account trailing data type characters
512         switch ( eScanType )
513         {
514             case SbxINTEGER:
515                 nVal = static_cast<double>( static_cast<sal_Int16>(lu) );
516                 if ( lu > SbxMAXUINT )
517                 {
518                     bOverflow = true;
519                 }
520                 break;
521             case SbxLONG: nVal = static_cast<double>( static_cast<sal_Int32>(lu) ); break;
522             case SbxVARIANT:
523             {
524                 // tdf#62326 - If the value of the hex string without explicit type character lies within
525                 // the range of 0x8000 (SbxMAXINT + 1) and 0xFFFF (SbxMAXUINT) inclusive, cast the value
526                 // to 16 bit in order to get signed integers, e.g., SbxMININT through SbxMAXINT
527                 sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu);
528                 eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
529                 nVal = static_cast<double>(ls);
530                 break;
531             }
532             default:
533                 nVal = static_cast<double>(lu);
534                 break;
535         }
536         if( bOverflow )
537             GenError( ERRCODE_BASIC_MATH_OVERFLOW );
538     }
539 
540     // Strings:
541     else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '['))
542     {
543         sal_Unicode cSep = aLine[nLineIdx];
544         if( cSep == '[' )
545         {
546             bSymbol = true;
547             cSep = ']';
548         }
549         sal_Int32 n = nCol + 1;
550         while (nLineIdx < aLine.getLength())
551         {
552             do
553             {
554                 nLineIdx++;
555                 nCol++;
556             }
557             while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep));
558             if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep)
559             {
560                 nLineIdx++; nCol++;
561                 if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']')
562                 {
563                     // If VBA Interop then doesn't eat the [] chars
564                     if ( cSep == ']' && bVBASupportOn )
565                         aSym = aLine.copy( n - 1, nCol - n  + 1);
566                     else
567                         aSym = aLine.copy( n, nCol - n - 1 );
568                     // get out duplicate string delimiters
569                     OUStringBuffer aSymBuf(aSym.getLength());
570                     for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i )
571                     {
572                         aSymBuf.append( aSym[i] );
573                         if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep )
574                             ++i;
575                     }
576                     aSym = aSymBuf.makeStringAndClear();
577                     if( cSep != ']' )
578                         eScanType = SbxSTRING;
579                     break;
580                 }
581             }
582             else
583             {
584                 aError = OUString(cSep);
585                 GenError( ERRCODE_BASIC_EXPECTED );
586             }
587         }
588     }
589 
590     // Date:
591     else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
592     {
593         sal_Int32 n = nCol + 1;
594         do
595         {
596             nLineIdx++;
597             nCol++;
598         }
599         while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#'));
600         if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
601         {
602             nLineIdx++; nCol++;
603             aSym = aLine.copy( n, nCol - n - 1 );
604 
605             // parse date literal
606             std::shared_ptr<SvNumberFormatter> pFormatter;
607             if (GetSbData()->pInst)
608             {
609                 pFormatter = GetSbData()->pInst->GetNumberFormatter();
610             }
611             else
612             {
613                 sal_uInt32 nDummy;
614                 pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy );
615             }
616             sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US);
617             bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal);
618             if( bSuccess )
619             {
620                 SvNumFormatType nType_ = pFormatter->GetType(nIndex);
621                 if( !(nType_ & SvNumFormatType::DATE) )
622                     bSuccess = false;
623             }
624 
625             if (!bSuccess)
626                 GenError( ERRCODE_BASIC_CONVERSION );
627 
628             bNumber = true;
629             eScanType = SbxDOUBLE;
630         }
631         else
632         {
633             aError = OUString('#');
634             GenError( ERRCODE_BASIC_EXPECTED );
635         }
636     }
637     // invalid characters:
638     else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F)
639     {
640         GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++;
641     }
642     // other groups:
643     else
644     {
645         sal_Int32 n = 1;
646         auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0;
647         ++nLineIdx;
648         if (nLineIdx < aLine.getLength())
649         {
650             switch (nChar)
651             {
652                 case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break;
653                 case '>': if( aLine[nLineIdx] == '=' ) n = 2; break;
654                 case ':': if( aLine[nLineIdx] == '=' ) n = 2; break;
655             }
656         }
657         aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol));
658         nLineIdx += n-1; nCol = nCol + n;
659     }
660 
661     nCol2 = nCol-1;
662 
663 PrevLineCommentLbl:
664 
665     if( bPrevLineExtentsComment || (eScanType != SbxSTRING &&
666                                     ( bCompilerDirective ||
667                                       aSym.startsWith("'") ||
668                                       aSym.equalsIgnoreAsciiCase( "REM" ) ) ) )
669     {
670         bPrevLineExtentsComment = false;
671         aSym = "REM";
672         sal_Int32 nLen = aLine.getLength() - nLineIdx;
673         if( bCompatible && aLine[nLineIdx + nLen - 1] == '_' && aLine[nLineIdx + nLen - 2] == ' ' )
674             bPrevLineExtentsComment = true;
675         nCol2 = nCol2 + nLen;
676         nLineIdx = -1;
677     }
678 
679     if (nLineIdx == nLineIdxScanStart)
680     {
681         GenError( ERRCODE_BASIC_SYMBOL_EXPECTED );
682         return false;
683     }
684 
685     return true;
686 
687 
688 eoln:
689     if( nCol && aLine[--nLineIdx] == '_' )
690     {
691         nLineIdx = -1;
692         bool bRes = NextSym();
693         if( aSym.startsWith(".") )
694         {
695             // object _
696             //    .Method
697             // ^^^  <- spaces is legal in MSO VBA
698             bSpaces = false;
699         }
700         return bRes;
701     }
702     else
703     {
704         nLineIdx = -1;
705         nLine = nOldLine;
706         nCol1 = nOldCol1;
707         nCol2 = nOldCol2;
708         aSym = "\n";
709         nColLock = 0;
710         return true;
711     }
712 }
713 
714 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
715