ReactOS 0.4.17-dev-357-ga8f14ff
lex.c
Go to the documentation of this file.
1/*
2 * Copyright 2011 Jacek Caban for CodeWeavers
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19#ifdef __REACTOS__
20#include <wine/config.h>
21#include <wine/port.h>
22#endif
23#include <assert.h>
24#include <limits.h>
25#include <math.h>
26
27#include "vbscript.h"
28#include "parse.h"
29#include "parser.tab.h"
30
31#include "wine/debug.h"
32
34
35static const struct {
36 const WCHAR *word;
37 int token;
38} keywords[] = {
39 {L"and", tAND},
40 {L"byref", tBYREF},
41 {L"byval", tBYVAL},
42 {L"call", tCALL},
43 {L"case", tCASE},
44 {L"class", tCLASS},
45 {L"const", tCONST},
46 {L"default", tDEFAULT},
47 {L"dim", tDIM},
48 {L"do", tDO},
49 {L"each", tEACH},
50 {L"else", tELSE},
51 {L"elseif", tELSEIF},
52 {L"empty", tEMPTY},
53 {L"end", tEND},
54 {L"eqv", tEQV},
55 {L"error", tERROR},
56 {L"exit", tEXIT},
57 {L"explicit", tEXPLICIT},
58 {L"false", tFALSE},
59 {L"for", tFOR},
60 {L"function", tFUNCTION},
61 {L"get", tGET},
62 {L"goto", tGOTO},
63 {L"if", tIF},
64 {L"imp", tIMP},
65 {L"in", tIN},
66 {L"is", tIS},
67 {L"let", tLET},
68 {L"loop", tLOOP},
69 {L"me", tME},
70 {L"mod", tMOD},
71 {L"new", tNEW},
72 {L"next", tNEXT},
73 {L"not", tNOT},
74 {L"nothing", tNOTHING},
75 {L"null", tNULL},
76 {L"on", tON},
77 {L"option", tOPTION},
78 {L"or", tOR},
79 {L"preserve", tPRESERVE},
80 {L"private", tPRIVATE},
81 {L"property", tPROPERTY},
82 {L"public", tPUBLIC},
83 {L"redim", tREDIM},
84 {L"rem", tREM},
85 {L"resume", tRESUME},
86 {L"select", tSELECT},
87 {L"set", tSET},
88 {L"step", tSTEP},
89 {L"stop", tSTOP},
90 {L"sub", tSUB},
91 {L"then", tTHEN},
92 {L"to", tTO},
93 {L"true", tTRUE},
94 {L"until", tUNTIL},
95 {L"wend", tWEND},
96 {L"while", tWHILE},
97 {L"with", tWITH},
98 {L"xor", tXOR}
99};
100
102{
103 return iswalnum(c) || c == '_';
104}
105
106static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
107{
108 const WCHAR *p1 = ctx->ptr;
109 const WCHAR *p2 = word;
110 WCHAR c;
111
112 while(p1 < ctx->end && *p2) {
113 c = towlower(*p1);
114 if(c != *p2)
115 return c - *p2;
116 p1++;
117 p2++;
118 }
119
120 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
121 return 1;
122
123 ctx->ptr = p1;
124 *lval = word;
125 return 0;
126}
127
128static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
129{
130 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i;
131
132 while(min <= max) {
133 i = (min+max)/2;
134
135 r = check_keyword(ctx, keywords[i].word, lval);
136 if(!r)
137 return keywords[i].token;
138
139 if(r > 0)
140 min = i+1;
141 else
142 max = i-1;
143 }
144
145 return 0;
146}
147
149{
150 const WCHAR *ptr = ctx->ptr++;
151 WCHAR *str;
152 int len;
153
154 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
155 ctx->ptr++;
156 len = ctx->ptr-ptr;
157
158 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
159 if(!str)
160 return 0;
161
162 memcpy(str, ptr, (len+1)*sizeof(WCHAR));
163 str[len] = 0;
164 *ret = str;
165 return tIdentifier;
166}
167
169{
170 const WCHAR *ptr = ++ctx->ptr;
171 WCHAR *rptr;
172 int len = 0;
173
174 while(ctx->ptr < ctx->end) {
175 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
176 FIXME("newline inside string literal\n");
177 return 0;
178 }
179
180 if(*ctx->ptr == '"') {
181 if(ctx->ptr[1] != '"')
182 break;
183 len--;
184 ctx->ptr++;
185 }
186 ctx->ptr++;
187 }
188
189 if(ctx->ptr == ctx->end) {
190 FIXME("unterminated string literal\n");
191 return 0;
192 }
193
194 len += ctx->ptr-ptr;
195
196 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
197 if(!rptr)
198 return 0;
199
200 while(ptr < ctx->ptr) {
201 if(*ptr == '"')
202 ptr++;
203 *rptr++ = *ptr++;
204 }
205
206 *rptr = 0;
207 ctx->ptr++;
208 return tString;
209}
210
212{
213 const WCHAR *ptr = ++ctx->ptr;
214 WCHAR *rptr;
215 int len = 0;
216 HRESULT res;
217
218 while(ctx->ptr < ctx->end) {
219 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
220 FIXME("newline inside date literal\n");
221 return 0;
222 }
223
224 if(*ctx->ptr == '#')
225 break;
226 ctx->ptr++;
227 }
228
229 if(ctx->ptr == ctx->end) {
230 FIXME("unterminated date literal\n");
231 return 0;
232 }
233
234 len += ctx->ptr-ptr;
235
236 rptr = malloc((len+1)*sizeof(WCHAR));
237 if(!rptr)
238 return 0;
239
240 memcpy( rptr, ptr, len * sizeof(WCHAR));
241 rptr[len] = 0;
242 res = VarDateFromStr(rptr, ctx->lcid, 0, ret);
243 free(rptr);
244 if (FAILED(res)) {
245 FIXME("Invalid date literal\n");
246 return 0;
247 }
248
249 ctx->ptr++;
250 return tDate;
251}
252
254{
255 BOOL use_int = TRUE;
256 LONGLONG d = 0, hlp;
257 int exp = 0;
258 double r;
259
260 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
261 return *ctx->ptr++;
262
263 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
264 hlp = d*10 + *(ctx->ptr++) - '0';
265 if(d>MAXLONGLONG/10 || hlp<0) {
266 exp++;
267 break;
268 }
269 else
270 d = hlp;
271 }
272 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
273 exp++;
274 ctx->ptr++;
275 }
276
277 if(*ctx->ptr == '.') {
278 use_int = FALSE;
279 ctx->ptr++;
280
281 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
282 hlp = d*10 + *(ctx->ptr++) - '0';
283 if(d>MAXLONGLONG/10 || hlp<0)
284 break;
285
286 d = hlp;
287 exp--;
288 }
289 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr))
290 ctx->ptr++;
291 }
292
293 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
294 int e = 0, sign = 1;
295
296 ctx->ptr++;
297 if(*ctx->ptr == '-') {
298 ctx->ptr++;
299 sign = -1;
300 }else if(*ctx->ptr == '+') {
301 ctx->ptr++;
302 }
303
304 if(!is_digit(*ctx->ptr)) {
305 FIXME("Invalid numeric literal\n");
306 return 0;
307 }
308
309 use_int = FALSE;
310
311 do {
312 e = e*10 + *(ctx->ptr++) - '0';
313 if(sign == -1 && -e+exp < -(INT_MAX/100)) {
314 /* The literal will be rounded to 0 anyway. */
315 while(is_digit(*ctx->ptr))
316 ctx->ptr++;
317 *(double*)ret = 0;
318 return tDouble;
319 }
320
321 if(sign*e + exp > INT_MAX/100) {
322 FIXME("Invalid numeric literal\n");
323 return 0;
324 }
325 } while(is_digit(*ctx->ptr));
326
327 exp += sign*e;
328 }
329
330 if(use_int && (LONG)d == d) {
331 *(LONG*)ret = d;
332 return tInt;
333 }
334
335 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
336 if(isinf(r)) {
337 FIXME("Invalid numeric literal\n");
338 return 0;
339 }
340
341 *(double*)ret = r;
342 return tDouble;
343}
344
345static int hex_to_int(WCHAR c)
346{
347 if('0' <= c && c <= '9')
348 return c-'0';
349 if('a' <= c && c <= 'f')
350 return c+10-'a';
351 if('A' <= c && c <= 'F')
352 return c+10-'A';
353 return -1;
354}
355
357{
358 const WCHAR *begin = ctx->ptr;
359 unsigned l = 0, d;
360
361 while((d = hex_to_int(*++ctx->ptr)) != -1)
362 l = l*16 + d;
363
364 if(begin + 9 /* max digits+1 */ < ctx->ptr) {
365 FIXME("invalid literal\n");
366 return 0;
367 }
368
369 if(*ctx->ptr == '&') {
370 ctx->ptr++;
371 *ret = l;
372 }else {
373 *ret = l == (UINT16)l ? (INT16)l : l;
374 }
375 return tInt;
376}
377
379{
380 while(*ctx->ptr == ' ' || *ctx->ptr == '\t')
381 ctx->ptr++;
382}
383
385{
386 ctx->ptr = wcspbrk(ctx->ptr, L"\n\r");
387 if(ctx->ptr)
388 ctx->ptr++;
389 else
390 ctx->ptr = ctx->end;
391 return tNL;
392}
393
394static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx)
395{
396 WCHAR c;
397
399 *loc = ctx->ptr - ctx->code;
400 if(ctx->ptr == ctx->end)
401 return ctx->last_token == tNL ? 0 : tNL;
402
403 c = *ctx->ptr;
404
405 if('0' <= c && c <= '9')
406 return parse_numeric_literal(ctx, lval);
407
408 if(iswalpha(c)) {
409 int ret = 0;
410 if(ctx->last_token != '.' && ctx->last_token != tDOT)
411 ret = check_keywords(ctx, lval);
412 if(!ret)
413 return parse_identifier(ctx, lval);
414 if(ret != tREM)
415 return ret;
416 c = '\'';
417 }
418
419 switch(c) {
420 case '\n':
421 case '\r':
422 ctx->ptr++;
423 return tNL;
424 case '\'':
425 return comment_line(ctx);
426 case ':':
427 case ')':
428 case ',':
429 case '+':
430 case '*':
431 case '/':
432 case '^':
433 case '\\':
434 case '_':
435 return *ctx->ptr++;
436 case '.':
437 /*
438 * We need to distinguish between '.' used as part of a member expression and
439 * a beginning of a dot expression (a member expression accessing with statement
440 * expression) and a floating point number like ".2" .
441 */
442 c = ctx->ptr > ctx->code ? ctx->ptr[-1] : '\n';
443 if (is_identifier_char(c) || c == ')') {
444 ctx->ptr++;
445 return '.';
446 }
447 c = ctx->ptr[1];
448 if('0' <= c && c <= '9')
449 return parse_numeric_literal(ctx, lval);
450 ctx->ptr++;
451 return tDOT;
452 case '-':
453 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
454 return comment_line(ctx);
455 ctx->ptr++;
456 return '-';
457 case '(':
458 /* NOTE:
459 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
460 * in call statement special case |f()| without 'call' keyword
461 */
462 ctx->ptr++;
464 if(*ctx->ptr == ')') {
465 ctx->ptr++;
466 return tEMPTYBRACKETS;
467 }
468 /*
469 * Parser can't predict if bracket is part of argument expression or an argument
470 * in call expression. We predict it here instead.
471 */
472 if(ctx->last_token == tIdentifier || ctx->last_token == ')')
473 return '(';
474 return tEXPRLBRACKET;
475 case '"':
476 return parse_string_literal(ctx, lval);
477 case '#':
478 return parse_date_literal(ctx, lval);
479 case '&':
480 if((*++ctx->ptr == 'h' || *ctx->ptr == 'H') && hex_to_int(ctx->ptr[1]) != -1)
481 return parse_hex_literal(ctx, lval);
482 return '&';
483 case '=':
484 switch(*++ctx->ptr) {
485 case '<':
486 ctx->ptr++;
487 return tLTEQ;
488 case '>':
489 ctx->ptr++;
490 return tGTEQ;
491 }
492 return '=';
493 case '<':
494 switch(*++ctx->ptr) {
495 case '>':
496 ctx->ptr++;
497 return tNEQ;
498 case '=':
499 ctx->ptr++;
500 return tLTEQ;
501 case '!':
502 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
503 return comment_line(ctx);
504 }
505 return '<';
506 case '>':
507 switch(*++ctx->ptr) {
508 case '=':
509 ctx->ptr++;
510 return tGTEQ;
511 case '<':
512 ctx->ptr++;
513 return tNEQ;
514 }
515 return '>';
516 default:
517 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
518 }
519
520 return 0;
521}
522
523int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx)
524{
525 int ret;
526
527 if (ctx->last_token == tEXPRESSION)
528 {
529 ctx->last_token = tNL;
530 return tEXPRESSION;
531 }
532
533 while(1) {
534 ret = parse_next_token(lval, loc, ctx);
535 if(ret == '_') {
537 if(*ctx->ptr != '\n' && *ctx->ptr != '\r') {
538 FIXME("'_' not followed by newline\n");
539 return 0;
540 }
541 if(*ctx->ptr == '\r')
542 ctx->ptr++;
543 if(*ctx->ptr == '\n')
544 ctx->ptr++;
545 continue;
546 }
547 if(ret != tNL || ctx->last_token != tNL)
548 break;
549
550 ctx->last_nl = ctx->ptr-ctx->code;
551 }
552
553 return (ctx->last_token = ret);
554}
unsigned short UINT16
Definition: actypes.h:129
short INT16
Definition: actypes.h:130
#define is_digit(c)
Definition: astoll.c:39
#define WINE_DEFAULT_DEBUG_CHANNEL(t)
Definition: precomp.h:23
#define ARRAY_SIZE(A)
Definition: main.h:20
#define FIXME(fmt,...)
Definition: precomp.h:53
r l[0]
Definition: byte_order.h:168
#define free
Definition: debug_ros.c:5
#define malloc
Definition: debug_ros.c:4
#define TRUE
Definition: types.h:120
#define FALSE
Definition: types.h:117
double DATE
Definition: compat.h:2253
static void * parser_alloc(parser_ctx_t *ctx, DWORD size)
Definition: parser.h:60
_ACRTIMP wchar_t *__cdecl wcspbrk(const wchar_t *, const wchar_t *)
Definition: wcs.c:2021
#define INT_MAX
Definition: limits.h:26
#define isinf(x)
Definition: math.h:359
HRESULT WINAPI VarDateFromStr(OLECHAR *strIn, LCID lcid, ULONG dwFlags, DATE *pdateOut)
Definition: vartype.c:7595
return ret
Definition: mutex.c:146
#define L(x)
Definition: resources.c:13
unsigned int BOOL
Definition: ntddk_ex.h:94
double pow(double x, double y)
Definition: freeldr.c:179
GLuint GLuint end
Definition: gl.h:1545
GLdouble GLdouble GLdouble r
Definition: gl.h:2055
GLuint res
Definition: glext.h:9613
const GLubyte * c
Definition: glext.h:8905
GLenum GLsizei len
Definition: glext.h:6722
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define FAILED(hr)
Definition: intsafe.h:51
static BOOL skip_spaces(parser_ctx_t *ctx)
Definition: lex.c:217
static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
Definition: lex.c:134
int hex_to_int(WCHAR c)
Definition: lex.c:120
static int parse_string_literal(parser_ctx_t *ctx, jsstr_t **ret, WCHAR endch)
Definition: lex.c:340
int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx)
Definition: lex.c:1102
static const struct @442 keywords[]
int token
Definition: lex.c:37
static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
Definition: lex.c:93
BOOL is_identifier_char(WCHAR c)
Definition: lex.c:83
const WCHAR * word
Definition: lex.c:36
static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
Definition: lex.c:321
static BOOL parse_numeric_literal(parser_ctx_t *ctx, double *ret)
Definition: lex.c:473
#define d
Definition: ke_i.h:81
#define e
Definition: ke_i.h:82
#define c
Definition: ke_i.h:80
#define debugstr_w
Definition: kernel32.h:32
#define sign(x)
Definition: mapdesc.cc:613
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
static PVOID ptr
Definition: dispmode.c:27
DWORD exp
Definition: msg.c:18625
#define min(a, b)
Definition: monoChain.cc:55
#define MAXLONGLONG
short WCHAR
Definition: pedump.c:58
long LONG
Definition: pedump.c:60
const WCHAR * str
#define iswalnum(_c)
Definition: ctype.h:671
#define iswalpha(_c)
Definition: ctype.h:664
#define towlower(c)
Definition: wctype.h:97
#define max(a, b)
Definition: svc.c:63
int64_t LONGLONG
Definition: typedefs.h:68
static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
Definition: lex.c:356
static int comment_line(parser_ctx_t *ctx)
Definition: lex.c:384
static int parse_date_literal(parser_ctx_t *ctx, DATE *ret)
Definition: lex.c:211
static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx)
Definition: lex.c:394