ReactOS 0.4.16-dev-258-g81860b4
regexp.c
Go to the documentation of this file.
1/*
2 * Copyright 2008 Jacek Caban for CodeWeavers
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19/*
20 * Code in this file is based on files:
21 * js/src/jsregexp.h
22 * js/src/jsregexp.c
23 * from Mozilla project, released under LGPL 2.1 or later.
24 *
25 * The Original Code is Mozilla Communicator client code, released
26 * March 31, 1998.
27 *
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
32 */
33
34#include <assert.h>
35
36#include "vbscript.h"
37#include "regexp.h"
38
39#include "wine/debug.h"
40
42
43/* FIXME: Better error handling */
44#define ReportRegExpError(a,b,c)
45#define ReportRegExpErrorHelper(a,b,c,d)
46#define JS_ReportErrorNumber(a,b,c,d)
47#define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
48#define js_ReportOutOfScriptQuota(a)
49#define JS_ReportOutOfMemory(a)
50#define JS_COUNT_OPERATION(a,b)
51
52
54
55/*
56 * This struct holds a bitmap representation of a class from a regexp.
57 * There's a list of these referenced by the classList field in the regexp_t
58 * struct below. The initial state has startIndex set to the offset in the
59 * original regexp source of the beginning of the class contents. The first
60 * use of the class converts the source representation into a bitmap.
61 *
62 */
63typedef struct RECharSet {
67 union {
68 BYTE *bits;
69 struct {
70 size_t startIndex;
71 size_t length;
72 } src;
73 } u;
75
76#define JSMSG_MIN_TOO_BIG 47
77#define JSMSG_MAX_TOO_BIG 48
78#define JSMSG_OUT_OF_ORDER 49
79#define JSMSG_OUT_OF_MEMORY 137
80
81#define LINE_SEPARATOR 0x2028
82#define PARA_SEPARATOR 0x2029
83
84#define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
85 ((c >= 'a') && (c <= 'z')) )
86#define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
87 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
88
89#define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
90
91#define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
92#define JS7_UNDEC(c) ((c) - '0')
93
94typedef enum REOp {
144 REOP_LIMIT /* META: no operator >= to this */
146
147#define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
148
149static const char *reop_names[] = {
150 "empty",
151 "bol",
152 "eol",
153 "wbdry",
154 "wnonbdry",
155 "dot",
156 "digit",
157 "nondigit",
158 "alnum",
159 "nonalnum",
160 "space",
161 "nonspace",
162 "backref",
163 "flat",
164 "flat1",
165 "flati",
166 "flat1i",
167 "ucflat1",
168 "ucflat1i",
169 "ucflat",
170 "ucflati",
171 "class",
172 "nclass",
173 "alt",
174 "quant",
175 "star",
176 "plus",
177 "opt",
178 "lparen",
179 "rparen",
180 "jump",
181 "dotstar",
182 "lparennon",
183 "assert",
184 "assert_not",
185 "asserttest",
186 "assertnottest",
187 "minimalstar",
188 "minimalplus",
189 "minimalopt",
190 "minimalquant",
191 "endchild",
192 "repeat",
193 "minimalrepeat",
194 "altprereq",
195 "alrprereq2",
196 "endalt",
197 "concat",
198 "end",
199 NULL
200};
201
202typedef struct REProgState {
203 jsbytecode *continue_pc; /* current continuation data */
205 ptrdiff_t index; /* progress in text */
206 size_t parenSoFar; /* highest indexed paren started */
207 union {
208 struct {
209 UINT min; /* current quantifier limits */
210 UINT max;
212 struct {
213 size_t top; /* backtrack stack state */
214 size_t sz;
216 } u;
218
219typedef struct REBackTrackData {
220 size_t sz; /* size of previous stack entry */
221 jsbytecode *backtrack_pc; /* where to backtrack to */
223 const WCHAR *cp; /* index in text of match at backtrack */
224 size_t parenIndex; /* start index of saved paren contents */
225 size_t parenCount; /* # of saved paren contents */
226 size_t saveStateStackTop; /* number of parent states */
227 /* saved parent states follow */
228 /* saved paren contents follow */
230
231#define INITIAL_STATESTACK 100
232#define INITIAL_BACKTRACK 8000
233
234typedef struct REGlobalData {
235 void *cx;
236 regexp_t *regexp; /* the RE in execution */
237 BOOL ok; /* runtime error (out_of_memory only?) */
238 size_t start; /* offset to start at */
239 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
240 const WCHAR *cpbegin; /* text base address */
241 const WCHAR *cpend; /* text limit address */
242
243 REProgState *stateStack; /* stack of state of current parents */
244 size_t stateStackTop;
245 size_t stateStackLimit;
246
247 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
249 size_t backTrackStackSize;
250 size_t cursz; /* size of current stack entry */
251 size_t backTrackCount; /* how many times we've backtracked */
252 size_t backTrackLimit; /* upper limit on backtrack states */
253
254 heap_pool_t *pool; /* It's faster to use one malloc'd pool
255 than to malloc/free the three items
256 that are allocated from this pool */
258
259typedef struct RENode RENode;
260struct RENode {
261 REOp op; /* r.e. op bytecode */
262 RENode *next; /* next in concatenation order */
263 void *kid; /* first operand */
264 union {
265 void *kid2; /* second operand */
266 INT num; /* could be a number */
267 size_t parenIndex; /* or a parenthesis index */
268 struct { /* or a quantifier range */
269 UINT min;
270 UINT max;
273 struct { /* or a character class */
274 size_t startIndex;
275 size_t kidlen; /* length of string at kid, in jschars */
276 size_t index; /* index into class list */
277 WORD bmsize; /* bitmap size, based on max char code */
280 struct { /* or a literal sequence */
281 WCHAR chr; /* of one character */
282 size_t length; /* or many (via the kid) */
284 struct {
285 RENode *kid2; /* second operand from ALT */
286 WCHAR ch1; /* match char for ALTPREREQ */
287 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
289 } u;
290};
291
292#define CLASS_CACHE_SIZE 4
293
294typedef struct CompilerState {
295 void *context;
296 const WCHAR *cpbegin;
297 const WCHAR *cpend;
298 const WCHAR *cp;
299 size_t parenCount;
300 size_t classCount; /* number of [] encountered */
301 size_t treeDepth; /* maximum depth of parse tree */
302 size_t progLength; /* estimated bytecode length */
303 RENode *result;
304 size_t classBitmapsMem; /* memory to hold all class bitmaps */
305 struct {
306 const WCHAR *start; /* small cache of class strings */
307 size_t length; /* since they're often the same */
308 size_t index;
310 WORD flags;
311
312 heap_pool_t *pool; /* It's faster to use one malloc'd pool
313 than to malloc/free */
315
316typedef struct EmitStateStackEntry {
317 jsbytecode *altHead; /* start of REOP_ALT* opcode */
318 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
319 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
320 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
321 RENode *continueNode; /* original REOP_ALT* node being stacked */
322 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
323 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
324 avoid 16-bit unsigned offset overflow */
326
327/*
328 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
329 * the getters and setters take the pc of the offset, not of the opcode before
330 * the offset.
331 */
332#define ARG_LEN 2
333#define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
334#define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
335 (pc)[1] = (jsbytecode) (arg))
336
337#define OFFSET_LEN ARG_LEN
338#define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
339#define GET_OFFSET(pc) GET_ARG(pc)
340
342
343/*
344 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
345 * For sanity, we limit it to 2^24 bytes.
346 */
347#define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
348
349/*
350 * The maximum memory that can be allocated for class bitmaps.
351 * For sanity, we limit it to 2^24 bytes.
352 */
353#define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
354
355/*
356 * Functions to get size and write/read bytecode that represent small indexes
357 * compactly.
358 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
359 * indicates that the following byte brings more bits to the index. Otherwise
360 * this is the last byte in the index bytecode representing highest index bits.
361 */
362static size_t
364{
365 size_t width;
366
367 for (width = 1; (index >>= 7) != 0; ++width) { }
368 return width;
369}
370
371static inline jsbytecode *
373{
374 size_t next;
375
376 while ((next = index >> 7) != 0) {
377 *pc++ = (jsbytecode)(index | 0x80);
378 index = next;
379 }
380 *pc++ = (jsbytecode)index;
381 return pc;
382}
383
384static inline jsbytecode *
386{
387 size_t nextByte;
388
389 nextByte = *pc++;
390 if ((nextByte & 0x80) == 0) {
391 /*
392 * Short-circuit the most common case when compact index <= 127.
393 */
394 *result = nextByte;
395 } else {
396 size_t shift = 7;
397 *result = 0x7F & nextByte;
398 do {
399 nextByte = *pc++;
400 *result |= (nextByte & 0x7F) << shift;
401 shift += 7;
402 } while ((nextByte & 0x80) != 0);
403 }
404 return pc;
405}
406
407/* Construct and initialize an RENode, returning NULL for out-of-memory */
408static RENode *
410{
411 RENode *ren;
412
413 ren = heap_pool_alloc(state->pool, sizeof(*ren));
414 if (!ren) {
415 /* js_ReportOutOfScriptQuota(cx); */
416 return NULL;
417 }
418 ren->op = op;
419 ren->next = NULL;
420 ren->kid = NULL;
421 return ren;
422}
423
424/*
425 * Validates and converts hex ascii value.
426 */
427static BOOL
429{
430 UINT cv = c;
431
432 if (cv < '0')
433 return FALSE;
434 if (cv <= '9') {
435 *digit = cv - '0';
436 return TRUE;
437 }
438 cv |= 0x20;
439 if (cv >= 'a' && cv <= 'f') {
440 *digit = cv - 'a' + 10;
441 return TRUE;
442 }
443 return FALSE;
444}
445
446typedef struct {
447 REOp op;
448 const WCHAR *errPos;
449 size_t parenIndex;
450} REOpData;
451
452#define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
453#define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
454
455static BOOL
457{
458 ptrdiff_t offset = target - jump;
459
460 /* Check that target really points forward. */
461 assert(offset >= 2);
462 if ((size_t)offset > OFFSET_MAX)
463 return FALSE;
464
465 jump[0] = JUMP_OFFSET_HI(offset);
466 jump[1] = JUMP_OFFSET_LO(offset);
467 return TRUE;
468}
469
470/*
471 * Generate bytecode for the tree rooted at t using an explicit stack instead
472 * of recursion.
473 */
474static jsbytecode *
476 jsbytecode *pc, RENode *t)
477{
478 EmitStateStackEntry *emitStateSP, *emitStateStack;
479 RECharSet *charSet;
480 REOp op;
481
482 if (treeDepth == 0) {
483 emitStateStack = NULL;
484 } else {
485 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
486 if (!emitStateStack)
487 return NULL;
488 }
489 emitStateSP = emitStateStack;
490 op = t->op;
492
493 for (;;) {
494 *pc++ = op;
495 switch (op) {
496 case REOP_EMPTY:
497 --pc;
498 break;
499
500 case REOP_ALTPREREQ2:
501 case REOP_ALTPREREQ:
502 assert(emitStateSP);
503 emitStateSP->altHead = pc - 1;
504 emitStateSP->endTermFixup = pc;
505 pc += OFFSET_LEN;
506 SET_ARG(pc, t->u.altprereq.ch1);
507 pc += ARG_LEN;
508 SET_ARG(pc, t->u.altprereq.ch2);
509 pc += ARG_LEN;
510
511 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
512 pc += OFFSET_LEN;
513
514 emitStateSP->continueNode = t;
515 emitStateSP->continueOp = REOP_JUMP;
516 emitStateSP->jumpToJumpFlag = FALSE;
517 ++emitStateSP;
518 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
519 t = t->kid;
520 op = t->op;
522 continue;
523
524 case REOP_JUMP:
525 emitStateSP->nextTermFixup = pc; /* offset to following term */
526 pc += OFFSET_LEN;
527 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
528 goto jump_too_big;
529 emitStateSP->continueOp = REOP_ENDALT;
530 ++emitStateSP;
531 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
532 t = t->u.kid2;
533 op = t->op;
535 continue;
536
537 case REOP_ENDALT:
538 /*
539 * If we already patched emitStateSP->nextTermFixup to jump to
540 * a nearer jump, to avoid 16-bit immediate offset overflow, we
541 * are done here.
542 */
543 if (emitStateSP->jumpToJumpFlag)
544 break;
545
546 /*
547 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
548 * REOP_ENDALT is executed only on successful match of the last
549 * alternate in a group.
550 */
551 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
552 goto jump_too_big;
553 if (t->op != REOP_ALT) {
554 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
555 goto jump_too_big;
556 }
557
558 /*
559 * If the program is bigger than the REOP_JUMP offset range, then
560 * we must check for alternates before this one that are part of
561 * the same group, and fix up their jump offsets to target jumps
562 * close enough to fit in a 16-bit unsigned offset immediate.
563 */
564 if ((size_t)(pc - re->program) > OFFSET_MAX &&
565 emitStateSP > emitStateStack) {
566 EmitStateStackEntry *esp, *esp2;
567 jsbytecode *alt, *jump;
569
570 esp2 = emitStateSP;
571 alt = esp2->altHead;
572 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
573 if (esp->continueOp == REOP_ENDALT &&
574 !esp->jumpToJumpFlag &&
575 esp->nextTermFixup + OFFSET_LEN == alt &&
576 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
577 ? esp->endTermFixup
578 : esp->nextTermFixup)) > OFFSET_MAX) {
579 alt = esp->altHead;
580 jump = esp->nextTermFixup;
581
582 /*
583 * The span must be 1 less than the distance from
584 * jump offset to jump offset, so we actually jump
585 * to a REOP_JUMP bytecode, not to its offset!
586 */
587 for (;;) {
588 assert(jump < esp2->nextTermFixup);
589 span = esp2->nextTermFixup - jump - 1;
590 if ((size_t)span <= OFFSET_MAX)
591 break;
592 do {
593 if (--esp2 == esp)
594 goto jump_too_big;
595 } while (esp2->continueOp != REOP_ENDALT);
596 }
597
598 jump[0] = JUMP_OFFSET_HI(span);
599 jump[1] = JUMP_OFFSET_LO(span);
600
601 if (esp->continueNode->op != REOP_ALT) {
602 /*
603 * We must patch the offset at esp->endTermFixup
604 * as well, for the REOP_ALTPREREQ{,2} opcodes.
605 * If we're unlucky and endTermFixup is more than
606 * OFFSET_MAX bytes from its target, we cheat by
607 * jumping 6 bytes to the jump whose offset is at
608 * esp->nextTermFixup, which has the same target.
609 */
610 jump = esp->endTermFixup;
611 header = esp->nextTermFixup - jump;
612 span += header;
613 if ((size_t)span > OFFSET_MAX)
614 span = header;
615
616 jump[0] = JUMP_OFFSET_HI(span);
617 jump[1] = JUMP_OFFSET_LO(span);
618 }
619
620 esp->jumpToJumpFlag = TRUE;
621 }
622 }
623 }
624 break;
625
626 case REOP_ALT:
627 assert(emitStateSP);
628 emitStateSP->altHead = pc - 1;
629 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
630 pc += OFFSET_LEN;
631 emitStateSP->continueNode = t;
632 emitStateSP->continueOp = REOP_JUMP;
633 emitStateSP->jumpToJumpFlag = FALSE;
634 ++emitStateSP;
635 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
636 t = t->kid;
637 op = t->op;
639 continue;
640
641 case REOP_FLAT:
642 /*
643 * Coalesce FLATs if possible and if it would not increase bytecode
644 * beyond preallocated limit. The latter happens only when bytecode
645 * size for coalesced string with offset p and length 2 exceeds 6
646 * bytes preallocated for 2 single char nodes, i.e. when
647 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
648 * GetCompactIndexWidth(p) > 4.
649 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
650 * nodes strictly decreases bytecode size, the check has to be
651 * done only for the first coalescing.
652 */
653 if (t->kid &&
654 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
655 {
656 while (t->next &&
657 t->next->op == REOP_FLAT &&
658 (WCHAR*)t->kid + t->u.flat.length ==
659 t->next->kid) {
660 t->u.flat.length += t->next->u.flat.length;
661 t->next = t->next->next;
662 }
663 }
664 if (t->kid && t->u.flat.length > 1) {
665 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLATi : REOP_FLAT;
666 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
667 pc = WriteCompactIndex(pc, t->u.flat.length);
668 } else if (t->u.flat.chr < 256) {
669 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
670 *pc++ = (jsbytecode) t->u.flat.chr;
671 } else {
672 pc[-1] = (state->flags & REG_FOLD)
674 : REOP_UCFLAT1;
675 SET_ARG(pc, t->u.flat.chr);
676 pc += ARG_LEN;
677 }
678 break;
679
680 case REOP_LPAREN:
681 assert(emitStateSP);
682 pc = WriteCompactIndex(pc, t->u.parenIndex);
683 emitStateSP->continueNode = t;
684 emitStateSP->continueOp = REOP_RPAREN;
685 ++emitStateSP;
686 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
687 t = t->kid;
688 op = t->op;
689 continue;
690
691 case REOP_RPAREN:
692 pc = WriteCompactIndex(pc, t->u.parenIndex);
693 break;
694
695 case REOP_BACKREF:
696 pc = WriteCompactIndex(pc, t->u.parenIndex);
697 break;
698
699 case REOP_ASSERT:
700 assert(emitStateSP);
701 emitStateSP->nextTermFixup = pc;
702 pc += OFFSET_LEN;
703 emitStateSP->continueNode = t;
704 emitStateSP->continueOp = REOP_ASSERTTEST;
705 ++emitStateSP;
706 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
707 t = t->kid;
708 op = t->op;
709 continue;
710
711 case REOP_ASSERTTEST:
713 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
714 goto jump_too_big;
715 break;
716
717 case REOP_ASSERT_NOT:
718 assert(emitStateSP);
719 emitStateSP->nextTermFixup = pc;
720 pc += OFFSET_LEN;
721 emitStateSP->continueNode = t;
722 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
723 ++emitStateSP;
724 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
725 t = t->kid;
726 op = t->op;
727 continue;
728
729 case REOP_QUANT:
730 assert(emitStateSP);
731 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
732 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
733 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
734 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
735 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
736 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
737 } else {
738 if (!t->u.range.greedy)
739 pc[-1] = REOP_MINIMALQUANT;
740 pc = WriteCompactIndex(pc, t->u.range.min);
741 /*
742 * Write max + 1 to avoid using size_t(max) + 1 bytes
743 * for (UINT)-1 sentinel.
744 */
745 pc = WriteCompactIndex(pc, t->u.range.max + 1);
746 }
747 emitStateSP->nextTermFixup = pc;
748 pc += OFFSET_LEN;
749 emitStateSP->continueNode = t;
750 emitStateSP->continueOp = REOP_ENDCHILD;
751 ++emitStateSP;
752 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
753 t = t->kid;
754 op = t->op;
755 continue;
756
757 case REOP_ENDCHILD:
758 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
759 goto jump_too_big;
760 break;
761
762 case REOP_CLASS:
763 if (!t->u.ucclass.sense)
764 pc[-1] = REOP_NCLASS;
765 pc = WriteCompactIndex(pc, t->u.ucclass.index);
766 charSet = &re->classList[t->u.ucclass.index];
767 charSet->converted = FALSE;
768 charSet->length = t->u.ucclass.bmsize;
769 charSet->u.src.startIndex = t->u.ucclass.startIndex;
770 charSet->u.src.length = t->u.ucclass.kidlen;
771 charSet->sense = t->u.ucclass.sense;
772 break;
773
774 default:
775 break;
776 }
777
778 t = t->next;
779 if (t) {
780 op = t->op;
781 } else {
782 if (emitStateSP == emitStateStack)
783 break;
784 --emitStateSP;
785 t = emitStateSP->continueNode;
786 op = (REOp) emitStateSP->continueOp;
787 }
788 }
789
790 cleanup:
791 heap_free(emitStateStack);
792 return pc;
793
794 jump_too_big:
795 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
796 pc = NULL;
797 goto cleanup;
798}
799
800/*
801 * Process the op against the two top operands, reducing them to a single
802 * operand in the penultimate slot. Update progLength and treeDepth.
803 */
804static BOOL
805ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
806 INT operandSP)
807{
808 RENode *result;
809
810 switch (opData->op) {
811 case REOP_ALT:
813 if (!result)
814 return FALSE;
815 result->kid = operandStack[operandSP - 2];
816 result->u.kid2 = operandStack[operandSP - 1];
817 operandStack[operandSP - 2] = result;
818
819 if (state->treeDepth == TREE_DEPTH_MAX) {
820 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
821 return FALSE;
822 }
823 ++state->treeDepth;
824
825 /*
826 * Look at both alternates to see if there's a FLAT or a CLASS at
827 * the start of each. If so, use a prerequisite match.
828 */
829 if (((RENode *) result->kid)->op == REOP_FLAT &&
830 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
831 (state->flags & REG_FOLD) == 0) {
833 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
834 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
835 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
836 JUMP, <end> ... ENDALT */
837 state->progLength += 13;
838 }
839 else
840 if (((RENode *) result->kid)->op == REOP_CLASS &&
841 ((RENode *) result->kid)->u.ucclass.index < 256 &&
842 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
843 (state->flags & REG_FOLD) == 0) {
845 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
846 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
847 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
848 JUMP, <end> ... ENDALT */
849 state->progLength += 13;
850 }
851 else
852 if (((RENode *) result->kid)->op == REOP_FLAT &&
853 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
854 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
855 (state->flags & REG_FOLD) == 0) {
857 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
858 result->u.altprereq.ch2 =
859 ((RENode *) result->u.kid2)->u.ucclass.index;
860 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
861 JUMP, <end> ... ENDALT */
862 state->progLength += 13;
863 }
864 else {
865 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
866 state->progLength += 7;
867 }
868 break;
869
870 case REOP_CONCAT:
871 result = operandStack[operandSP - 2];
872 while (result->next)
873 result = result->next;
874 result->next = operandStack[operandSP - 1];
875 break;
876
877 case REOP_ASSERT:
878 case REOP_ASSERT_NOT:
879 case REOP_LPARENNON:
880 case REOP_LPAREN:
881 /* These should have been processed by a close paren. */
882 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
883 opData->errPos);
884 return FALSE;
885
886 default:;
887 }
888 return TRUE;
889}
890
891/*
892 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
893 * it being on the stack, and to propagate errors to its callers.
894 */
895#define JSREG_FIND_PAREN_COUNT 0x8000
896#define JSREG_FIND_PAREN_ERROR 0x4000
897
898/*
899 * Magic return value from FindParenCount and GetDecimalValue, to indicate
900 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
901 * its findMax parameter is non-null.
902 */
903#define OVERFLOW_VALUE ((UINT)-1)
904
905static UINT
907{
909 int i;
910
911 if (state->flags & JSREG_FIND_PAREN_COUNT)
912 return OVERFLOW_VALUE;
913
914 /*
915 * Copy state into temp, flag it so we never report an invalid backref,
916 * and reset its members to parse the entire regexp. This is obviously
917 * suboptimal, but GetDecimalValue calls us only if a backref appears to
918 * refer to a forward parenthetical, which is rare.
919 */
920 temp = *state;
922 temp.cp = temp.cpbegin;
923 temp.parenCount = 0;
924 temp.classCount = 0;
925 temp.progLength = 0;
926 temp.treeDepth = 0;
927 temp.classBitmapsMem = 0;
928 for (i = 0; i < CLASS_CACHE_SIZE; i++)
929 temp.classCache[i].start = NULL;
930
931 if (!ParseRegExp(&temp)) {
933 return OVERFLOW_VALUE;
934 }
935 return temp.parenCount;
936}
937
938/*
939 * Extract and return a decimal value at state->cp. The initial character c
940 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
941 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
942 * state->flags to discover whether an error occurred under findMax.
943 */
944static UINT
947{
949 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
950
951 /* The following restriction allows simpler overflow checks. */
952 assert(max <= ((UINT)-1 - 9) / 10);
953 while (state->cp < state->cpend) {
954 c = *state->cp;
955 if (!JS7_ISDEC(c))
956 break;
957 value = 10 * value + JS7_UNDEC(c);
958 if (!overflow && value > max && (!findMax || value > findMax(state)))
959 overflow = TRUE;
960 ++state->cp;
961 }
962 return overflow ? OVERFLOW_VALUE : value;
963}
964
965/*
966 * Calculate the total size of the bitmap required for a class expression.
967 */
968static BOOL
970 const WCHAR *end)
971{
972 UINT max = 0;
973 BOOL inRange = FALSE;
974 WCHAR c, rangeStart = 0;
975 UINT n, digit, nDigits, i;
976
977 target->u.ucclass.bmsize = 0;
978 target->u.ucclass.sense = TRUE;
979
980 if (src == end)
981 return TRUE;
982
983 if (*src == '^') {
984 ++src;
985 target->u.ucclass.sense = FALSE;
986 }
987
988 while (src != end) {
989 BOOL canStartRange = TRUE;
990 UINT localMax = 0;
991
992 switch (*src) {
993 case '\\':
994 ++src;
995 c = *src++;
996 switch (c) {
997 case 'b':
998 localMax = 0x8;
999 break;
1000 case 'f':
1001 localMax = 0xC;
1002 break;
1003 case 'n':
1004 localMax = 0xA;
1005 break;
1006 case 'r':
1007 localMax = 0xD;
1008 break;
1009 case 't':
1010 localMax = 0x9;
1011 break;
1012 case 'v':
1013 localMax = 0xB;
1014 break;
1015 case 'c':
1016 if (src < end && RE_IS_LETTER(*src)) {
1017 localMax = (UINT) (*src++) & 0x1F;
1018 } else {
1019 --src;
1020 localMax = '\\';
1021 }
1022 break;
1023 case 'x':
1024 nDigits = 2;
1025 goto lexHex;
1026 case 'u':
1027 nDigits = 4;
1028lexHex:
1029 n = 0;
1030 for (i = 0; (i < nDigits) && (src < end); i++) {
1031 c = *src++;
1032 if (!isASCIIHexDigit(c, &digit)) {
1033 /*
1034 * Back off to accepting the original
1035 *'\' as a literal.
1036 */
1037 src -= i + 1;
1038 n = '\\';
1039 break;
1040 }
1041 n = (n << 4) | digit;
1042 }
1043 localMax = n;
1044 break;
1045 case 'd':
1046 canStartRange = FALSE;
1047 if (inRange) {
1048 JS_ReportErrorNumber(state->context,
1049 js_GetErrorMessage, NULL,
1050 JSMSG_BAD_CLASS_RANGE);
1051 return FALSE;
1052 }
1053 localMax = '9';
1054 break;
1055 case 'D':
1056 case 's':
1057 case 'S':
1058 case 'w':
1059 case 'W':
1060 canStartRange = FALSE;
1061 if (inRange) {
1062 JS_ReportErrorNumber(state->context,
1063 js_GetErrorMessage, NULL,
1064 JSMSG_BAD_CLASS_RANGE);
1065 return FALSE;
1066 }
1067 max = 65535;
1068
1069 /*
1070 * If this is the start of a range, ensure that it's less than
1071 * the end.
1072 */
1073 localMax = 0;
1074 break;
1075 case '0':
1076 case '1':
1077 case '2':
1078 case '3':
1079 case '4':
1080 case '5':
1081 case '6':
1082 case '7':
1083 /*
1084 * This is a non-ECMA extension - decimal escapes (in this
1085 * case, octal!) are supposed to be an error inside class
1086 * ranges, but supported here for backwards compatibility.
1087 *
1088 */
1089 n = JS7_UNDEC(c);
1090 c = *src;
1091 if ('0' <= c && c <= '7') {
1092 src++;
1093 n = 8 * n + JS7_UNDEC(c);
1094 c = *src;
1095 if ('0' <= c && c <= '7') {
1096 src++;
1097 i = 8 * n + JS7_UNDEC(c);
1098 if (i <= 0377)
1099 n = i;
1100 else
1101 src--;
1102 }
1103 }
1104 localMax = n;
1105 break;
1106
1107 default:
1108 localMax = c;
1109 break;
1110 }
1111 break;
1112 default:
1113 localMax = *src++;
1114 break;
1115 }
1116
1117 if (inRange) {
1118 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1119 if (rangeStart > localMax) {
1120 JS_ReportErrorNumber(state->context,
1121 js_GetErrorMessage, NULL,
1122 JSMSG_BAD_CLASS_RANGE);
1123 return FALSE;
1124 }
1125 inRange = FALSE;
1126 } else {
1127 if (canStartRange && src < end - 1) {
1128 if (*src == '-') {
1129 ++src;
1130 inRange = TRUE;
1131 rangeStart = (WCHAR)localMax;
1132 continue;
1133 }
1134 }
1135 if (state->flags & REG_FOLD)
1136 rangeStart = localMax; /* one run of the uc/dc loop below */
1137 }
1138
1139 if (state->flags & REG_FOLD) {
1140 WCHAR maxch = localMax;
1141
1142 for (i = rangeStart; i <= localMax; i++) {
1143 WCHAR uch, dch;
1144
1145 uch = towupper(i);
1146 dch = towlower(i);
1147 if(maxch < uch)
1148 maxch = uch;
1149 if(maxch < dch)
1150 maxch = dch;
1151 }
1152 localMax = maxch;
1153 }
1154
1155 if (localMax > max)
1156 max = localMax;
1157 }
1158 target->u.ucclass.bmsize = max;
1159 return TRUE;
1160}
1161
1162static INT
1164{
1165 UINT min, max;
1166 WCHAR c;
1167 const WCHAR *errp = state->cp++;
1168
1169 c = *state->cp;
1170 if (JS7_ISDEC(c)) {
1171 ++state->cp;
1172 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1173 c = *state->cp;
1174
1175 if (!ignoreValues && min == OVERFLOW_VALUE)
1176 return JSMSG_MIN_TOO_BIG;
1177
1178 if (c == ',') {
1179 c = *++state->cp;
1180 if (JS7_ISDEC(c)) {
1181 ++state->cp;
1182 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1183 c = *state->cp;
1184 if (!ignoreValues && max == OVERFLOW_VALUE)
1185 return JSMSG_MAX_TOO_BIG;
1186 if (!ignoreValues && min > max)
1187 return JSMSG_OUT_OF_ORDER;
1188 } else {
1189 max = (UINT)-1;
1190 }
1191 } else {
1192 max = min;
1193 }
1194 if (c == '}') {
1195 state->result = NewRENode(state, REOP_QUANT);
1196 if (!state->result)
1197 return JSMSG_OUT_OF_MEMORY;
1198 state->result->u.range.min = min;
1199 state->result->u.range.max = max;
1200 /*
1201 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1202 * where <max> is written as compact(max+1) to make
1203 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1204 */
1205 state->progLength += (1 + GetCompactIndexWidth(min)
1207 +3);
1208 return 0;
1209 }
1210 }
1211
1212 state->cp = errp;
1213 return -1;
1214}
1215
1216static BOOL
1218{
1219 RENode *term;
1220 term = state->result;
1221 if (state->cp < state->cpend) {
1222 switch (*state->cp) {
1223 case '+':
1224 state->result = NewRENode(state, REOP_QUANT);
1225 if (!state->result)
1226 return FALSE;
1227 state->result->u.range.min = 1;
1228 state->result->u.range.max = (UINT)-1;
1229 /* <PLUS>, <next> ... <ENDCHILD> */
1230 state->progLength += 4;
1231 goto quantifier;
1232 case '*':
1233 state->result = NewRENode(state, REOP_QUANT);
1234 if (!state->result)
1235 return FALSE;
1236 state->result->u.range.min = 0;
1237 state->result->u.range.max = (UINT)-1;
1238 /* <STAR>, <next> ... <ENDCHILD> */
1239 state->progLength += 4;
1240 goto quantifier;
1241 case '?':
1242 state->result = NewRENode(state, REOP_QUANT);
1243 if (!state->result)
1244 return FALSE;
1245 state->result->u.range.min = 0;
1246 state->result->u.range.max = 1;
1247 /* <OPT>, <next> ... <ENDCHILD> */
1248 state->progLength += 4;
1249 goto quantifier;
1250 case '{': /* balance '}' */
1251 {
1252 INT err;
1253
1255 if (err == 0)
1256 goto quantifier;
1257 if (err == -1)
1258 return TRUE;
1259
1260 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1261 return FALSE;
1262 }
1263 default:;
1264 }
1265 }
1266 return TRUE;
1267
1268quantifier:
1269 if (state->treeDepth == TREE_DEPTH_MAX) {
1270 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1271 return FALSE;
1272 }
1273
1274 ++state->treeDepth;
1275 ++state->cp;
1276 state->result->kid = term;
1277 if (state->cp < state->cpend && *state->cp == '?') {
1278 ++state->cp;
1279 state->result->u.range.greedy = FALSE;
1280 } else {
1281 state->result->u.range.greedy = TRUE;
1282 }
1283 return TRUE;
1284}
1285
1286/*
1287 * item: assertion An item is either an assertion or
1288 * quantatom a quantified atom.
1289 *
1290 * assertion: '^' Assertions match beginning of string
1291 * (or line if the class static property
1292 * RegExp.multiline is true).
1293 * '$' End of string (or line if the class
1294 * static property RegExp.multiline is
1295 * true).
1296 * '\b' Word boundary (between \w and \W).
1297 * '\B' Word non-boundary.
1298 *
1299 * quantatom: atom An unquantified atom.
1300 * quantatom '{' n ',' m '}'
1301 * Atom must occur between n and m times.
1302 * quantatom '{' n ',' '}' Atom must occur at least n times.
1303 * quantatom '{' n '}' Atom must occur exactly n times.
1304 * quantatom '*' Zero or more times (same as {0,}).
1305 * quantatom '+' One or more times (same as {1,}).
1306 * quantatom '?' Zero or one time (same as {0,1}).
1307 *
1308 * any of which can be optionally followed by '?' for ungreedy
1309 *
1310 * atom: '(' regexp ')' A parenthesized regexp (what matched
1311 * can be addressed using a backreference,
1312 * see '\' n below).
1313 * '.' Matches any char except '\n'.
1314 * '[' classlist ']' A character class.
1315 * '[' '^' classlist ']' A negated character class.
1316 * '\f' Form Feed.
1317 * '\n' Newline (Line Feed).
1318 * '\r' Carriage Return.
1319 * '\t' Horizontal Tab.
1320 * '\v' Vertical Tab.
1321 * '\d' A digit (same as [0-9]).
1322 * '\D' A non-digit.
1323 * '\w' A word character, [0-9a-z_A-Z].
1324 * '\W' A non-word character.
1325 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1326 * '\S' A non-whitespace character.
1327 * '\' n A backreference to the nth (n decimal
1328 * and positive) parenthesized expression.
1329 * '\' octal An octal escape sequence (octal must be
1330 * two or three digits long, unless it is
1331 * 0 for the null character).
1332 * '\x' hex A hex escape (hex must be two digits).
1333 * '\u' unicode A unicode escape (must be four digits).
1334 * '\c' ctrl A control character, ctrl is a letter.
1335 * '\' literalatomchar Any character except one of the above
1336 * that follow '\' in an atom.
1337 * otheratomchar Any character not first among the other
1338 * atom right-hand sides.
1339 */
1340static BOOL
1342{
1343 WCHAR c = *state->cp++;
1344 UINT nDigits;
1345 UINT num, tmp, n, i;
1346 const WCHAR *termStart;
1347
1348 switch (c) {
1349 /* assertions and atoms */
1350 case '^':
1351 state->result = NewRENode(state, REOP_BOL);
1352 if (!state->result)
1353 return FALSE;
1354 state->progLength++;
1355 return TRUE;
1356 case '$':
1357 state->result = NewRENode(state, REOP_EOL);
1358 if (!state->result)
1359 return FALSE;
1360 state->progLength++;
1361 return TRUE;
1362 case '\\':
1363 if (state->cp >= state->cpend) {
1364 /* a trailing '\' is an error */
1365 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1366 return FALSE;
1367 }
1368 c = *state->cp++;
1369 switch (c) {
1370 /* assertion escapes */
1371 case 'b' :
1372 state->result = NewRENode(state, REOP_WBDRY);
1373 if (!state->result)
1374 return FALSE;
1375 state->progLength++;
1376 return TRUE;
1377 case 'B':
1378 state->result = NewRENode(state, REOP_WNONBDRY);
1379 if (!state->result)
1380 return FALSE;
1381 state->progLength++;
1382 return TRUE;
1383 /* Decimal escape */
1384 case '0':
1385 /* Give a strict warning. See also the note below. */
1386 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1387 doOctal:
1388 num = 0;
1389 while (state->cp < state->cpend) {
1390 c = *state->cp;
1391 if (c < '0' || '7' < c)
1392 break;
1393 state->cp++;
1394 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1395 if (tmp > 0377)
1396 break;
1397 num = tmp;
1398 }
1399 c = (WCHAR)num;
1400 doFlat:
1401 state->result = NewRENode(state, REOP_FLAT);
1402 if (!state->result)
1403 return FALSE;
1404 state->result->u.flat.chr = c;
1405 state->result->u.flat.length = 1;
1406 state->progLength += 3;
1407 break;
1408 case '1':
1409 case '2':
1410 case '3':
1411 case '4':
1412 case '5':
1413 case '6':
1414 case '7':
1415 case '8':
1416 case '9':
1417 termStart = state->cp - 1;
1418 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1419 if (state->flags & JSREG_FIND_PAREN_ERROR)
1420 return FALSE;
1421 if (num == OVERFLOW_VALUE) {
1422 /* Give a strict mode warning. */
1423 WARN("back-reference exceeds number of capturing parentheses\n");
1424
1425 /*
1426 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1427 * error here. However, for compatibility with IE, we treat the
1428 * whole backref as flat if the first character in it is not a
1429 * valid octal character, and as an octal escape otherwise.
1430 */
1431 state->cp = termStart;
1432 if (c >= '8') {
1433 /* Treat this as flat. termStart - 1 is the \. */
1434 c = '\\';
1435 goto asFlat;
1436 }
1437
1438 /* Treat this as an octal escape. */
1439 goto doOctal;
1440 }
1441 assert(1 <= num && num <= 0x10000);
1442 state->result = NewRENode(state, REOP_BACKREF);
1443 if (!state->result)
1444 return FALSE;
1445 state->result->u.parenIndex = num - 1;
1446 state->progLength
1447 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1448 break;
1449 /* Control escape */
1450 case 'f':
1451 c = 0xC;
1452 goto doFlat;
1453 case 'n':
1454 c = 0xA;
1455 goto doFlat;
1456 case 'r':
1457 c = 0xD;
1458 goto doFlat;
1459 case 't':
1460 c = 0x9;
1461 goto doFlat;
1462 case 'v':
1463 c = 0xB;
1464 goto doFlat;
1465 /* Control letter */
1466 case 'c':
1467 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1468 c = (WCHAR) (*state->cp++ & 0x1F);
1469 } else {
1470 /* back off to accepting the original '\' as a literal */
1471 --state->cp;
1472 c = '\\';
1473 }
1474 goto doFlat;
1475 /* HexEscapeSequence */
1476 case 'x':
1477 nDigits = 2;
1478 goto lexHex;
1479 /* UnicodeEscapeSequence */
1480 case 'u':
1481 nDigits = 4;
1482lexHex:
1483 n = 0;
1484 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1485 UINT digit;
1486 c = *state->cp++;
1487 if (!isASCIIHexDigit(c, &digit)) {
1488 /*
1489 * Back off to accepting the original 'u' or 'x' as a
1490 * literal.
1491 */
1492 state->cp -= i + 2;
1493 n = *state->cp++;
1494 break;
1495 }
1496 n = (n << 4) | digit;
1497 }
1498 c = (WCHAR) n;
1499 goto doFlat;
1500 /* Character class escapes */
1501 case 'd':
1502 state->result = NewRENode(state, REOP_DIGIT);
1503doSimple:
1504 if (!state->result)
1505 return FALSE;
1506 state->progLength++;
1507 break;
1508 case 'D':
1509 state->result = NewRENode(state, REOP_NONDIGIT);
1510 goto doSimple;
1511 case 's':
1512 state->result = NewRENode(state, REOP_SPACE);
1513 goto doSimple;
1514 case 'S':
1515 state->result = NewRENode(state, REOP_NONSPACE);
1516 goto doSimple;
1517 case 'w':
1518 state->result = NewRENode(state, REOP_ALNUM);
1519 goto doSimple;
1520 case 'W':
1521 state->result = NewRENode(state, REOP_NONALNUM);
1522 goto doSimple;
1523 /* IdentityEscape */
1524 default:
1525 state->result = NewRENode(state, REOP_FLAT);
1526 if (!state->result)
1527 return FALSE;
1528 state->result->u.flat.chr = c;
1529 state->result->u.flat.length = 1;
1530 state->result->kid = (void *) (state->cp - 1);
1531 state->progLength += 3;
1532 break;
1533 }
1534 break;
1535 case '[':
1536 state->result = NewRENode(state, REOP_CLASS);
1537 if (!state->result)
1538 return FALSE;
1539 termStart = state->cp;
1540 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1541 for (;;) {
1542 if (state->cp == state->cpend) {
1543 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1544 JSMSG_UNTERM_CLASS, termStart);
1545
1546 return FALSE;
1547 }
1548 if (*state->cp == '\\') {
1549 state->cp++;
1550 if (state->cp != state->cpend)
1551 state->cp++;
1552 continue;
1553 }
1554 if (*state->cp == ']') {
1555 state->result->u.ucclass.kidlen = state->cp - termStart;
1556 break;
1557 }
1558 state->cp++;
1559 }
1560 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1561 if (!state->classCache[i].start) {
1562 state->classCache[i].start = termStart;
1563 state->classCache[i].length = state->result->u.ucclass.kidlen;
1564 state->classCache[i].index = state->classCount;
1565 break;
1566 }
1567 if (state->classCache[i].length ==
1568 state->result->u.ucclass.kidlen) {
1569 for (n = 0; ; n++) {
1570 if (n == state->classCache[i].length) {
1571 state->result->u.ucclass.index
1572 = state->classCache[i].index;
1573 goto claim;
1574 }
1575 if (state->classCache[i].start[n] != termStart[n])
1576 break;
1577 }
1578 }
1579 }
1580 state->result->u.ucclass.index = state->classCount++;
1581
1582 claim:
1583 /*
1584 * Call CalculateBitmapSize now as we want any errors it finds
1585 * to be reported during the parse phase, not at execution.
1586 */
1587 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1588 return FALSE;
1589 /*
1590 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1591 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1592 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1593 */
1594 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1595 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1596 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1597 return FALSE;
1598 }
1599 state->classBitmapsMem += n;
1600 /* CLASS, <index> */
1601 state->progLength
1602 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1603 break;
1604
1605 case '.':
1606 state->result = NewRENode(state, REOP_DOT);
1607 goto doSimple;
1608
1609 case '{':
1610 {
1611 const WCHAR *errp = state->cp--;
1612 INT err;
1613
1615 state->cp = errp;
1616
1617 if (err < 0)
1618 goto asFlat;
1619
1620 /* FALL THROUGH */
1621 }
1622 case '*':
1623 case '+':
1624 case '?':
1625 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1626 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1627 return FALSE;
1628 default:
1629asFlat:
1630 state->result = NewRENode(state, REOP_FLAT);
1631 if (!state->result)
1632 return FALSE;
1633 state->result->u.flat.chr = c;
1634 state->result->u.flat.length = 1;
1635 state->result->kid = (void *) (state->cp - 1);
1636 state->progLength += 3;
1637 break;
1638 }
1639 return ParseQuantifier(state);
1640}
1641
1642/*
1643 * Top-down regular expression grammar, based closely on Perl4.
1644 *
1645 * regexp: altern A regular expression is one or more
1646 * altern '|' regexp alternatives separated by vertical bar.
1647 */
1648#define INITIAL_STACK_SIZE 128
1649
1650static BOOL
1652{
1653 size_t parenIndex;
1654 RENode *operand;
1655 REOpData *operatorStack;
1656 RENode **operandStack;
1657 REOp op;
1658 INT i;
1659 BOOL result = FALSE;
1660
1661 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1662 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1663
1664 /* Watch out for empty regexp */
1665 if (state->cp == state->cpend) {
1666 state->result = NewRENode(state, REOP_EMPTY);
1667 return (state->result != NULL);
1668 }
1669
1670 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1671 if (!operatorStack)
1672 return FALSE;
1673
1674 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1675 if (!operandStack)
1676 goto out;
1677
1678 for (;;) {
1679 parenIndex = state->parenCount;
1680 if (state->cp == state->cpend) {
1681 /*
1682 * If we are at the end of the regexp and we're short one or more
1683 * operands, the regexp must have the form /x|/ or some such, with
1684 * left parentheses making us short more than one operand.
1685 */
1686 if (operatorSP >= operandSP) {
1687 operand = NewRENode(state, REOP_EMPTY);
1688 if (!operand)
1689 goto out;
1690 goto pushOperand;
1691 }
1692 } else {
1693 switch (*state->cp) {
1694 case '(':
1695 ++state->cp;
1696 if (state->cp + 1 < state->cpend &&
1697 *state->cp == '?' &&
1698 (state->cp[1] == '=' ||
1699 state->cp[1] == '!' ||
1700 state->cp[1] == ':')) {
1701 switch (state->cp[1]) {
1702 case '=':
1703 op = REOP_ASSERT;
1704 /* ASSERT, <next>, ... ASSERTTEST */
1705 state->progLength += 4;
1706 break;
1707 case '!':
1709 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1710 state->progLength += 4;
1711 break;
1712 default:
1714 break;
1715 }
1716 state->cp += 2;
1717 } else {
1718 op = REOP_LPAREN;
1719 /* LPAREN, <index>, ... RPAREN, <index> */
1720 state->progLength
1721 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1722 state->parenCount++;
1723 if (state->parenCount == 65535) {
1724 ReportRegExpError(state, JSREPORT_ERROR,
1725 JSMSG_TOO_MANY_PARENS);
1726 goto out;
1727 }
1728 }
1729 goto pushOperator;
1730
1731 case ')':
1732 /*
1733 * If there's no stacked open parenthesis, throw syntax error.
1734 */
1735 for (i = operatorSP - 1; ; i--) {
1736 if (i < 0) {
1737 ReportRegExpError(state, JSREPORT_ERROR,
1738 JSMSG_UNMATCHED_RIGHT_PAREN);
1739 goto out;
1740 }
1741 if (operatorStack[i].op == REOP_ASSERT ||
1742 operatorStack[i].op == REOP_ASSERT_NOT ||
1743 operatorStack[i].op == REOP_LPARENNON ||
1744 operatorStack[i].op == REOP_LPAREN) {
1745 break;
1746 }
1747 }
1748 /* FALL THROUGH */
1749
1750 case '|':
1751 /* Expected an operand before these, so make an empty one */
1752 operand = NewRENode(state, REOP_EMPTY);
1753 if (!operand)
1754 goto out;
1755 goto pushOperand;
1756
1757 default:
1758 if (!ParseTerm(state))
1759 goto out;
1760 operand = state->result;
1761pushOperand:
1762 if (operandSP == operandStackSize) {
1763 RENode **tmp;
1764 operandStackSize += operandStackSize;
1765 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1766 if (!tmp)
1767 goto out;
1768 operandStack = tmp;
1769 }
1770 operandStack[operandSP++] = operand;
1771 break;
1772 }
1773 }
1774
1775 /* At the end; process remaining operators. */
1776restartOperator:
1777 if (state->cp == state->cpend) {
1778 while (operatorSP) {
1779 --operatorSP;
1780 if (!ProcessOp(state, &operatorStack[operatorSP],
1781 operandStack, operandSP))
1782 goto out;
1783 --operandSP;
1784 }
1785 assert(operandSP == 1);
1786 state->result = operandStack[0];
1787 result = TRUE;
1788 goto out;
1789 }
1790
1791 switch (*state->cp) {
1792 case '|':
1793 /* Process any stacked 'concat' operators */
1794 ++state->cp;
1795 while (operatorSP &&
1796 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1797 --operatorSP;
1798 if (!ProcessOp(state, &operatorStack[operatorSP],
1799 operandStack, operandSP)) {
1800 goto out;
1801 }
1802 --operandSP;
1803 }
1804 op = REOP_ALT;
1805 goto pushOperator;
1806
1807 case ')':
1808 /*
1809 * If there's no stacked open parenthesis, throw syntax error.
1810 */
1811 for (i = operatorSP - 1; ; i--) {
1812 if (i < 0) {
1813 ReportRegExpError(state, JSREPORT_ERROR,
1814 JSMSG_UNMATCHED_RIGHT_PAREN);
1815 goto out;
1816 }
1817 if (operatorStack[i].op == REOP_ASSERT ||
1818 operatorStack[i].op == REOP_ASSERT_NOT ||
1819 operatorStack[i].op == REOP_LPARENNON ||
1820 operatorStack[i].op == REOP_LPAREN) {
1821 break;
1822 }
1823 }
1824 ++state->cp;
1825
1826 /* Process everything on the stack until the open parenthesis. */
1827 for (;;) {
1828 assert(operatorSP);
1829 --operatorSP;
1830 switch (operatorStack[operatorSP].op) {
1831 case REOP_ASSERT:
1832 case REOP_ASSERT_NOT:
1833 case REOP_LPAREN:
1834 operand = NewRENode(state, operatorStack[operatorSP].op);
1835 if (!operand)
1836 goto out;
1837 operand->u.parenIndex =
1838 operatorStack[operatorSP].parenIndex;
1839 assert(operandSP);
1840 operand->kid = operandStack[operandSP - 1];
1841 operandStack[operandSP - 1] = operand;
1842 if (state->treeDepth == TREE_DEPTH_MAX) {
1843 ReportRegExpError(state, JSREPORT_ERROR,
1844 JSMSG_REGEXP_TOO_COMPLEX);
1845 goto out;
1846 }
1847 ++state->treeDepth;
1848 /* FALL THROUGH */
1849
1850 case REOP_LPARENNON:
1851 state->result = operandStack[operandSP - 1];
1852 if (!ParseQuantifier(state))
1853 goto out;
1854 operandStack[operandSP - 1] = state->result;
1855 goto restartOperator;
1856 default:
1857 if (!ProcessOp(state, &operatorStack[operatorSP],
1858 operandStack, operandSP))
1859 goto out;
1860 --operandSP;
1861 break;
1862 }
1863 }
1864 break;
1865
1866 case '{':
1867 {
1868 const WCHAR *errp = state->cp;
1869
1870 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1871 /*
1872 * This didn't even scan correctly as a quantifier, so we should
1873 * treat it as flat.
1874 */
1875 op = REOP_CONCAT;
1876 goto pushOperator;
1877 }
1878
1879 state->cp = errp;
1880 /* FALL THROUGH */
1881 }
1882
1883 case '+':
1884 case '*':
1885 case '?':
1886 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1887 state->cp);
1888 result = FALSE;
1889 goto out;
1890
1891 default:
1892 /* Anything else is the start of the next term. */
1893 op = REOP_CONCAT;
1894pushOperator:
1895 if (operatorSP == operatorStackSize) {
1896 REOpData *tmp;
1897 operatorStackSize += operatorStackSize;
1898 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1899 if (!tmp)
1900 goto out;
1901 operatorStack = tmp;
1902 }
1903 operatorStack[operatorSP].op = op;
1904 operatorStack[operatorSP].errPos = state->cp;
1905 operatorStack[operatorSP++].parenIndex = parenIndex;
1906 break;
1907 }
1908 }
1909out:
1910 heap_free(operatorStack);
1911 heap_free(operandStack);
1912 return result;
1913}
1914
1915/*
1916 * Save the current state of the match - the position in the input
1917 * text as well as the position in the bytecode. The state of any
1918 * parent expressions is also saved (preceding state).
1919 * Contents of parenCount parentheses from parenIndex are also saved.
1920 */
1921static REBackTrackData *
1924 size_t parenIndex, size_t parenCount)
1925{
1926 size_t i;
1928 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1929
1930 size_t sz = sizeof(REBackTrackData) +
1931 gData->stateStackTop * sizeof(REProgState) +
1932 parenCount * sizeof(RECapture);
1933
1934 ptrdiff_t btsize = gData->backTrackStackSize;
1935 ptrdiff_t btincr = ((char *)result + sz) -
1936 ((char *)gData->backTrackStack + btsize);
1937
1938 TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1939
1940 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1941 if (btincr > 0) {
1942 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1943
1944 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1945 btincr = ((btincr+btsize-1)/btsize)*btsize;
1946 gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1947 if (!gData->backTrackStack) {
1949 gData->ok = FALSE;
1950 return NULL;
1951 }
1952 gData->backTrackStackSize = btsize + btincr;
1953 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1954 }
1955 gData->backTrackSP = result;
1956 result->sz = gData->cursz;
1957 gData->cursz = sz;
1958
1959 result->backtrack_op = op;
1960 result->backtrack_pc = target;
1961 result->cp = cp;
1962 result->parenCount = parenCount;
1963 result->parenIndex = parenIndex;
1964
1965 result->saveStateStackTop = gData->stateStackTop;
1966 assert(gData->stateStackTop);
1967 memcpy(result + 1, gData->stateStack,
1968 sizeof(REProgState) * result->saveStateStackTop);
1969
1970 if (parenCount != 0) {
1971 memcpy((char *)(result + 1) +
1972 sizeof(REProgState) * result->saveStateStackTop,
1973 &x->parens[parenIndex],
1974 sizeof(RECapture) * parenCount);
1975 for (i = 0; i != parenCount; i++)
1976 x->parens[parenIndex + i].index = -1;
1977 }
1978
1979 return result;
1980}
1981
1982static inline match_state_t *
1983FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
1984 size_t length)
1985{
1986 size_t i;
1987 assert(gData->cpend >= x->cp);
1988 if (length > (size_t)(gData->cpend - x->cp))
1989 return NULL;
1990 for (i = 0; i != length; i++) {
1991 if (towupper(matchChars[i]) != towupper(x->cp[i]))
1992 return NULL;
1993 }
1994 x->cp += length;
1995 return x;
1996}
1997
1998/*
1999 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2000 * 2. If E is not a character then go to step 6.
2001 * 3. Let ch be E's character.
2002 * 4. Let A be a one-element RECharSet containing the character ch.
2003 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2004 * 6. E must be an integer. Let n be that integer.
2005 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2006 * 8. Return an internal Matcher closure that takes two arguments, a State x
2007 * and a Continuation c, and performs the following:
2008 * 1. Let cap be x's captures internal array.
2009 * 2. Let s be cap[n].
2010 * 3. If s is undefined, then call c(x) and return its result.
2011 * 4. Let e be x's endIndex.
2012 * 5. Let len be s's length.
2013 * 6. Let f be e+len.
2014 * 7. If f>InputLength, return failure.
2015 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2016 * such that Canonicalize(s[i]) is not the same character as
2017 * Canonicalize(Input [e+i]), then return failure.
2018 * 9. Let y be the State (f, cap).
2019 * 10. Call c(y) and return its result.
2020 */
2021static match_state_t *
2022BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
2023{
2024 size_t len, i;
2025 const WCHAR *parenContent;
2026 RECapture *cap = &x->parens[parenIndex];
2027
2028 if (cap->index == -1)
2029 return x;
2030
2031 len = cap->length;
2032 if (x->cp + len > gData->cpend)
2033 return NULL;
2034
2035 parenContent = &gData->cpbegin[cap->index];
2036 if (gData->regexp->flags & REG_FOLD) {
2037 for (i = 0; i < len; i++) {
2038 if (towupper(parenContent[i]) != towupper(x->cp[i]))
2039 return NULL;
2040 }
2041 } else {
2042 for (i = 0; i < len; i++) {
2043 if (parenContent[i] != x->cp[i])
2044 return NULL;
2045 }
2046 }
2047 x->cp += len;
2048 return x;
2049}
2050
2051/* Add a single character to the RECharSet */
2052static void
2054{
2055 UINT byteIndex = (UINT)(c >> 3);
2056 assert(c <= cs->length);
2057 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2058}
2059
2060
2061/* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2062static void
2064{
2065 UINT i;
2066
2067 UINT byteIndex1 = c1 >> 3;
2068 UINT byteIndex2 = c2 >> 3;
2069
2070 assert(c2 <= cs->length && c1 <= c2);
2071
2072 c1 &= 0x7;
2073 c2 &= 0x7;
2074
2075 if (byteIndex1 == byteIndex2) {
2076 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2077 } else {
2078 cs->u.bits[byteIndex1] |= 0xFF << c1;
2079 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2080 cs->u.bits[i] = 0xFF;
2081 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2082 }
2083}
2084
2085/* Compile the source of the class into a RECharSet */
2086static BOOL
2088{
2089 const WCHAR *src, *end;
2090 BOOL inRange = FALSE;
2091 WCHAR rangeStart = 0;
2092 UINT byteLength, n;
2093 WCHAR c, thisCh;
2094 INT nDigits, i;
2095
2096 assert(!charSet->converted);
2097 /*
2098 * Assert that startIndex and length points to chars inside [] inside
2099 * source string.
2100 */
2101 assert(1 <= charSet->u.src.startIndex);
2102 assert(charSet->u.src.startIndex < gData->regexp->source_len);
2103 assert(charSet->u.src.length <= gData->regexp->source_len
2104 - 1 - charSet->u.src.startIndex);
2105
2106 charSet->converted = TRUE;
2107 src = gData->regexp->source + charSet->u.src.startIndex;
2108
2109 end = src + charSet->u.src.length;
2110
2111 assert(src[-1] == '[' && end[0] == ']');
2112
2113 byteLength = (charSet->length >> 3) + 1;
2114 charSet->u.bits = heap_alloc(byteLength);
2115 if (!charSet->u.bits) {
2116 JS_ReportOutOfMemory(gData->cx);
2117 gData->ok = FALSE;
2118 return FALSE;
2119 }
2120 memset(charSet->u.bits, 0, byteLength);
2121
2122 if (src == end)
2123 return TRUE;
2124
2125 if (*src == '^') {
2126 assert(charSet->sense == FALSE);
2127 ++src;
2128 } else {
2129 assert(charSet->sense == TRUE);
2130 }
2131
2132 while (src != end) {
2133 switch (*src) {
2134 case '\\':
2135 ++src;
2136 c = *src++;
2137 switch (c) {
2138 case 'b':
2139 thisCh = 0x8;
2140 break;
2141 case 'f':
2142 thisCh = 0xC;
2143 break;
2144 case 'n':
2145 thisCh = 0xA;
2146 break;
2147 case 'r':
2148 thisCh = 0xD;
2149 break;
2150 case 't':
2151 thisCh = 0x9;
2152 break;
2153 case 'v':
2154 thisCh = 0xB;
2155 break;
2156 case 'c':
2157 if (src < end && JS_ISWORD(*src)) {
2158 thisCh = (WCHAR)(*src++ & 0x1F);
2159 } else {
2160 --src;
2161 thisCh = '\\';
2162 }
2163 break;
2164 case 'x':
2165 nDigits = 2;
2166 goto lexHex;
2167 case 'u':
2168 nDigits = 4;
2169 lexHex:
2170 n = 0;
2171 for (i = 0; (i < nDigits) && (src < end); i++) {
2172 UINT digit;
2173 c = *src++;
2174 if (!isASCIIHexDigit(c, &digit)) {
2175 /*
2176 * Back off to accepting the original '\'
2177 * as a literal
2178 */
2179 src -= i + 1;
2180 n = '\\';
2181 break;
2182 }
2183 n = (n << 4) | digit;
2184 }
2185 thisCh = (WCHAR)n;
2186 break;
2187 case '0':
2188 case '1':
2189 case '2':
2190 case '3':
2191 case '4':
2192 case '5':
2193 case '6':
2194 case '7':
2195 /*
2196 * This is a non-ECMA extension - decimal escapes (in this
2197 * case, octal!) are supposed to be an error inside class
2198 * ranges, but supported here for backwards compatibility.
2199 */
2200 n = JS7_UNDEC(c);
2201 c = *src;
2202 if ('0' <= c && c <= '7') {
2203 src++;
2204 n = 8 * n + JS7_UNDEC(c);
2205 c = *src;
2206 if ('0' <= c && c <= '7') {
2207 src++;
2208 i = 8 * n + JS7_UNDEC(c);
2209 if (i <= 0377)
2210 n = i;
2211 else
2212 src--;
2213 }
2214 }
2215 thisCh = (WCHAR)n;
2216 break;
2217
2218 case 'd':
2219 AddCharacterRangeToCharSet(charSet, '0', '9');
2220 continue; /* don't need range processing */
2221 case 'D':
2222 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2224 (WCHAR)('9' + 1),
2225 (WCHAR)charSet->length);
2226 continue;
2227 case 's':
2228 for (i = (INT)charSet->length; i >= 0; i--)
2229 if (iswspace(i))
2230 AddCharacterToCharSet(charSet, (WCHAR)i);
2231 continue;
2232 case 'S':
2233 for (i = (INT)charSet->length; i >= 0; i--)
2234 if (!iswspace(i))
2235 AddCharacterToCharSet(charSet, (WCHAR)i);
2236 continue;
2237 case 'w':
2238 for (i = (INT)charSet->length; i >= 0; i--)
2239 if (JS_ISWORD(i))
2240 AddCharacterToCharSet(charSet, (WCHAR)i);
2241 continue;
2242 case 'W':
2243 for (i = (INT)charSet->length; i >= 0; i--)
2244 if (!JS_ISWORD(i))
2245 AddCharacterToCharSet(charSet, (WCHAR)i);
2246 continue;
2247 default:
2248 thisCh = c;
2249 break;
2250
2251 }
2252 break;
2253
2254 default:
2255 thisCh = *src++;
2256 break;
2257
2258 }
2259 if (inRange) {
2260 if (gData->regexp->flags & REG_FOLD) {
2261 assert(rangeStart <= thisCh);
2262 for (i = rangeStart; i <= thisCh; i++) {
2263 WCHAR uch, dch;
2264
2265 AddCharacterToCharSet(charSet, i);
2266 uch = towupper(i);
2267 dch = towlower(i);
2268 if (i != uch)
2269 AddCharacterToCharSet(charSet, uch);
2270 if (i != dch)
2271 AddCharacterToCharSet(charSet, dch);
2272 }
2273 } else {
2274 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2275 }
2276 inRange = FALSE;
2277 } else {
2278 if (gData->regexp->flags & REG_FOLD) {
2279 AddCharacterToCharSet(charSet, towupper(thisCh));
2280 AddCharacterToCharSet(charSet, towlower(thisCh));
2281 } else {
2282 AddCharacterToCharSet(charSet, thisCh);
2283 }
2284 if (src < end - 1) {
2285 if (*src == '-') {
2286 ++src;
2287 inRange = TRUE;
2288 rangeStart = thisCh;
2289 }
2290 }
2291 }
2292 }
2293 return TRUE;
2294}
2295
2296static BOOL
2298{
2299 size_t limit = gData->stateStackLimit;
2300 size_t sz = sizeof(REProgState) * limit;
2301
2302 gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2303 if (!gData->stateStack) {
2305 gData->ok = FALSE;
2306 return FALSE;
2307 }
2308 gData->stateStackLimit = limit + limit;
2309 return TRUE;
2310}
2311
2312#define PUSH_STATE_STACK(data) \
2313 do { \
2314 ++(data)->stateStackTop; \
2315 if ((data)->stateStackTop == (data)->stateStackLimit && \
2316 !ReallocStateStack((data))) { \
2317 return NULL; \
2318 } \
2319 }while(0)
2320
2321/*
2322 * Apply the current op against the given input to see if it's going to match
2323 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2324 * true, then update the current state's cp. Always update startpc to the next
2325 * op.
2326 */
2327static inline match_state_t *
2329 jsbytecode **startpc, BOOL updatecp)
2330{
2332 WCHAR matchCh;
2333 size_t parenIndex;
2334 size_t offset, length, index;
2335 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2336 const WCHAR *source;
2337 const WCHAR *startcp = x->cp;
2338 WCHAR ch;
2339 RECharSet *charSet;
2340
2341 const char *opname = reop_names[op];
2342 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2343 (int)gData->stateStackTop * 2, "", opname);
2344
2345 switch (op) {
2346 case REOP_EMPTY:
2347 result = x;
2348 break;
2349 case REOP_BOL:
2350 if (x->cp != gData->cpbegin) {
2351 if (
2352 !(gData->regexp->flags & REG_MULTILINE)) {
2353 break;
2354 }
2355 if (!RE_IS_LINE_TERM(x->cp[-1]))
2356 break;
2357 }
2358 result = x;
2359 break;
2360 case REOP_EOL:
2361 if (x->cp != gData->cpend) {
2362 if (
2363 !(gData->regexp->flags & REG_MULTILINE)) {
2364 break;
2365 }
2366 if (!RE_IS_LINE_TERM(*x->cp))
2367 break;
2368 }
2369 result = x;
2370 break;
2371 case REOP_WBDRY:
2372 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2373 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2374 result = x;
2375 }
2376 break;
2377 case REOP_WNONBDRY:
2378 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2379 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2380 result = x;
2381 }
2382 break;
2383 case REOP_DOT:
2384 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2385 result = x;
2386 result->cp++;
2387 }
2388 break;
2389 case REOP_DIGIT:
2390 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2391 result = x;
2392 result->cp++;
2393 }
2394 break;
2395 case REOP_NONDIGIT:
2396 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2397 result = x;
2398 result->cp++;
2399 }
2400 break;
2401 case REOP_ALNUM:
2402 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2403 result = x;
2404 result->cp++;
2405 }
2406 break;
2407 case REOP_NONALNUM:
2408 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2409 result = x;
2410 result->cp++;
2411 }
2412 break;
2413 case REOP_SPACE:
2414 if (x->cp != gData->cpend && iswspace(*x->cp)) {
2415 result = x;
2416 result->cp++;
2417 }
2418 break;
2419 case REOP_NONSPACE:
2420 if (x->cp != gData->cpend && !iswspace(*x->cp)) {
2421 result = x;
2422 result->cp++;
2423 }
2424 break;
2425 case REOP_BACKREF:
2426 pc = ReadCompactIndex(pc, &parenIndex);
2427 assert(parenIndex < gData->regexp->parenCount);
2428 result = BackrefMatcher(gData, x, parenIndex);
2429 break;
2430 case REOP_FLAT:
2431 pc = ReadCompactIndex(pc, &offset);
2432 assert(offset < gData->regexp->source_len);
2433 pc = ReadCompactIndex(pc, &length);
2434 assert(1 <= length);
2435 assert(length <= gData->regexp->source_len - offset);
2436 if (length <= (size_t)(gData->cpend - x->cp)) {
2437 source = gData->regexp->source + offset;
2438 TRACE("%s\n", debugstr_wn(source, length));
2439 for (index = 0; index != length; index++) {
2440 if (source[index] != x->cp[index])
2441 return NULL;
2442 }
2443 x->cp += length;
2444 result = x;
2445 }
2446 break;
2447 case REOP_FLAT1:
2448 matchCh = *pc++;
2449 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2450 if (x->cp != gData->cpend && *x->cp == matchCh) {
2451 result = x;
2452 result->cp++;
2453 }
2454 break;
2455 case REOP_FLATi:
2456 pc = ReadCompactIndex(pc, &offset);
2457 assert(offset < gData->regexp->source_len);
2458 pc = ReadCompactIndex(pc, &length);
2459 assert(1 <= length);
2460 assert(length <= gData->regexp->source_len - offset);
2461 source = gData->regexp->source;
2462 result = FlatNIMatcher(gData, x, source + offset, length);
2463 break;
2464 case REOP_FLAT1i:
2465 matchCh = *pc++;
2466 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2467 result = x;
2468 result->cp++;
2469 }
2470 break;
2471 case REOP_UCFLAT1:
2472 matchCh = GET_ARG(pc);
2473 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2474 pc += ARG_LEN;
2475 if (x->cp != gData->cpend && *x->cp == matchCh) {
2476 result = x;
2477 result->cp++;
2478 }
2479 break;
2480 case REOP_UCFLAT1i:
2481 matchCh = GET_ARG(pc);
2482 pc += ARG_LEN;
2483 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2484 result = x;
2485 result->cp++;
2486 }
2487 break;
2488 case REOP_CLASS:
2489 pc = ReadCompactIndex(pc, &index);
2490 assert(index < gData->regexp->classCount);
2491 if (x->cp != gData->cpend) {
2492 charSet = &gData->regexp->classList[index];
2493 assert(charSet->converted);
2494 ch = *x->cp;
2495 index = ch >> 3;
2496 if (charSet->length != 0 &&
2497 ch <= charSet->length &&
2498 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2499 result = x;
2500 result->cp++;
2501 }
2502 }
2503 break;
2504 case REOP_NCLASS:
2505 pc = ReadCompactIndex(pc, &index);
2506 assert(index < gData->regexp->classCount);
2507 if (x->cp != gData->cpend) {
2508 charSet = &gData->regexp->classList[index];
2509 assert(charSet->converted);
2510 ch = *x->cp;
2511 index = ch >> 3;
2512 if (charSet->length == 0 ||
2513 ch > charSet->length ||
2514 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2515 result = x;
2516 result->cp++;
2517 }
2518 }
2519 break;
2520
2521 default:
2522 assert(FALSE);
2523 }
2524 if (result) {
2525 if (!updatecp)
2526 x->cp = startcp;
2527 *startpc = pc;
2528 TRACE(" *\n");
2529 return result;
2530 }
2531 x->cp = startcp;
2532 return NULL;
2533}
2534
2535static inline match_state_t *
2537{
2539 REBackTrackData *backTrackData;
2540 jsbytecode *nextpc, *testpc;
2541 REOp nextop;
2542 RECapture *cap;
2543 REProgState *curState;
2544 const WCHAR *startcp;
2545 size_t parenIndex, k;
2546 size_t parenSoFar = 0;
2547
2548 WCHAR matchCh1, matchCh2;
2549 RECharSet *charSet;
2550
2551 BOOL anchor;
2552 jsbytecode *pc = gData->regexp->program;
2553 REOp op = (REOp) *pc++;
2554
2555 /*
2556 * If the first node is a simple match, step the index into the string
2557 * until that match is made, or fail if it can't be found at all.
2558 */
2559 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & REG_STICKY)) {
2560 anchor = FALSE;
2561 while (x->cp <= gData->cpend) {
2562 nextpc = pc; /* reset back to start each time */
2563 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2564 if (result) {
2565 anchor = TRUE;
2566 x = result;
2567 pc = nextpc; /* accept skip to next opcode */
2568 op = (REOp) *pc++;
2569 assert(op < REOP_LIMIT);
2570 break;
2571 }
2572 gData->skipped++;
2573 x->cp++;
2574 }
2575 if (!anchor)
2576 goto bad;
2577 }
2578
2579 for (;;) {
2580 const char *opname = reop_names[op];
2581 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2582 (int)gData->stateStackTop * 2, "", opname);
2583
2584 if (REOP_IS_SIMPLE(op)) {
2585 result = SimpleMatch(gData, x, op, &pc, TRUE);
2586 } else {
2587 curState = &gData->stateStack[gData->stateStackTop];
2588 switch (op) {
2589 case REOP_END:
2590 goto good;
2591 case REOP_ALTPREREQ2:
2592 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2593 pc += ARG_LEN;
2594 matchCh2 = GET_ARG(pc);
2595 pc += ARG_LEN;
2596 k = GET_ARG(pc);
2597 pc += ARG_LEN;
2598
2599 if (x->cp != gData->cpend) {
2600 if (*x->cp == matchCh2)
2601 goto doAlt;
2602
2603 charSet = &gData->regexp->classList[k];
2604 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2605 goto bad;
2606 matchCh1 = *x->cp;
2607 k = matchCh1 >> 3;
2608 if ((charSet->length == 0 ||
2609 matchCh1 > charSet->length ||
2610 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2611 charSet->sense) {
2612 goto doAlt;
2613 }
2614 }
2615 result = NULL;
2616 break;
2617
2618 case REOP_ALTPREREQ:
2619 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2620 pc += ARG_LEN;
2621 matchCh1 = GET_ARG(pc);
2622 pc += ARG_LEN;
2623 matchCh2 = GET_ARG(pc);
2624 pc += ARG_LEN;
2625 if (x->cp == gData->cpend ||
2626 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2627 result = NULL;
2628 break;
2629 }
2630 /* else fall through... */
2631
2632 case REOP_ALT:
2633 doAlt:
2634 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2635 pc += ARG_LEN; /* start of this alternate */
2636 curState->parenSoFar = parenSoFar;
2637 PUSH_STATE_STACK(gData);
2638 op = (REOp) *pc++;
2639 startcp = x->cp;
2640 if (REOP_IS_SIMPLE(op)) {
2641 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2642 op = (REOp) *nextpc++;
2643 pc = nextpc;
2644 continue;
2645 }
2646 result = x;
2647 op = (REOp) *pc++;
2648 }
2649 nextop = (REOp) *nextpc++;
2650 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2651 goto bad;
2652 continue;
2653
2654 /*
2655 * Occurs at (successful) end of REOP_ALT,
2656 */
2657 case REOP_JUMP:
2658 /*
2659 * If we have not gotten a result here, it is because of an
2660 * empty match. Do the same thing REOP_EMPTY would do.
2661 */
2662 if (!result)
2663 result = x;
2664
2665 --gData->stateStackTop;
2666 pc += GET_OFFSET(pc);
2667 op = (REOp) *pc++;
2668 continue;
2669
2670 /*
2671 * Occurs at last (successful) end of REOP_ALT,
2672 */
2673 case REOP_ENDALT:
2674 /*
2675 * If we have not gotten a result here, it is because of an
2676 * empty match. Do the same thing REOP_EMPTY would do.
2677 */
2678 if (!result)
2679 result = x;
2680
2681 --gData->stateStackTop;
2682 op = (REOp) *pc++;
2683 continue;
2684
2685 case REOP_LPAREN:
2686 pc = ReadCompactIndex(pc, &parenIndex);
2687 TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
2688 assert(parenIndex < gData->regexp->parenCount);
2689 if (parenIndex + 1 > parenSoFar)
2690 parenSoFar = parenIndex + 1;
2691 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2692 x->parens[parenIndex].length = 0;
2693 op = (REOp) *pc++;
2694 continue;
2695
2696 case REOP_RPAREN:
2697 {
2698 ptrdiff_t delta;
2699
2700 pc = ReadCompactIndex(pc, &parenIndex);
2701 assert(parenIndex < gData->regexp->parenCount);
2702 cap = &x->parens[parenIndex];
2703 delta = x->cp - (gData->cpbegin + cap->index);
2704 cap->length = (delta < 0) ? 0 : (size_t) delta;
2705 op = (REOp) *pc++;
2706
2707 if (!result)
2708 result = x;
2709 continue;
2710 }
2711 case REOP_ASSERT:
2712 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2713 pc += ARG_LEN; /* start of ASSERT child */
2714 op = (REOp) *pc++;
2715 testpc = pc;
2716 if (REOP_IS_SIMPLE(op) &&
2717 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2718 result = NULL;
2719 break;
2720 }
2721 curState->u.assertion.top =
2722 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2723 curState->u.assertion.sz = gData->cursz;
2724 curState->index = x->cp - gData->cpbegin;
2725 curState->parenSoFar = parenSoFar;
2726 PUSH_STATE_STACK(gData);
2728 nextpc, x, x->cp, 0, 0)) {
2729 goto bad;
2730 }
2731 continue;
2732
2733 case REOP_ASSERT_NOT:
2734 nextpc = pc + GET_OFFSET(pc);
2735 pc += ARG_LEN;
2736 op = (REOp) *pc++;
2737 testpc = pc;
2738 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2739 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2740 *testpc == REOP_ASSERTNOTTEST) {
2741 result = NULL;
2742 break;
2743 }
2744 curState->u.assertion.top
2745 = (char *)gData->backTrackSP -
2746 (char *)gData->backTrackStack;
2747 curState->u.assertion.sz = gData->cursz;
2748 curState->index = x->cp - gData->cpbegin;
2749 curState->parenSoFar = parenSoFar;
2750 PUSH_STATE_STACK(gData);
2752 nextpc, x, x->cp, 0, 0)) {
2753 goto bad;
2754 }
2755 continue;
2756
2757 case REOP_ASSERTTEST:
2758 --gData->stateStackTop;
2759 --curState;
2760 x->cp = gData->cpbegin + curState->index;
2761 gData->backTrackSP =
2762 (REBackTrackData *) ((char *)gData->backTrackStack +
2763 curState->u.assertion.top);
2764 gData->cursz = curState->u.assertion.sz;
2765 if (result)
2766 result = x;
2767 break;
2768
2769 case REOP_ASSERTNOTTEST:
2770 --gData->stateStackTop;
2771 --curState;
2772 x->cp = gData->cpbegin + curState->index;
2773 gData->backTrackSP =
2774 (REBackTrackData *) ((char *)gData->backTrackStack +
2775 curState->u.assertion.top);
2776 gData->cursz = curState->u.assertion.sz;
2777 result = (!result) ? x : NULL;
2778 break;
2779 case REOP_STAR:
2780 curState->u.quantifier.min = 0;
2781 curState->u.quantifier.max = (UINT)-1;
2782 goto quantcommon;
2783 case REOP_PLUS:
2784 curState->u.quantifier.min = 1;
2785 curState->u.quantifier.max = (UINT)-1;
2786 goto quantcommon;
2787 case REOP_OPT:
2788 curState->u.quantifier.min = 0;
2789 curState->u.quantifier.max = 1;
2790 goto quantcommon;
2791 case REOP_QUANT:
2792 pc = ReadCompactIndex(pc, &k);
2793 curState->u.quantifier.min = k;
2794 pc = ReadCompactIndex(pc, &k);
2795 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2796 curState->u.quantifier.max = k - 1;
2797 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2798 quantcommon:
2799 if (curState->u.quantifier.max == 0) {
2800 pc = pc + GET_OFFSET(pc);
2801 op = (REOp) *pc++;
2802 result = x;
2803 continue;
2804 }
2805 /* Step over <next> */
2806 nextpc = pc + ARG_LEN;
2807 op = (REOp) *nextpc++;
2808 startcp = x->cp;
2809 if (REOP_IS_SIMPLE(op)) {
2810 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2811 if (curState->u.quantifier.min == 0)
2812 result = x;
2813 else
2814 result = NULL;
2815 pc = pc + GET_OFFSET(pc);
2816 break;
2817 }
2818 op = (REOp) *nextpc++;
2819 result = x;
2820 }
2821 curState->index = startcp - gData->cpbegin;
2822 curState->continue_op = REOP_REPEAT;
2823 curState->continue_pc = pc;
2824 curState->parenSoFar = parenSoFar;
2825 PUSH_STATE_STACK(gData);
2826 if (curState->u.quantifier.min == 0 &&
2827 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2828 0, 0)) {
2829 goto bad;
2830 }
2831 pc = nextpc;
2832 continue;
2833
2834 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2835 pc = curState[-1].continue_pc;
2836 op = (REOp) curState[-1].continue_op;
2837
2838 if (!result)
2839 result = x;
2840 continue;
2841
2842 case REOP_REPEAT:
2843 --curState;
2844 do {
2845 --gData->stateStackTop;
2846 if (!result) {
2847 /* Failed, see if we have enough children. */
2848 if (curState->u.quantifier.min == 0)
2849 goto repeatDone;
2850 goto break_switch;
2851 }
2852 if (curState->u.quantifier.min == 0 &&
2853 x->cp == gData->cpbegin + curState->index) {
2854 /* matched an empty string, that'll get us nowhere */
2855 result = NULL;
2856 goto break_switch;
2857 }
2858 if (curState->u.quantifier.min != 0)
2859 curState->u.quantifier.min--;
2860 if (curState->u.quantifier.max != (UINT) -1)
2861 curState->u.quantifier.max--;
2862 if (curState->u.quantifier.max == 0)
2863 goto repeatDone;
2864 nextpc = pc + ARG_LEN;
2865 nextop = (REOp) *nextpc;
2866 startcp = x->cp;
2867 if (REOP_IS_SIMPLE(nextop)) {
2868 nextpc++;
2869 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2870 if (curState->u.quantifier.min == 0)
2871 goto repeatDone;
2872 result = NULL;
2873 goto break_switch;
2874 }
2875 result = x;
2876 }
2877 curState->index = startcp - gData->cpbegin;
2878 PUSH_STATE_STACK(gData);
2879 if (curState->u.quantifier.min == 0 &&
2881 pc, x, startcp,
2882 curState->parenSoFar,
2883 parenSoFar -
2884 curState->parenSoFar)) {
2885 goto bad;
2886 }
2887 } while (*nextpc == REOP_ENDCHILD);
2888 pc = nextpc;
2889 op = (REOp) *pc++;
2890 parenSoFar = curState->parenSoFar;
2891 continue;
2892
2893 repeatDone:
2894 result = x;
2895 pc += GET_OFFSET(pc);
2896 goto break_switch;
2897
2898 case REOP_MINIMALSTAR:
2899 curState->u.quantifier.min = 0;
2900 curState->u.quantifier.max = (UINT)-1;
2901 goto minimalquantcommon;
2902 case REOP_MINIMALPLUS:
2903 curState->u.quantifier.min = 1;
2904 curState->u.quantifier.max = (UINT)-1;
2905 goto minimalquantcommon;
2906 case REOP_MINIMALOPT:
2907 curState->u.quantifier.min = 0;
2908 curState->u.quantifier.max = 1;
2909 goto minimalquantcommon;
2910 case REOP_MINIMALQUANT:
2911 pc = ReadCompactIndex(pc, &k);
2912 curState->u.quantifier.min = k;
2913 pc = ReadCompactIndex(pc, &k);
2914 /* See REOP_QUANT comments about k - 1. */
2915 curState->u.quantifier.max = k - 1;
2916 assert(curState->u.quantifier.min
2917 <= curState->u.quantifier.max);
2918 minimalquantcommon:
2919 curState->index = x->cp - gData->cpbegin;
2920 curState->parenSoFar = parenSoFar;
2921 PUSH_STATE_STACK(gData);
2922 if (curState->u.quantifier.min != 0) {
2923 curState->continue_op = REOP_MINIMALREPEAT;
2924 curState->continue_pc = pc;
2925 /* step over <next> */
2926 pc += OFFSET_LEN;
2927 op = (REOp) *pc++;
2928 } else {
2930 pc, x, x->cp, 0, 0)) {
2931 goto bad;
2932 }
2933 --gData->stateStackTop;
2934 pc = pc + GET_OFFSET(pc);
2935 op = (REOp) *pc++;
2936 }
2937 continue;
2938
2939 case REOP_MINIMALREPEAT:
2940 --gData->stateStackTop;
2941 --curState;
2942
2943 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2944#define PREPARE_REPEAT() \
2945 do { \
2946 curState->index = x->cp - gData->cpbegin; \
2947 curState->continue_op = REOP_MINIMALREPEAT; \
2948 curState->continue_pc = pc; \
2949 pc += ARG_LEN; \
2950 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2951 x->parens[k].index = -1; \
2952 PUSH_STATE_STACK(gData); \
2953 op = (REOp) *pc++; \
2954 assert(op < REOP_LIMIT); \
2955 }while(0)
2956
2957 if (!result) {
2958 TRACE(" -\n");
2959 /*
2960 * Non-greedy failure - try to consume another child.
2961 */
2962 if (curState->u.quantifier.max == (UINT) -1 ||
2963 curState->u.quantifier.max > 0) {
2965 continue;
2966 }
2967 /* Don't need to adjust pc since we're going to pop. */
2968 break;
2969 }
2970 if (curState->u.quantifier.min == 0 &&
2971 x->cp == gData->cpbegin + curState->index) {
2972 /* Matched an empty string, that'll get us nowhere. */
2973 result = NULL;
2974 break;
2975 }
2976 if (curState->u.quantifier.min != 0)
2977 curState->u.quantifier.min--;
2978 if (curState->u.quantifier.max != (UINT) -1)
2979 curState->u.quantifier.max--;
2980 if (curState->u.quantifier.min != 0) {
2982 continue;
2983 }
2984 curState->index = x->cp - gData->cpbegin;
2985 curState->parenSoFar = parenSoFar;
2986 PUSH_STATE_STACK(gData);
2988 pc, x, x->cp,
2989 curState->parenSoFar,
2990 parenSoFar - curState->parenSoFar)) {
2991 goto bad;
2992 }
2993 --gData->stateStackTop;
2994 pc = pc + GET_OFFSET(pc);
2995 op = (REOp) *pc++;
2996 assert(op < REOP_LIMIT);
2997 continue;
2998 default:
2999 assert(FALSE);
3000 result = NULL;
3001 }
3002 break_switch:;
3003 }
3004
3005 /*
3006 * If the match failed and there's a backtrack option, take it.
3007 * Otherwise this is a complete and utter failure.
3008 */
3009 if (!result) {
3010 if (gData->cursz == 0)
3011 return NULL;
3012
3013 /* Potentially detect explosive regex here. */
3014 gData->backTrackCount++;
3015 if (gData->backTrackLimit &&
3016 gData->backTrackCount >= gData->backTrackLimit) {
3017 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3018 JSMSG_REGEXP_TOO_COMPLEX);
3019 gData->ok = FALSE;
3020 return NULL;
3021 }
3022
3023 backTrackData = gData->backTrackSP;
3024 gData->cursz = backTrackData->sz;
3025 gData->backTrackSP =
3026 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3027 x->cp = backTrackData->cp;
3028 pc = backTrackData->backtrack_pc;
3029 op = (REOp) backTrackData->backtrack_op;
3030 assert(op < REOP_LIMIT);
3031 gData->stateStackTop = backTrackData->saveStateStackTop;
3032 assert(gData->stateStackTop);
3033
3034 memcpy(gData->stateStack, backTrackData + 1,
3035 sizeof(REProgState) * backTrackData->saveStateStackTop);
3036 curState = &gData->stateStack[gData->stateStackTop - 1];
3037
3038 if (backTrackData->parenCount) {
3039 memcpy(&x->parens[backTrackData->parenIndex],
3040 (char *)(backTrackData + 1) +
3041 sizeof(REProgState) * backTrackData->saveStateStackTop,
3042 sizeof(RECapture) * backTrackData->parenCount);
3043 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3044 } else {
3045 for (k = curState->parenSoFar; k < parenSoFar; k++)
3046 x->parens[k].index = -1;
3047 parenSoFar = curState->parenSoFar;
3048 }
3049
3050 TRACE("\tBT_Pop: %ld,%ld\n",
3051 (ULONG_PTR)backTrackData->parenIndex,
3052 (ULONG_PTR)backTrackData->parenCount);
3053 continue;
3054 }
3055 x = result;
3056
3057 /*
3058 * Continue with the expression.
3059 */
3060 op = (REOp)*pc++;
3061 assert(op < REOP_LIMIT);
3062 }
3063
3064bad:
3065 TRACE("\n");
3066 return NULL;
3067
3068good:
3069 TRACE("\n");
3070 return x;
3071}
3072
3074{
3076 const WCHAR *cp = x->cp;
3077 const WCHAR *cp2;
3078 UINT j;
3079
3080 /*
3081 * Have to include the position beyond the last character
3082 * in order to detect end-of-input/line condition.
3083 */
3084 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3085 gData->skipped = cp2 - cp;
3086 x->cp = cp2;
3087 for (j = 0; j < gData->regexp->parenCount; j++)
3088 x->parens[j].index = -1;
3089 result = ExecuteREBytecode(gData, x);
3090 if (!gData->ok || result || (gData->regexp->flags & REG_STICKY))
3091 return result;
3092 gData->backTrackSP = gData->backTrackStack;
3093 gData->cursz = 0;
3094 gData->stateStackTop = 0;
3095 cp2 = cp + gData->skipped;
3096 }
3097 return NULL;
3098}
3099
3101{
3102 UINT i;
3103
3106 if (!gData->backTrackStack)
3107 goto bad;
3108
3109 gData->backTrackSP = gData->backTrackStack;
3110 gData->cursz = 0;
3111 gData->backTrackCount = 0;
3112 gData->backTrackLimit = 0;
3113
3115 gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3116 if (!gData->stateStack)
3117 goto bad;
3118
3119 gData->stateStackTop = 0;
3120 gData->cx = cx;
3121 gData->pool = pool;
3122 gData->regexp = re;
3123 gData->ok = TRUE;
3124
3125 for (i = 0; i < re->classCount; i++) {
3126 if (!re->classList[i].converted &&
3127 !ProcessCharSet(gData, &re->classList[i])) {
3128 return E_FAIL;
3129 }
3130 }
3131
3132 return S_OK;
3133
3134bad:
3136 gData->ok = FALSE;
3137 return E_OUTOFMEMORY;
3138}
3139
3142{
3144 REGlobalData gData;
3146 const WCHAR *str_beg = result->cp;
3147 HRESULT hres;
3148
3149 assert(result->cp != NULL);
3150
3151 gData.cpbegin = str;
3152 gData.cpend = str+str_len;
3153 gData.start = result->cp-str;
3154 gData.skipped = 0;
3155 gData.pool = pool;
3156
3157 hres = InitMatch(regexp, cx, pool, &gData);
3158 if(FAILED(hres)) {
3159 WARN("InitMatch failed\n");
3160 heap_pool_clear(mark);
3161 return hres;
3162 }
3163
3164 res = MatchRegExp(&gData, result);
3165 heap_pool_clear(mark);
3166 if(!gData.ok) {
3167 WARN("MatchRegExp failed\n");
3168 return E_FAIL;
3169 }
3170
3171 if(!res) {
3172 result->match_len = 0;
3173 return S_FALSE;
3174 }
3175
3176 result->match_len = (result->cp-str_beg) - gData.skipped;
3177 result->paren_count = regexp->parenCount;
3178 return S_OK;
3179}
3180
3182{
3183 if (re->classList) {
3184 UINT i;
3185 for (i = 0; i < re->classCount; i++) {
3186 if (re->classList[i].converted)
3187 heap_free(re->classList[i].u.bits);
3188 re->classList[i].u.bits = NULL;
3189 }
3190 heap_free(re->classList);
3191 }
3192 heap_free(re);
3193}
3194
3196 DWORD str_len, WORD flags, BOOL flat)
3197{
3198 regexp_t *re;
3199 heap_pool_t *mark;
3201 size_t resize;
3202 jsbytecode *endPC;
3203 UINT i;
3204 size_t len;
3205
3206 re = NULL;
3207 mark = heap_pool_mark(pool);
3208 len = str_len;
3209
3210 state.context = cx;
3211 state.pool = pool;
3212 state.cp = str;
3213 if (!state.cp)
3214 goto out;
3215 state.cpbegin = state.cp;
3216 state.cpend = state.cp + len;
3217 state.flags = flags;
3218 state.parenCount = 0;
3219 state.classCount = 0;
3220 state.progLength = 0;
3221 state.treeDepth = 0;
3222 state.classBitmapsMem = 0;
3223 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3224 state.classCache[i].start = NULL;
3225
3226 if (len != 0 && flat) {
3227 state.result = NewRENode(&state, REOP_FLAT);
3228 if (!state.result)
3229 goto out;
3230 state.result->u.flat.chr = *state.cpbegin;
3231 state.result->u.flat.length = len;
3232 state.result->kid = (void *) state.cpbegin;
3233 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3234 state.progLength += 1 + GetCompactIndexWidth(0)
3236 } else {
3237 if (!ParseRegExp(&state))
3238 goto out;
3239 }
3240 resize = offsetof(regexp_t, program) + state.progLength + 1;
3241 re = heap_alloc(resize);
3242 if (!re)
3243 goto out;
3244
3245 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3246 re->classCount = state.classCount;
3247 if (re->classCount) {
3248 re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3249 if (!re->classList) {
3250 regexp_destroy(re);
3251 re = NULL;
3252 goto out;
3253 }
3254 for (i = 0; i < re->classCount; i++)
3255 re->classList[i].converted = FALSE;
3256 } else {
3257 re->classList = NULL;
3258 }
3259 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3260 if (!endPC) {
3261 regexp_destroy(re);
3262 re = NULL;
3263 goto out;
3264 }
3265 *endPC++ = REOP_END;
3266 /*
3267 * Check whether size was overestimated and shrink using realloc.
3268 * This is safe since no pointers to newly parsed regexp or its parts
3269 * besides re exist here.
3270 */
3271 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3272 regexp_t *tmp;
3273 assert((size_t)(endPC - re->program) < state.progLength + 1);
3274 resize = offsetof(regexp_t, program) + (endPC - re->program);
3275 tmp = heap_realloc(re, resize);
3276 if (tmp)
3277 re = tmp;
3278 }
3279
3280 re->flags = flags;
3281 re->parenCount = state.parenCount;
3282 re->source = str;
3283 re->source_len = str_len;
3284
3285out:
3286 heap_pool_clear(mark);
3287 return re;
3288}
3289
3291{
3292 if(((*regexp)->flags & REG_FOLD) != (flags & REG_FOLD)) {
3293 regexp_t *new_regexp = regexp_new(cx, pool, (*regexp)->source,
3294 (*regexp)->source_len, flags, FALSE);
3295
3296 if(!new_regexp)
3297 return E_FAIL;
3298
3299 regexp_destroy(*regexp);
3300 *regexp = new_regexp;
3301 }else {
3302 (*regexp)->flags = flags;
3303 }
3304
3305 return S_OK;
3306}
static int state
Definition: maze.c:121
static void * heap_alloc(size_t len)
Definition: appwiz.h:66
static BOOL heap_free(void *mem)
Definition: appwiz.h:76
static void * heap_realloc(void *mem, size_t len)
Definition: appwiz.h:71
#define WINE_DEFAULT_DEBUG_CHANNEL(t)
Definition: precomp.h:23
#define index(s, c)
Definition: various.h:29
#define WARN(fmt,...)
Definition: precomp.h:61
#define E_OUTOFMEMORY
Definition: ddrawi.h:100
#define E_FAIL
Definition: ddrawi.h:102
#define NULL
Definition: types.h:112
#define TRUE
Definition: types.h:120
#define FALSE
Definition: types.h:117
UINT op
Definition: effect.c:236
unsigned char uch
Definition: zlib.h:47
static void cleanup(void)
Definition: main.c:1335
#define assert(x)
Definition: debug.h:53
__kernel_ptrdiff_t ptrdiff_t
Definition: linux.h:247
unsigned int BOOL
Definition: ntddk_ex.h:94
unsigned long DWORD
Definition: ntddk_ex.h:95
unsigned short WORD
Definition: ntddk_ex.h:93
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLuint GLuint end
Definition: gl.h:1545
GLdouble GLdouble t
Definition: gl.h:2047
GLint GLint GLsizei width
Definition: gl.h:1546
GLdouble n
Definition: glext.h:7729
GLuint res
Definition: glext.h:9613
GLenum src
Definition: glext.h:6340
const GLubyte * c
Definition: glext.h:8905
GLuint index
Definition: glext.h:6031
GLenum GLenum GLvoid GLvoid GLvoid * span
Definition: glext.h:5664
GLuint program
Definition: glext.h:6723
GLint limit
Definition: glext.h:10326
GLbitfield flags
Definition: glext.h:7161
GLuint GLsizei GLsizei * length
Definition: glext.h:6040
GLuint GLuint num
Definition: glext.h:9618
GLenum GLsizei len
Definition: glext.h:6722
GLuint64EXT * result
Definition: glext.h:11304
GLenum cap
Definition: glext.h:9639
GLintptr offset
Definition: glext.h:5920
GLenum target
Definition: glext.h:7315
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
#define cs
Definition: i386-dis.c:442
#define iswspace(_c)
Definition: ctype.h:669
#define S_OK
Definition: intsafe.h:52
#define FAILED(hr)
Definition: intsafe.h:51
#define JSREG_FIND_PAREN_COUNT
Definition: regexp.c:895
#define js_ReportOutOfScriptQuota(a)
Definition: regexp.c:48
#define JSMSG_OUT_OF_MEMORY
Definition: regexp.c:79
static size_t GetCompactIndexWidth(size_t index)
Definition: regexp.c:363
#define JS7_UNDEC(c)
Definition: regexp.c:92
#define RE_IS_LETTER(c)
Definition: regexp.c:84
static REBackTrackData * PushBackTrackState(REGlobalData *gData, REOp op, jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount)
Definition: regexp.c:1922
static match_state_t * FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length)
Definition: regexp.c:1983
static jsbytecode * ReadCompactIndex(jsbytecode *pc, size_t *result)
Definition: regexp.c:385
static BOOL ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
Definition: regexp.c:2087
#define JS_COUNT_OPERATION(a, b)
Definition: regexp.c:50
static BOOL ParseRegExp(CompilerState *)
Definition: regexp.c:1651
#define REOP_IS_SIMPLE(op)
Definition: regexp.c:147
#define JS_ReportOutOfMemory(a)
Definition: regexp.c:49
#define ReportRegExpError(a, b, c)
Definition: regexp.c:44
#define JSMSG_MIN_TOO_BIG
Definition: regexp.c:76
#define JUMP_OFFSET_LO(off)
Definition: regexp.c:453
struct REProgState REProgState
#define RE_IS_LINE_TERM(c)
Definition: regexp.c:86
static jsbytecode * EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t)
Definition: regexp.c:475
BYTE JSPackedBool
Definition: regexp.c:53
static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
Definition: regexp.c:3100
static UINT GetDecimalValue(WCHAR c, UINT max, UINT(*findMax)(CompilerState *state), CompilerState *state)
Definition: regexp.c:945
static UINT FindParenCount(CompilerState *state)
Definition: regexp.c:906
#define OVERFLOW_VALUE
Definition: regexp.c:903
#define OFFSET_LEN
Definition: regexp.c:337
static INT ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
Definition: regexp.c:1163
#define GET_ARG(pc)
Definition: regexp.c:333
struct REGlobalData REGlobalData
#define GET_OFFSET(pc)
Definition: regexp.c:339
#define JSMSG_MAX_TOO_BIG
Definition: regexp.c:77
HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, match_state_t *result)
Definition: regexp.c:3140
regexp_t * regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, WORD flags, BOOL flat)
Definition: regexp.c:3195
#define INITIAL_BACKTRACK
Definition: regexp.c:232
static BOOL CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src, const WCHAR *end)
Definition: regexp.c:969
struct RECharSet RECharSet
static BOOL isASCIIHexDigit(WCHAR c, UINT *digit)
Definition: regexp.c:428
#define JUMP_OFFSET_HI(off)
Definition: regexp.c:452
#define TREE_DEPTH_MAX
Definition: regexp.c:347
static void AddCharacterToCharSet(RECharSet *cs, WCHAR c)
Definition: regexp.c:2053
static match_state_t * MatchRegExp(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:3073
#define JSMSG_OUT_OF_ORDER
Definition: regexp.c:78
struct CompilerState CompilerState
#define OFFSET_MAX
Definition: regexp.c:338
#define PREPARE_REPEAT()
#define JS_ISWORD(c)
Definition: regexp.c:89
#define CLASS_BITMAPS_MEM_LIMIT
Definition: regexp.c:353
static match_state_t * BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
Definition: regexp.c:2022
#define SET_ARG(pc, arg)
Definition: regexp.c:334
static BOOL ParseTerm(CompilerState *state)
Definition: regexp.c:1341
#define JS_ReportErrorNumber(a, b, c, d)
Definition: regexp.c:46
void regexp_destroy(regexp_t *re)
Definition: regexp.c:3181
#define INITIAL_STACK_SIZE
Definition: regexp.c:1648
static match_state_t * ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:2536
#define PUSH_STATE_STACK(data)
Definition: regexp.c:2312
struct EmitStateStackEntry EmitStateStackEntry
REOp
Definition: regexp.c:94
@ REOP_LPAREN
Definition: regexp.c:123
@ REOP_SPACE
Definition: regexp.c:105
@ REOP_WBDRY
Definition: regexp.c:98
@ REOP_UCFLAT
Definition: regexp.c:114
@ REOP_ALTPREREQ
Definition: regexp.c:139
@ REOP_REPEAT
Definition: regexp.c:137
@ REOP_QUANT
Definition: regexp.c:119
@ REOP_UCFLAT1i
Definition: regexp.c:113
@ REOP_ASSERTTEST
Definition: regexp.c:130
@ REOP_EMPTY
Definition: regexp.c:95
@ REOP_OPT
Definition: regexp.c:122
@ REOP_DOTSTAR
Definition: regexp.c:126
@ REOP_FLAT1i
Definition: regexp.c:111
@ REOP_MINIMALPLUS
Definition: regexp.c:133
@ REOP_JUMP
Definition: regexp.c:125
@ REOP_ALT
Definition: regexp.c:118
@ REOP_NONSPACE
Definition: regexp.c:106
@ REOP_DOT
Definition: regexp.c:100
@ REOP_BACKREF
Definition: regexp.c:107
@ REOP_BOL
Definition: regexp.c:96
@ REOP_MINIMALREPEAT
Definition: regexp.c:138
@ REOP_ENDCHILD
Definition: regexp.c:136
@ REOP_LIMIT
Definition: regexp.c:144
@ REOP_NCLASS
Definition: regexp.c:117
@ REOP_ASSERT
Definition: regexp.c:128
@ REOP_ALNUM
Definition: regexp.c:103
@ REOP_WNONBDRY
Definition: regexp.c:99
@ REOP_MINIMALOPT
Definition: regexp.c:134
@ REOP_END
Definition: regexp.c:143
@ REOP_UCFLATi
Definition: regexp.c:115
@ REOP_CLASS
Definition: regexp.c:116
@ REOP_MINIMALQUANT
Definition: regexp.c:135
@ REOP_RPAREN
Definition: regexp.c:124
@ REOP_DIGIT
Definition: regexp.c:101
@ REOP_NONDIGIT
Definition: regexp.c:102
@ REOP_ENDALT
Definition: regexp.c:141
@ REOP_ALTPREREQ2
Definition: regexp.c:140
@ REOP_FLAT1
Definition: regexp.c:109
@ REOP_ASSERTNOTTEST
Definition: regexp.c:131
@ REOP_EOL
Definition: regexp.c:97
@ REOP_ASSERT_NOT
Definition: regexp.c:129
@ REOP_FLATi
Definition: regexp.c:110
@ REOP_PLUS
Definition: regexp.c:121
@ REOP_CONCAT
Definition: regexp.c:142
@ REOP_FLAT
Definition: regexp.c:108
@ REOP_MINIMALSTAR
Definition: regexp.c:132
@ REOP_STAR
Definition: regexp.c:120
@ REOP_NONALNUM
Definition: regexp.c:104
@ REOP_LPARENNON
Definition: regexp.c:127
@ REOP_UCFLAT1
Definition: regexp.c:112
#define CLASS_CACHE_SIZE
Definition: regexp.c:292
static match_state_t * SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp)
Definition: regexp.c:2328
#define JSREG_FIND_PAREN_ERROR
Definition: regexp.c:896
static BOOL SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
Definition: regexp.c:456
struct REBackTrackData REBackTrackData
#define ReportRegExpErrorHelper(a, b, c, d)
Definition: regexp.c:45
static void AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
Definition: regexp.c:2063
static BOOL ParseQuantifier(CompilerState *state)
Definition: regexp.c:1217
#define ARG_LEN
Definition: regexp.c:332
static BOOL ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, INT operandSP)
Definition: regexp.c:805
#define INITIAL_STATESTACK
Definition: regexp.c:231
static BOOL ReallocStateStack(REGlobalData *gData)
Definition: regexp.c:2297
#define JS7_ISDEC(c)
Definition: regexp.c:91
static RENode * NewRENode(CompilerState *state, REOp op)
Definition: regexp.c:409
static const char * reop_names[]
Definition: regexp.c:149
static jsbytecode * WriteCompactIndex(jsbytecode *pc, size_t index)
Definition: regexp.c:372
#define REG_FOLD
Definition: regexp.h:36
#define REG_MULTILINE
Definition: regexp.h:38
#define REG_STICKY
Definition: regexp.h:39
BYTE jsbytecode
Definition: regexp.h:54
void * heap_pool_alloc(heap_pool_t *, DWORD) __WINE_ALLOC_SIZE(2) DECLSPEC_HIDDEN
Definition: jsutils.c:77
void heap_pool_clear(heap_pool_t *) DECLSPEC_HIDDEN
Definition: jsutils.c:146
heap_pool_t * heap_pool_mark(heap_pool_t *) DECLSPEC_HIDDEN
Definition: jsutils.c:182
void * heap_pool_grow(heap_pool_t *, void *, DWORD, DWORD) DECLSPEC_HIDDEN
Definition: jsutils.c:130
#define c
Definition: ke_i.h:80
#define debugstr_wn
Definition: kernel32.h:33
if(dx< 0)
Definition: linetemp.h:194
POINT cp
Definition: magnifier.c:59
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
HRESULT hres
Definition: protocol.c:465
#define shift
Definition: input.c:1755
#define min(a, b)
Definition: monoChain.cc:55
int k
Definition: mpi.c:3369
unsigned int UINT
Definition: ndis.h:50
#define cap
Definition: glfuncs.h:226
_Out_opt_ int * cx
Definition: commctrl.h:585
static unsigned __int64 next
Definition: rand_nt.c:6
#define err(...)
static FILE * out
Definition: regtests2xml.c:44
const WCHAR * str
static calc_node_t temp
Definition: rpn_ieee.c:38
#define offsetof(TYPE, MEMBER)
#define memset(x, y, z)
Definition: compat.h:39
#define TRACE(s)
Definition: solgame.cpp:4
struct CompilerState::@457 classCache[CLASS_CACHE_SIZE]
RENode * result
Definition: regexp.c:303
size_t classBitmapsMem
Definition: regexp.c:304
const WCHAR * cpend
Definition: regexp.c:297
heap_pool_t * pool
Definition: regexp.c:312
const WCHAR * start
Definition: regexp.c:306
size_t index
Definition: regexp.c:308
const WCHAR * cp
Definition: regexp.c:298
size_t treeDepth
Definition: regexp.c:301
size_t parenCount
Definition: regexp.c:299
size_t length
Definition: regexp.c:307
size_t classCount
Definition: regexp.c:300
void * context
Definition: regexp.c:295
WORD flags
Definition: regexp.c:310
size_t progLength
Definition: regexp.c:302
const WCHAR * cpbegin
Definition: regexp.c:296
RENode * continueNode
Definition: regexp.c:321
jsbytecode * endTermFixup
Definition: regexp.c:320
jsbytecode continueOp
Definition: regexp.c:322
jsbytecode * nextAltFixup
Definition: regexp.c:318
jsbytecode * altHead
Definition: regexp.c:317
jsbytecode * nextTermFixup
Definition: regexp.c:319
JSPackedBool jumpToJumpFlag
Definition: regexp.c:323
jsbytecode * backtrack_pc
Definition: regexp.c:221
size_t parenCount
Definition: regexp.c:225
size_t sz
Definition: regexp.c:220
const WCHAR * cp
Definition: regexp.c:223
size_t parenIndex
Definition: regexp.c:224
jsbytecode backtrack_op
Definition: regexp.c:222
size_t saveStateStackTop
Definition: regexp.c:226
BYTE * bits
Definition: regexp.c:68
WORD length
Definition: regexp.c:66
struct RECharSet::@447::@448 src
JSPackedBool converted
Definition: regexp.c:64
union RECharSet::@447 u
size_t startIndex
Definition: regexp.c:70
JSPackedBool sense
Definition: regexp.c:65
size_t start
Definition: regexp.c:238
size_t backTrackLimit
Definition: regexp.c:252
const WCHAR * cpbegin
Definition: regexp.c:240
const WCHAR * cpend
Definition: regexp.c:241
size_t backTrackStackSize
Definition: regexp.c:249
REProgState * stateStack
Definition: regexp.c:243
REBackTrackData * backTrackSP
Definition: regexp.c:248
size_t cursz
Definition: regexp.c:250
REBackTrackData * backTrackStack
Definition: regexp.c:247
regexp_t * regexp
Definition: regexp.c:236
size_t backTrackCount
Definition: regexp.c:251
BOOL ok
Definition: regexp.c:237
size_t stateStackTop
Definition: regexp.c:244
heap_pool_t * pool
Definition: regexp.c:254
ptrdiff_t skipped
Definition: regexp.c:239
size_t stateStackLimit
Definition: regexp.c:245
void * cx
Definition: regexp.c:235
Definition: regexp.c:260
UINT max
Definition: regexp.c:270
REOp op
Definition: regexp.c:261
size_t startIndex
Definition: regexp.c:274
JSPackedBool sense
Definition: regexp.c:278
WCHAR ch1
Definition: regexp.c:286
RENode * next
Definition: regexp.c:262
WCHAR chr
Definition: regexp.c:281
size_t index
Definition: regexp.c:276
size_t kidlen
Definition: regexp.c:275
struct RENode::@452::@454 ucclass
struct RENode::@452::@453 range
union RENode::@452 u
UINT min
Definition: regexp.c:269
void * kid
Definition: regexp.c:263
size_t parenIndex
Definition: regexp.c:267
struct RENode::@452::@456 altprereq
void * kid2
Definition: regexp.c:265
JSPackedBool greedy
Definition: regexp.c:271
size_t length
Definition: regexp.c:282
WCHAR ch2
Definition: regexp.c:287
WORD bmsize
Definition: regexp.c:277
struct RENode::@452::@455 flat
INT num
Definition: regexp.c:266
const WCHAR * errPos
Definition: regexp.c:448
REOp op
Definition: regexp.c:447
size_t parenIndex
Definition: regexp.c:449
size_t top
Definition: regexp.c:213
jsbytecode * continue_pc
Definition: regexp.c:203
UINT max
Definition: regexp.c:210
UINT min
Definition: regexp.c:209
size_t sz
Definition: regexp.c:214
struct REProgState::@449::@451 assertion
size_t parenSoFar
Definition: regexp.c:206
jsbytecode continue_op
Definition: regexp.c:204
struct REProgState::@449::@450 quantifier
ptrdiff_t index
Definition: regexp.c:205
union REProgState::@449 u
jsbytecode program[1]
Definition: regexp.h:63
struct RECharSet * classList
Definition: regexp.h:60
const WCHAR * source
Definition: regexp.h:61
DWORD source_len
Definition: regexp.h:62
WORD flags
Definition: regexp.h:57
size_t parenCount
Definition: regexp.h:58
size_t classCount
Definition: regexp.h:59
#define max(a, b)
Definition: svc.c:63
#define towlower(c)
Definition: wctype.h:97
#define towupper(c)
Definition: wctype.h:99
#define str_len
Definition: treelist.c:89
int32_t INT
Definition: typedefs.h:58
uint32_t ULONG_PTR
Definition: typedefs.h:65
Definition: pdh_main.c:94
HRESULT regexp_set_flags(regexp_t **regexp, void *cx, heap_pool_t *pool, WORD flags)
Definition: regexp.c:3290
#define CLASS_CACHE_SIZE
Definition: regexp.c:292
#define S_FALSE
Definition: winerror.h:2357
__wchar_t WCHAR
Definition: xmlstorage.h:180
unsigned char BYTE
Definition: xxhash.c:193