ReactOS 0.4.17-dev-357-ga8f14ff
regexp.c
Go to the documentation of this file.
1/*
2 * Copyright 2008 Jacek Caban for CodeWeavers
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17 */
18
19/*
20 * Code in this file is based on files:
21 * js/src/jsregexp.h
22 * js/src/jsregexp.c
23 * from Mozilla project, released under LGPL 2.1 or later.
24 *
25 * The Original Code is Mozilla Communicator client code, released
26 * March 31, 1998.
27 *
28 * The Initial Developer of the Original Code is
29 * Netscape Communications Corporation.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
32 */
33
34#include <assert.h>
35
36#include "jscript.h"
37#include "regexp.h"
38
39#include "wine/debug.h"
40
42
43/* FIXME: Better error handling */
44#define ReportRegExpError(a,b,c) throw_error((a)->context, E_FAIL, L"")
45#define ReportRegExpErrorHelper(a,b,c,d) throw_error((a)->context, E_FAIL, L"")
46#define JS_ReportErrorNumber(a,b,c,d) throw_error((a), E_FAIL, L"")
47#define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f) throw_error((a), E_FAIL, L"")
48#define JS_COUNT_OPERATION(a,b) do { } while(0)
49
50
52
53/*
54 * This struct holds a bitmap representation of a class from a regexp.
55 * There's a list of these referenced by the classList field in the regexp_t
56 * struct below. The initial state has startIndex set to the offset in the
57 * original regexp source of the beginning of the class contents. The first
58 * use of the class converts the source representation into a bitmap.
59 *
60 */
61typedef struct RECharSet {
65 union {
67 struct {
68 size_t startIndex;
69 size_t length;
70 } src;
71 } u;
73
74#define JSMSG_MIN_TOO_BIG 47
75#define JSMSG_MAX_TOO_BIG 48
76#define JSMSG_OUT_OF_ORDER 49
77#define JSMSG_OUT_OF_MEMORY 137
78
79#define LINE_SEPARATOR 0x2028
80#define PARA_SEPARATOR 0x2029
81
82#define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
83 ((c >= 'a') && (c <= 'z')) )
84#define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
85 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
86
87#define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
88
89#define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
90#define JS7_UNDEC(c) ((c) - '0')
91
92typedef enum REOp {
142 REOP_LIMIT /* META: no operator >= to this */
144
145#define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
146
147static const char *reop_names[] = {
148 "empty",
149 "bol",
150 "eol",
151 "wbdry",
152 "wnonbdry",
153 "dot",
154 "digit",
155 "nondigit",
156 "alnum",
157 "nonalnum",
158 "space",
159 "nonspace",
160 "backref",
161 "flat",
162 "flat1",
163 "flati",
164 "flat1i",
165 "ucflat1",
166 "ucflat1i",
167 "ucflat",
168 "ucflati",
169 "class",
170 "nclass",
171 "alt",
172 "quant",
173 "star",
174 "plus",
175 "opt",
176 "lparen",
177 "rparen",
178 "jump",
179 "dotstar",
180 "lparennon",
181 "assert",
182 "assert_not",
183 "asserttest",
184 "assertnottest",
185 "minimalstar",
186 "minimalplus",
187 "minimalopt",
188 "minimalquant",
189 "endchild",
190 "repeat",
191 "minimalrepeat",
192 "altprereq",
193 "alrprereq2",
194 "endalt",
195 "concat",
196 "end",
197 NULL
198};
199
200typedef struct REProgState {
201 jsbytecode *continue_pc; /* current continuation data */
203 ptrdiff_t index; /* progress in text */
204 size_t parenSoFar; /* highest indexed paren started */
205 union {
206 struct {
207 UINT min; /* current quantifier limits */
210 struct {
211 size_t top; /* backtrack stack state */
212 size_t sz;
214 } u;
216
217typedef struct REBackTrackData {
218 size_t sz; /* size of previous stack entry */
219 jsbytecode *backtrack_pc; /* where to backtrack to */
221 const WCHAR *cp; /* index in text of match at backtrack */
222 size_t parenIndex; /* start index of saved paren contents */
223 size_t parenCount; /* # of saved paren contents */
224 size_t saveStateStackTop; /* number of parent states */
225 /* saved parent states follow */
226 /* saved paren contents follow */
228
229#define INITIAL_STATESTACK 100
230#define INITIAL_BACKTRACK 8000
231
232typedef struct REGlobalData {
233 void *cx;
234 regexp_t *regexp; /* the RE in execution */
235 BOOL ok; /* runtime error (out_of_memory only?) */
236 size_t start; /* offset to start at */
237 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
238 const WCHAR *cpbegin; /* text base address */
239 const WCHAR *cpend; /* text limit address */
240
241 REProgState *stateStack; /* stack of state of current parents */
244
245 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
248 size_t cursz; /* size of current stack entry */
249 size_t backTrackCount; /* how many times we've backtracked */
250 size_t backTrackLimit; /* upper limit on backtrack states */
251
252 heap_pool_t *pool; /* It's faster to use one malloc'd pool
253 than to malloc/free the three items
254 that are allocated from this pool */
256
257typedef struct RENode RENode;
258struct RENode {
259 REOp op; /* r.e. op bytecode */
260 RENode *next; /* next in concatenation order */
261 void *kid; /* first operand */
262 union {
263 void *kid2; /* second operand */
264 INT num; /* could be a number */
265 size_t parenIndex; /* or a parenthesis index */
266 struct { /* or a quantifier range */
271 struct { /* or a character class */
273 size_t kidlen; /* length of string at kid, in jschars */
274 size_t index; /* index into class list */
275 WORD bmsize; /* bitmap size, based on max char code */
278 struct { /* or a literal sequence */
279 WCHAR chr; /* of one character */
280 size_t length; /* or many (via the kid) */
282 struct {
283 RENode *kid2; /* second operand from ALT */
284 WCHAR ch1; /* match char for ALTPREREQ */
285 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
287 } u;
288};
289
290#define CLASS_CACHE_SIZE 4
291
292typedef struct CompilerState {
295 const WCHAR *cpend;
296 const WCHAR *cp;
298 size_t classCount; /* number of [] encountered */
299 size_t treeDepth; /* maximum depth of parse tree */
300 size_t progLength; /* estimated bytecode length */
302 size_t classBitmapsMem; /* memory to hold all class bitmaps */
303 struct {
304 const WCHAR *start; /* small cache of class strings */
305 size_t length; /* since they're often the same */
306 size_t index;
309
310 heap_pool_t *pool; /* It's faster to use one malloc'd pool
311 than to malloc/free */
313
314typedef struct EmitStateStackEntry {
315 jsbytecode *altHead; /* start of REOP_ALT* opcode */
316 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
317 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
318 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
319 RENode *continueNode; /* original REOP_ALT* node being stacked */
320 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
321 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
322 avoid 16-bit unsigned offset overflow */
324
325/*
326 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
327 * the getters and setters take the pc of the offset, not of the opcode before
328 * the offset.
329 */
330#define ARG_LEN 2
331#define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
332#define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
333 (pc)[1] = (jsbytecode) (arg))
334
335#define OFFSET_LEN ARG_LEN
336#define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
337#define GET_OFFSET(pc) GET_ARG(pc)
338
340
341/*
342 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
343 * For sanity, we limit it to 2^24 bytes.
344 */
345#define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
346
347/*
348 * The maximum memory that can be allocated for class bitmaps.
349 * For sanity, we limit it to 2^24 bytes.
350 */
351#define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
352
353/*
354 * Functions to get size and write/read bytecode that represent small indexes
355 * compactly.
356 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
357 * indicates that the following byte brings more bits to the index. Otherwise
358 * this is the last byte in the index bytecode representing highest index bits.
359 */
360static size_t
362{
363 size_t width;
364
365 for (width = 1; (index >>= 7) != 0; ++width) { }
366 return width;
367}
368
369static inline jsbytecode *
371{
372 size_t next;
373
374 while ((next = index >> 7) != 0) {
375 *pc++ = (jsbytecode)(index | 0x80);
376 index = next;
377 }
378 *pc++ = (jsbytecode)index;
379 return pc;
380}
381
382static inline jsbytecode *
384{
385 size_t nextByte;
386
387 nextByte = *pc++;
388 if ((nextByte & 0x80) == 0) {
389 /*
390 * Short-circuit the most common case when compact index <= 127.
391 */
392 *result = nextByte;
393 } else {
394 size_t shift = 7;
395 *result = 0x7F & nextByte;
396 do {
397 nextByte = *pc++;
398 *result |= (nextByte & 0x7F) << shift;
399 shift += 7;
400 } while ((nextByte & 0x80) != 0);
401 }
402 return pc;
403}
404
405/* Construct and initialize an RENode, returning NULL for out-of-memory */
406static RENode *
408{
409 RENode *ren;
410
411 ren = heap_pool_alloc(state->pool, sizeof(*ren));
412 if (!ren) {
413 throw_error(state->context, E_OUTOFMEMORY, L"");
414 return NULL;
415 }
416 ren->op = op;
417 ren->next = NULL;
418 ren->kid = NULL;
419 return ren;
420}
421
422/*
423 * Validates and converts hex ascii value.
424 */
425static BOOL
427{
428 UINT cv = c;
429
430 if (cv < '0')
431 return FALSE;
432 if (cv <= '9') {
433 *digit = cv - '0';
434 return TRUE;
435 }
436 cv |= 0x20;
437 if (cv >= 'a' && cv <= 'f') {
438 *digit = cv - 'a' + 10;
439 return TRUE;
440 }
441 return FALSE;
442}
443
444typedef struct {
446 const WCHAR *errPos;
448} REOpData;
449
450#define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
451#define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
452
453static BOOL
455{
456 ptrdiff_t offset = target - jump;
457
458 /* Check that target really points forward. */
459 assert(offset >= 2);
460 if ((size_t)offset > OFFSET_MAX)
461 return FALSE;
462
463 jump[0] = JUMP_OFFSET_HI(offset);
464 jump[1] = JUMP_OFFSET_LO(offset);
465 return TRUE;
466}
467
468/*
469 * Generate bytecode for the tree rooted at t using an explicit stack instead
470 * of recursion.
471 */
472static jsbytecode *
474 jsbytecode *pc, RENode *t)
475{
476 EmitStateStackEntry *emitStateSP, *emitStateStack;
477 RECharSet *charSet;
478 REOp op;
479
480 if (treeDepth == 0) {
481 emitStateStack = NULL;
482 } else {
483 emitStateStack = malloc(sizeof(EmitStateStackEntry) * treeDepth);
484 if (!emitStateStack)
485 return NULL;
486 }
487 emitStateSP = emitStateStack;
488 op = t->op;
490
491 for (;;) {
492 *pc++ = op;
493 switch (op) {
494 case REOP_EMPTY:
495 --pc;
496 break;
497
498 case REOP_ALTPREREQ2:
499 case REOP_ALTPREREQ:
500 assert(emitStateSP);
501 emitStateSP->altHead = pc - 1;
502 emitStateSP->endTermFixup = pc;
503 pc += OFFSET_LEN;
504 SET_ARG(pc, t->u.altprereq.ch1);
505 pc += ARG_LEN;
506 SET_ARG(pc, t->u.altprereq.ch2);
507 pc += ARG_LEN;
508
509 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
510 pc += OFFSET_LEN;
511
512 emitStateSP->continueNode = t;
513 emitStateSP->continueOp = REOP_JUMP;
514 emitStateSP->jumpToJumpFlag = FALSE;
515 ++emitStateSP;
516 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
517 t = t->kid;
518 op = t->op;
520 continue;
521
522 case REOP_JUMP:
523 emitStateSP->nextTermFixup = pc; /* offset to following term */
524 pc += OFFSET_LEN;
525 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
526 goto jump_too_big;
527 emitStateSP->continueOp = REOP_ENDALT;
528 ++emitStateSP;
529 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
530 t = t->u.kid2;
531 op = t->op;
533 continue;
534
535 case REOP_ENDALT:
536 /*
537 * If we already patched emitStateSP->nextTermFixup to jump to
538 * a nearer jump, to avoid 16-bit immediate offset overflow, we
539 * are done here.
540 */
541 if (emitStateSP->jumpToJumpFlag)
542 break;
543
544 /*
545 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
546 * REOP_ENDALT is executed only on successful match of the last
547 * alternate in a group.
548 */
549 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
550 goto jump_too_big;
551 if (t->op != REOP_ALT) {
552 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
553 goto jump_too_big;
554 }
555
556 /*
557 * If the program is bigger than the REOP_JUMP offset range, then
558 * we must check for alternates before this one that are part of
559 * the same group, and fix up their jump offsets to target jumps
560 * close enough to fit in a 16-bit unsigned offset immediate.
561 */
562 if ((size_t)(pc - re->program) > OFFSET_MAX &&
563 emitStateSP > emitStateStack) {
564 EmitStateStackEntry *esp, *esp2;
565 jsbytecode *alt, *jump;
567
568 esp2 = emitStateSP;
569 alt = esp2->altHead;
570 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
571 if (esp->continueOp == REOP_ENDALT &&
572 !esp->jumpToJumpFlag &&
573 esp->nextTermFixup + OFFSET_LEN == alt &&
574 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
575 ? esp->endTermFixup
576 : esp->nextTermFixup)) > OFFSET_MAX) {
577 alt = esp->altHead;
578 jump = esp->nextTermFixup;
579
580 /*
581 * The span must be 1 less than the distance from
582 * jump offset to jump offset, so we actually jump
583 * to a REOP_JUMP bytecode, not to its offset!
584 */
585 for (;;) {
586 assert(jump < esp2->nextTermFixup);
587 span = esp2->nextTermFixup - jump - 1;
588 if ((size_t)span <= OFFSET_MAX)
589 break;
590 do {
591 if (--esp2 == esp)
592 goto jump_too_big;
593 } while (esp2->continueOp != REOP_ENDALT);
594 }
595
596 jump[0] = JUMP_OFFSET_HI(span);
597 jump[1] = JUMP_OFFSET_LO(span);
598
599 if (esp->continueNode->op != REOP_ALT) {
600 /*
601 * We must patch the offset at esp->endTermFixup
602 * as well, for the REOP_ALTPREREQ{,2} opcodes.
603 * If we're unlucky and endTermFixup is more than
604 * OFFSET_MAX bytes from its target, we cheat by
605 * jumping 6 bytes to the jump whose offset is at
606 * esp->nextTermFixup, which has the same target.
607 */
608 jump = esp->endTermFixup;
609 header = esp->nextTermFixup - jump;
610 span += header;
611 if ((size_t)span > OFFSET_MAX)
612 span = header;
613
614 jump[0] = JUMP_OFFSET_HI(span);
615 jump[1] = JUMP_OFFSET_LO(span);
616 }
617
618 esp->jumpToJumpFlag = TRUE;
619 }
620 }
621 }
622 break;
623
624 case REOP_ALT:
625 assert(emitStateSP);
626 emitStateSP->altHead = pc - 1;
627 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
628 pc += OFFSET_LEN;
629 emitStateSP->continueNode = t;
630 emitStateSP->continueOp = REOP_JUMP;
631 emitStateSP->jumpToJumpFlag = FALSE;
632 ++emitStateSP;
633 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
634 t = t->kid;
635 op = t->op;
637 continue;
638
639 case REOP_FLAT:
640 /*
641 * Coalesce FLATs if possible and if it would not increase bytecode
642 * beyond preallocated limit. The latter happens only when bytecode
643 * size for coalesced string with offset p and length 2 exceeds 6
644 * bytes preallocated for 2 single char nodes, i.e. when
645 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
646 * GetCompactIndexWidth(p) > 4.
647 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
648 * nodes strictly decreases bytecode size, the check has to be
649 * done only for the first coalescing.
650 */
651 if (t->kid &&
652 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
653 {
654 while (t->next &&
655 t->next->op == REOP_FLAT &&
656 (WCHAR*)t->kid + t->u.flat.length ==
657 t->next->kid) {
658 t->u.flat.length += t->next->u.flat.length;
659 t->next = t->next->next;
660 }
661 }
662 if (t->kid && t->u.flat.length > 1) {
663 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLATi : REOP_FLAT;
664 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
665 pc = WriteCompactIndex(pc, t->u.flat.length);
666 } else if (t->u.flat.chr < 256) {
667 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
668 *pc++ = (jsbytecode) t->u.flat.chr;
669 } else {
670 pc[-1] = (state->flags & REG_FOLD)
672 : REOP_UCFLAT1;
673 SET_ARG(pc, t->u.flat.chr);
674 pc += ARG_LEN;
675 }
676 break;
677
678 case REOP_LPAREN:
679 assert(emitStateSP);
680 pc = WriteCompactIndex(pc, t->u.parenIndex);
681 emitStateSP->continueNode = t;
682 emitStateSP->continueOp = REOP_RPAREN;
683 ++emitStateSP;
684 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
685 t = t->kid;
686 op = t->op;
687 continue;
688
689 case REOP_RPAREN:
690 pc = WriteCompactIndex(pc, t->u.parenIndex);
691 break;
692
693 case REOP_BACKREF:
694 pc = WriteCompactIndex(pc, t->u.parenIndex);
695 break;
696
697 case REOP_ASSERT:
698 assert(emitStateSP);
699 emitStateSP->nextTermFixup = pc;
700 pc += OFFSET_LEN;
701 emitStateSP->continueNode = t;
702 emitStateSP->continueOp = REOP_ASSERTTEST;
703 ++emitStateSP;
704 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
705 t = t->kid;
706 op = t->op;
707 continue;
708
709 case REOP_ASSERTTEST:
711 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
712 goto jump_too_big;
713 break;
714
715 case REOP_ASSERT_NOT:
716 assert(emitStateSP);
717 emitStateSP->nextTermFixup = pc;
718 pc += OFFSET_LEN;
719 emitStateSP->continueNode = t;
720 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
721 ++emitStateSP;
722 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
723 t = t->kid;
724 op = t->op;
725 continue;
726
727 case REOP_QUANT:
728 assert(emitStateSP);
729 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
730 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
731 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
732 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
733 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
734 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
735 } else {
736 if (!t->u.range.greedy)
737 pc[-1] = REOP_MINIMALQUANT;
738 pc = WriteCompactIndex(pc, t->u.range.min);
739 /*
740 * Write max + 1 to avoid using size_t(max) + 1 bytes
741 * for (UINT)-1 sentinel.
742 */
743 pc = WriteCompactIndex(pc, t->u.range.max + 1);
744 }
745 emitStateSP->nextTermFixup = pc;
746 pc += OFFSET_LEN;
747 emitStateSP->continueNode = t;
748 emitStateSP->continueOp = REOP_ENDCHILD;
749 ++emitStateSP;
750 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
751 t = t->kid;
752 op = t->op;
753 continue;
754
755 case REOP_ENDCHILD:
756 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
757 goto jump_too_big;
758 break;
759
760 case REOP_CLASS:
761 if (!t->u.ucclass.sense)
762 pc[-1] = REOP_NCLASS;
763 pc = WriteCompactIndex(pc, t->u.ucclass.index);
764 charSet = &re->classList[t->u.ucclass.index];
765 charSet->converted = FALSE;
766 charSet->length = t->u.ucclass.bmsize;
767 charSet->u.src.startIndex = t->u.ucclass.startIndex;
768 charSet->u.src.length = t->u.ucclass.kidlen;
769 charSet->sense = t->u.ucclass.sense;
770 break;
771
772 default:
773 break;
774 }
775
776 t = t->next;
777 if (t) {
778 op = t->op;
779 } else {
780 if (emitStateSP == emitStateStack)
781 break;
782 --emitStateSP;
783 t = emitStateSP->continueNode;
784 op = (REOp) emitStateSP->continueOp;
785 }
786 }
787
788 cleanup:
789 free(emitStateStack);
790 return pc;
791
792 jump_too_big:
793 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
794 pc = NULL;
795 goto cleanup;
796}
797
798/*
799 * Process the op against the two top operands, reducing them to a single
800 * operand in the penultimate slot. Update progLength and treeDepth.
801 */
802static BOOL
803ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
804 INT operandSP)
805{
806 RENode *result;
807
808 switch (opData->op) {
809 case REOP_ALT:
811 if (!result)
812 return FALSE;
813 result->kid = operandStack[operandSP - 2];
814 result->u.kid2 = operandStack[operandSP - 1];
815 operandStack[operandSP - 2] = result;
816
817 if (state->treeDepth == TREE_DEPTH_MAX) {
818 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
819 return FALSE;
820 }
821 ++state->treeDepth;
822
823 /*
824 * Look at both alternates to see if there's a FLAT or a CLASS at
825 * the start of each. If so, use a prerequisite match.
826 */
827 if (((RENode *) result->kid)->op == REOP_FLAT &&
828 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
829 (state->flags & REG_FOLD) == 0) {
831 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
832 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
833 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
834 JUMP, <end> ... ENDALT */
835 state->progLength += 13;
836 }
837 else
838 if (((RENode *) result->kid)->op == REOP_CLASS &&
839 ((RENode *) result->kid)->u.ucclass.index < 256 &&
840 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
841 (state->flags & REG_FOLD) == 0) {
843 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
844 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
845 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
846 JUMP, <end> ... ENDALT */
847 state->progLength += 13;
848 }
849 else
850 if (((RENode *) result->kid)->op == REOP_FLAT &&
851 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
852 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
853 (state->flags & REG_FOLD) == 0) {
855 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
856 result->u.altprereq.ch2 =
857 ((RENode *) result->u.kid2)->u.ucclass.index;
858 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
859 JUMP, <end> ... ENDALT */
860 state->progLength += 13;
861 }
862 else {
863 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
864 state->progLength += 7;
865 }
866 break;
867
868 case REOP_CONCAT:
869 result = operandStack[operandSP - 2];
870 while (result->next)
871 result = result->next;
872 result->next = operandStack[operandSP - 1];
873 break;
874
875 case REOP_ASSERT:
876 case REOP_ASSERT_NOT:
877 case REOP_LPARENNON:
878 case REOP_LPAREN:
879 /* These should have been processed by a close paren. */
880 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
881 opData->errPos);
882 return FALSE;
883
884 default:;
885 }
886 return TRUE;
887}
888
889/*
890 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
891 * it being on the stack, and to propagate errors to its callers.
892 */
893#define JSREG_FIND_PAREN_COUNT 0x8000
894#define JSREG_FIND_PAREN_ERROR 0x4000
895
896/*
897 * Magic return value from FindParenCount and GetDecimalValue, to indicate
898 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
899 * its findMax parameter is non-null.
900 */
901#define OVERFLOW_VALUE ((UINT)-1)
902
903static UINT
905{
907 int i;
908
909 if (state->flags & JSREG_FIND_PAREN_COUNT)
910 return OVERFLOW_VALUE;
911
912 /*
913 * Copy state into temp, flag it so we never report an invalid backref,
914 * and reset its members to parse the entire regexp. This is obviously
915 * suboptimal, but GetDecimalValue calls us only if a backref appears to
916 * refer to a forward parenthetical, which is rare.
917 */
918 temp = *state;
920 temp.cp = temp.cpbegin;
921 temp.parenCount = 0;
922 temp.classCount = 0;
923 temp.progLength = 0;
924 temp.treeDepth = 0;
925 temp.classBitmapsMem = 0;
926 for (i = 0; i < CLASS_CACHE_SIZE; i++)
927 temp.classCache[i].start = NULL;
928
929 if (!ParseRegExp(&temp)) {
931 return OVERFLOW_VALUE;
932 }
933 return temp.parenCount;
934}
935
936/*
937 * Extract and return a decimal value at state->cp. The initial character c
938 * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
939 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
940 * state->flags to discover whether an error occurred under findMax.
941 */
942static UINT
945{
947 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
948
949 /* The following restriction allows simpler overflow checks. */
950 assert(max <= ((UINT)-1 - 9) / 10);
951 while (state->cp < state->cpend) {
952 c = *state->cp;
953 if (!JS7_ISDEC(c))
954 break;
955 value = 10 * value + JS7_UNDEC(c);
956 if (!overflow && value > max && (!findMax || value > findMax(state)))
957 overflow = TRUE;
958 ++state->cp;
959 }
960 return overflow ? OVERFLOW_VALUE : value;
961}
962
963/*
964 * Calculate the total size of the bitmap required for a class expression.
965 */
966static BOOL
968 const WCHAR *end)
969{
970 UINT max = 0;
971 BOOL inRange = FALSE;
972 WCHAR c, rangeStart = 0;
973 UINT n, digit, nDigits, i;
974
975 target->u.ucclass.bmsize = 0;
976 target->u.ucclass.sense = TRUE;
977
978 if (src == end)
979 return TRUE;
980
981 if (*src == '^') {
982 ++src;
983 target->u.ucclass.sense = FALSE;
984 }
985
986 while (src != end) {
987 BOOL canStartRange = TRUE;
988 UINT localMax = 0;
989
990 switch (*src) {
991 case '\\':
992 ++src;
993 c = *src++;
994 switch (c) {
995 case 'b':
996 localMax = 0x8;
997 break;
998 case 'f':
999 localMax = 0xC;
1000 break;
1001 case 'n':
1002 localMax = 0xA;
1003 break;
1004 case 'r':
1005 localMax = 0xD;
1006 break;
1007 case 't':
1008 localMax = 0x9;
1009 break;
1010 case 'v':
1011 localMax = 0xB;
1012 break;
1013 case 'c':
1014 if (src < end && RE_IS_LETTER(*src)) {
1015 localMax = (UINT) (*src++) & 0x1F;
1016 } else {
1017 --src;
1018 localMax = '\\';
1019 }
1020 break;
1021 case 'x':
1022 nDigits = 2;
1023 goto lexHex;
1024 case 'u':
1025 nDigits = 4;
1026lexHex:
1027 n = 0;
1028 for (i = 0; (i < nDigits) && (src < end); i++) {
1029 c = *src++;
1030 if (!isASCIIHexDigit(c, &digit)) {
1031 /*
1032 * Back off to accepting the original
1033 *'\' as a literal.
1034 */
1035 src -= i + 1;
1036 n = '\\';
1037 break;
1038 }
1039 n = (n << 4) | digit;
1040 }
1041 localMax = n;
1042 break;
1043 case 'd':
1044 canStartRange = FALSE;
1045 if (inRange) {
1046 JS_ReportErrorNumber(state->context,
1047 js_GetErrorMessage, NULL,
1048 JSMSG_BAD_CLASS_RANGE);
1049 return FALSE;
1050 }
1051 localMax = '9';
1052 break;
1053 case 'D':
1054 case 's':
1055 case 'S':
1056 case 'w':
1057 case 'W':
1058 canStartRange = FALSE;
1059 if (inRange) {
1060 JS_ReportErrorNumber(state->context,
1061 js_GetErrorMessage, NULL,
1062 JSMSG_BAD_CLASS_RANGE);
1063 return FALSE;
1064 }
1065 max = 65535;
1066
1067 /*
1068 * If this is the start of a range, ensure that it's less than
1069 * the end.
1070 */
1071 localMax = 0;
1072 break;
1073 case '0':
1074 case '1':
1075 case '2':
1076 case '3':
1077 case '4':
1078 case '5':
1079 case '6':
1080 case '7':
1081 /*
1082 * This is a non-ECMA extension - decimal escapes (in this
1083 * case, octal!) are supposed to be an error inside class
1084 * ranges, but supported here for backwards compatibility.
1085 *
1086 */
1087 n = JS7_UNDEC(c);
1088 c = *src;
1089 if ('0' <= c && c <= '7') {
1090 src++;
1091 n = 8 * n + JS7_UNDEC(c);
1092 c = *src;
1093 if ('0' <= c && c <= '7') {
1094 src++;
1095 i = 8 * n + JS7_UNDEC(c);
1096 if (i <= 0377)
1097 n = i;
1098 else
1099 src--;
1100 }
1101 }
1102 localMax = n;
1103 break;
1104
1105 default:
1106 localMax = c;
1107 break;
1108 }
1109 break;
1110 default:
1111 localMax = *src++;
1112 break;
1113 }
1114
1115 if (inRange) {
1116 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1117 if (rangeStart > localMax) {
1118 JS_ReportErrorNumber(state->context,
1119 js_GetErrorMessage, NULL,
1120 JSMSG_BAD_CLASS_RANGE);
1121 return FALSE;
1122 }
1123 inRange = FALSE;
1124 } else {
1125 if (canStartRange && src < end - 1) {
1126 if (*src == '-') {
1127 ++src;
1128 inRange = TRUE;
1129 rangeStart = (WCHAR)localMax;
1130 continue;
1131 }
1132 }
1133 if (state->flags & REG_FOLD)
1134 rangeStart = localMax; /* one run of the uc/dc loop below */
1135 }
1136
1137 if (state->flags & REG_FOLD) {
1138 WCHAR maxch = localMax;
1139
1140 for (i = rangeStart; i <= localMax; i++) {
1141 WCHAR uch, dch;
1142
1143 uch = towupper(i);
1144 dch = towlower(i);
1145 if(maxch < uch)
1146 maxch = uch;
1147 if(maxch < dch)
1148 maxch = dch;
1149 }
1150 localMax = maxch;
1151 }
1152
1153 if (localMax > max)
1154 max = localMax;
1155 }
1156 target->u.ucclass.bmsize = max;
1157 return TRUE;
1158}
1159
1160static INT
1162{
1163 UINT min, max;
1164 WCHAR c;
1165 const WCHAR *errp = state->cp++;
1166
1167 c = *state->cp;
1168 if (JS7_ISDEC(c)) {
1169 ++state->cp;
1170 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1171 c = *state->cp;
1172
1173 if (!ignoreValues && min == OVERFLOW_VALUE)
1174 return JSMSG_MIN_TOO_BIG;
1175
1176 if (c == ',') {
1177 c = *++state->cp;
1178 if (JS7_ISDEC(c)) {
1179 ++state->cp;
1180 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1181 c = *state->cp;
1182 if (!ignoreValues && max == OVERFLOW_VALUE)
1183 return JSMSG_MAX_TOO_BIG;
1184 if (!ignoreValues && min > max)
1185 return JSMSG_OUT_OF_ORDER;
1186 } else {
1187 max = (UINT)-1;
1188 }
1189 } else {
1190 max = min;
1191 }
1192 if (c == '}') {
1193 state->result = NewRENode(state, REOP_QUANT);
1194 if (!state->result)
1195 return JSMSG_OUT_OF_MEMORY;
1196 state->result->u.range.min = min;
1197 state->result->u.range.max = max;
1198 /*
1199 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1200 * where <max> is written as compact(max+1) to make
1201 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1202 */
1203 state->progLength += (1 + GetCompactIndexWidth(min)
1205 +3);
1206 return 0;
1207 }
1208 }
1209
1210 state->cp = errp;
1211 return -1;
1212}
1213
1214static BOOL
1216{
1217 RENode *term;
1218 term = state->result;
1219 if (state->cp < state->cpend) {
1220 switch (*state->cp) {
1221 case '+':
1222 state->result = NewRENode(state, REOP_QUANT);
1223 if (!state->result)
1224 return FALSE;
1225 state->result->u.range.min = 1;
1226 state->result->u.range.max = (UINT)-1;
1227 /* <PLUS>, <next> ... <ENDCHILD> */
1228 state->progLength += 4;
1229 goto quantifier;
1230 case '*':
1231 state->result = NewRENode(state, REOP_QUANT);
1232 if (!state->result)
1233 return FALSE;
1234 state->result->u.range.min = 0;
1235 state->result->u.range.max = (UINT)-1;
1236 /* <STAR>, <next> ... <ENDCHILD> */
1237 state->progLength += 4;
1238 goto quantifier;
1239 case '?':
1240 state->result = NewRENode(state, REOP_QUANT);
1241 if (!state->result)
1242 return FALSE;
1243 state->result->u.range.min = 0;
1244 state->result->u.range.max = 1;
1245 /* <OPT>, <next> ... <ENDCHILD> */
1246 state->progLength += 4;
1247 goto quantifier;
1248 case '{': /* balance '}' */
1249 {
1250 INT err;
1251
1253 if (err == 0)
1254 goto quantifier;
1255 if (err == -1)
1256 return TRUE;
1257
1258 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1259 return FALSE;
1260 }
1261 default:;
1262 }
1263 }
1264 return TRUE;
1265
1266quantifier:
1267 if (state->treeDepth == TREE_DEPTH_MAX) {
1268 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1269 return FALSE;
1270 }
1271
1272 ++state->treeDepth;
1273 ++state->cp;
1274 state->result->kid = term;
1275 if (state->cp < state->cpend && *state->cp == '?') {
1276 ++state->cp;
1277 state->result->u.range.greedy = FALSE;
1278 } else {
1279 state->result->u.range.greedy = TRUE;
1280 }
1281 return TRUE;
1282}
1283
1284/*
1285 * item: assertion An item is either an assertion or
1286 * quantatom a quantified atom.
1287 *
1288 * assertion: '^' Assertions match beginning of string
1289 * (or line if the class static property
1290 * RegExp.multiline is true).
1291 * '$' End of string (or line if the class
1292 * static property RegExp.multiline is
1293 * true).
1294 * '\b' Word boundary (between \w and \W).
1295 * '\B' Word non-boundary.
1296 *
1297 * quantatom: atom An unquantified atom.
1298 * quantatom '{' n ',' m '}'
1299 * Atom must occur between n and m times.
1300 * quantatom '{' n ',' '}' Atom must occur at least n times.
1301 * quantatom '{' n '}' Atom must occur exactly n times.
1302 * quantatom '*' Zero or more times (same as {0,}).
1303 * quantatom '+' One or more times (same as {1,}).
1304 * quantatom '?' Zero or one time (same as {0,1}).
1305 *
1306 * any of which can be optionally followed by '?' for ungreedy
1307 *
1308 * atom: '(' regexp ')' A parenthesized regexp (what matched
1309 * can be addressed using a backreference,
1310 * see '\' n below).
1311 * '.' Matches any char except '\n'.
1312 * '[' classlist ']' A character class.
1313 * '[' '^' classlist ']' A negated character class.
1314 * '\f' Form Feed.
1315 * '\n' Newline (Line Feed).
1316 * '\r' Carriage Return.
1317 * '\t' Horizontal Tab.
1318 * '\v' Vertical Tab.
1319 * '\d' A digit (same as [0-9]).
1320 * '\D' A non-digit.
1321 * '\w' A word character, [0-9a-z_A-Z].
1322 * '\W' A non-word character.
1323 * '\s' A whitespace character, [ \b\f\n\r\t\v].
1324 * '\S' A non-whitespace character.
1325 * '\' n A backreference to the nth (n decimal
1326 * and positive) parenthesized expression.
1327 * '\' octal An octal escape sequence (octal must be
1328 * two or three digits long, unless it is
1329 * 0 for the null character).
1330 * '\x' hex A hex escape (hex must be two digits).
1331 * '\u' unicode A unicode escape (must be four digits).
1332 * '\c' ctrl A control character, ctrl is a letter.
1333 * '\' literalatomchar Any character except one of the above
1334 * that follow '\' in an atom.
1335 * otheratomchar Any character not first among the other
1336 * atom right-hand sides.
1337 */
1338static BOOL
1340{
1341 WCHAR c = *state->cp++;
1342 UINT nDigits;
1343 UINT num, tmp, n, i;
1344 const WCHAR *termStart;
1345
1346 switch (c) {
1347 /* assertions and atoms */
1348 case '^':
1349 state->result = NewRENode(state, REOP_BOL);
1350 if (!state->result)
1351 return FALSE;
1352 state->progLength++;
1353 return TRUE;
1354 case '$':
1355 state->result = NewRENode(state, REOP_EOL);
1356 if (!state->result)
1357 return FALSE;
1358 state->progLength++;
1359 return TRUE;
1360 case '\\':
1361 if (state->cp >= state->cpend) {
1362 /* a trailing '\' is an error */
1363 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1364 return FALSE;
1365 }
1366 c = *state->cp++;
1367 switch (c) {
1368 /* assertion escapes */
1369 case 'b' :
1370 state->result = NewRENode(state, REOP_WBDRY);
1371 if (!state->result)
1372 return FALSE;
1373 state->progLength++;
1374 return TRUE;
1375 case 'B':
1376 state->result = NewRENode(state, REOP_WNONBDRY);
1377 if (!state->result)
1378 return FALSE;
1379 state->progLength++;
1380 return TRUE;
1381 /* Decimal escape */
1382 case '0':
1383 /* Give a strict warning. See also the note below. */
1384 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1385 doOctal:
1386 num = 0;
1387 while (state->cp < state->cpend) {
1388 c = *state->cp;
1389 if (c < '0' || '7' < c)
1390 break;
1391 state->cp++;
1392 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1393 if (tmp > 0377)
1394 break;
1395 num = tmp;
1396 }
1397 c = (WCHAR)num;
1398 doFlat:
1399 state->result = NewRENode(state, REOP_FLAT);
1400 if (!state->result)
1401 return FALSE;
1402 state->result->u.flat.chr = c;
1403 state->result->u.flat.length = 1;
1404 state->progLength += 3;
1405 break;
1406 case '1':
1407 case '2':
1408 case '3':
1409 case '4':
1410 case '5':
1411 case '6':
1412 case '7':
1413 case '8':
1414 case '9':
1415 termStart = state->cp - 1;
1416 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1417 if (state->flags & JSREG_FIND_PAREN_ERROR)
1418 return FALSE;
1419 if (num == OVERFLOW_VALUE) {
1420 /* Give a strict mode warning. */
1421 WARN("back-reference exceeds number of capturing parentheses\n");
1422
1423 /*
1424 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1425 * error here. However, for compatibility with IE, we treat the
1426 * whole backref as flat if the first character in it is not a
1427 * valid octal character, and as an octal escape otherwise.
1428 */
1429 state->cp = termStart;
1430 if (c >= '8') {
1431 /* Treat this as flat. termStart - 1 is the \. */
1432 c = '\\';
1433 goto asFlat;
1434 }
1435
1436 /* Treat this as an octal escape. */
1437 goto doOctal;
1438 }
1439 assert(1 <= num && num <= 0x10000);
1440 state->result = NewRENode(state, REOP_BACKREF);
1441 if (!state->result)
1442 return FALSE;
1443 state->result->u.parenIndex = num - 1;
1444 state->progLength
1445 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1446 break;
1447 /* Control escape */
1448 case 'f':
1449 c = 0xC;
1450 goto doFlat;
1451 case 'n':
1452 c = 0xA;
1453 goto doFlat;
1454 case 'r':
1455 c = 0xD;
1456 goto doFlat;
1457 case 't':
1458 c = 0x9;
1459 goto doFlat;
1460 case 'v':
1461 c = 0xB;
1462 goto doFlat;
1463 /* Control letter */
1464 case 'c':
1465 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1466 c = (WCHAR) (*state->cp++ & 0x1F);
1467 } else {
1468 /* back off to accepting the original '\' as a literal */
1469 --state->cp;
1470 c = '\\';
1471 }
1472 goto doFlat;
1473 /* HexEscapeSequence */
1474 case 'x':
1475 nDigits = 2;
1476 goto lexHex;
1477 /* UnicodeEscapeSequence */
1478 case 'u':
1479 nDigits = 4;
1480lexHex:
1481 n = 0;
1482 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1483 UINT digit;
1484 c = *state->cp++;
1485 if (!isASCIIHexDigit(c, &digit)) {
1486 /*
1487 * Back off to accepting the original 'u' or 'x' as a
1488 * literal.
1489 */
1490 state->cp -= i + 2;
1491 n = *state->cp++;
1492 break;
1493 }
1494 n = (n << 4) | digit;
1495 }
1496 c = (WCHAR) n;
1497 goto doFlat;
1498 /* Character class escapes */
1499 case 'd':
1500 state->result = NewRENode(state, REOP_DIGIT);
1501doSimple:
1502 if (!state->result)
1503 return FALSE;
1504 state->progLength++;
1505 break;
1506 case 'D':
1507 state->result = NewRENode(state, REOP_NONDIGIT);
1508 goto doSimple;
1509 case 's':
1510 state->result = NewRENode(state, REOP_SPACE);
1511 goto doSimple;
1512 case 'S':
1513 state->result = NewRENode(state, REOP_NONSPACE);
1514 goto doSimple;
1515 case 'w':
1516 state->result = NewRENode(state, REOP_ALNUM);
1517 goto doSimple;
1518 case 'W':
1519 state->result = NewRENode(state, REOP_NONALNUM);
1520 goto doSimple;
1521 /* IdentityEscape */
1522 default:
1523 state->result = NewRENode(state, REOP_FLAT);
1524 if (!state->result)
1525 return FALSE;
1526 state->result->u.flat.chr = c;
1527 state->result->u.flat.length = 1;
1528 state->result->kid = (void *) (state->cp - 1);
1529 state->progLength += 3;
1530 break;
1531 }
1532 break;
1533 case '[':
1534 state->result = NewRENode(state, REOP_CLASS);
1535 if (!state->result)
1536 return FALSE;
1537 termStart = state->cp;
1538 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1539 for (;;) {
1540 if (state->cp == state->cpend) {
1541 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1542 JSMSG_UNTERM_CLASS, termStart);
1543
1544 return FALSE;
1545 }
1546 if (*state->cp == '\\') {
1547 state->cp++;
1548 if (state->cp != state->cpend)
1549 state->cp++;
1550 continue;
1551 }
1552 if (*state->cp == ']') {
1553 state->result->u.ucclass.kidlen = state->cp - termStart;
1554 break;
1555 }
1556 state->cp++;
1557 }
1558 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1559 if (!state->classCache[i].start) {
1560 state->classCache[i].start = termStart;
1561 state->classCache[i].length = state->result->u.ucclass.kidlen;
1562 state->classCache[i].index = state->classCount;
1563 break;
1564 }
1565 if (state->classCache[i].length ==
1566 state->result->u.ucclass.kidlen) {
1567 for (n = 0; ; n++) {
1568 if (n == state->classCache[i].length) {
1569 state->result->u.ucclass.index
1570 = state->classCache[i].index;
1571 goto claim;
1572 }
1573 if (state->classCache[i].start[n] != termStart[n])
1574 break;
1575 }
1576 }
1577 }
1578 state->result->u.ucclass.index = state->classCount++;
1579
1580 claim:
1581 /*
1582 * Call CalculateBitmapSize now as we want any errors it finds
1583 * to be reported during the parse phase, not at execution.
1584 */
1585 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1586 return FALSE;
1587 /*
1588 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1589 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1590 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1591 */
1592 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1593 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1594 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1595 return FALSE;
1596 }
1597 state->classBitmapsMem += n;
1598 /* CLASS, <index> */
1599 state->progLength
1600 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1601 break;
1602
1603 case '.':
1604 state->result = NewRENode(state, REOP_DOT);
1605 goto doSimple;
1606
1607 case '{':
1608 {
1609 const WCHAR *errp = state->cp--;
1610 INT err;
1611
1613 state->cp = errp;
1614
1615 if (err < 0)
1616 goto asFlat;
1617
1618 /* FALL THROUGH */
1619 }
1620 case '*':
1621 case '+':
1622 case '?':
1624 return FALSE;
1625 default:
1626asFlat:
1627 state->result = NewRENode(state, REOP_FLAT);
1628 if (!state->result)
1629 return FALSE;
1630 state->result->u.flat.chr = c;
1631 state->result->u.flat.length = 1;
1632 state->result->kid = (void *) (state->cp - 1);
1633 state->progLength += 3;
1634 break;
1635 }
1636 return ParseQuantifier(state);
1637}
1638
1639/*
1640 * Top-down regular expression grammar, based closely on Perl4.
1641 *
1642 * regexp: altern A regular expression is one or more
1643 * altern '|' regexp alternatives separated by vertical bar.
1644 */
1645#define INITIAL_STACK_SIZE 128
1646
1647static BOOL
1649{
1650 size_t parenIndex;
1651 RENode *operand;
1652 REOpData *operatorStack;
1653 RENode **operandStack;
1654 REOp op;
1655 INT i;
1656 BOOL result = FALSE;
1657
1658 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1659 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1660
1661 /* Watch out for empty regexp */
1662 if (state->cp == state->cpend) {
1663 state->result = NewRENode(state, REOP_EMPTY);
1664 return (state->result != NULL);
1665 }
1666
1667 operatorStack = malloc(sizeof(REOpData) * operatorStackSize);
1668 if (!operatorStack)
1669 return FALSE;
1670
1671 operandStack = malloc(sizeof(RENode *) * operandStackSize);
1672 if (!operandStack)
1673 goto out;
1674
1675 for (;;) {
1676 parenIndex = state->parenCount;
1677 if (state->cp == state->cpend) {
1678 /*
1679 * If we are at the end of the regexp and we're short one or more
1680 * operands, the regexp must have the form /x|/ or some such, with
1681 * left parentheses making us short more than one operand.
1682 */
1683 if (operatorSP >= operandSP) {
1684 operand = NewRENode(state, REOP_EMPTY);
1685 if (!operand)
1686 goto out;
1687 goto pushOperand;
1688 }
1689 } else {
1690 switch (*state->cp) {
1691 case '(':
1692 ++state->cp;
1693 if (state->cp + 1 < state->cpend &&
1694 *state->cp == '?' &&
1695 (state->cp[1] == '=' ||
1696 state->cp[1] == '!' ||
1697 state->cp[1] == ':')) {
1698 switch (state->cp[1]) {
1699 case '=':
1700 op = REOP_ASSERT;
1701 /* ASSERT, <next>, ... ASSERTTEST */
1702 state->progLength += 4;
1703 break;
1704 case '!':
1706 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1707 state->progLength += 4;
1708 break;
1709 default:
1711 break;
1712 }
1713 state->cp += 2;
1714 } else {
1715 op = REOP_LPAREN;
1716 /* LPAREN, <index>, ... RPAREN, <index> */
1717 state->progLength
1718 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1719 state->parenCount++;
1720 if (state->parenCount == 65535) {
1721 ReportRegExpError(state, JSREPORT_ERROR,
1722 JSMSG_TOO_MANY_PARENS);
1723 goto out;
1724 }
1725 }
1726 goto pushOperator;
1727
1728 case ')':
1729 /*
1730 * If there's no stacked open parenthesis, throw syntax error.
1731 */
1732 for (i = operatorSP - 1; ; i--) {
1733 if (i < 0) {
1734 ReportRegExpError(state, JSREPORT_ERROR,
1735 JSMSG_UNMATCHED_RIGHT_PAREN);
1736 goto out;
1737 }
1738 if (operatorStack[i].op == REOP_ASSERT ||
1739 operatorStack[i].op == REOP_ASSERT_NOT ||
1740 operatorStack[i].op == REOP_LPARENNON ||
1741 operatorStack[i].op == REOP_LPAREN) {
1742 break;
1743 }
1744 }
1745 /* FALL THROUGH */
1746
1747 case '|':
1748 /* Expected an operand before these, so make an empty one */
1749 operand = NewRENode(state, REOP_EMPTY);
1750 if (!operand)
1751 goto out;
1752 goto pushOperand;
1753
1754 default:
1755 if (!ParseTerm(state))
1756 goto out;
1757 operand = state->result;
1758pushOperand:
1759 if (operandSP == operandStackSize) {
1760 RENode **tmp;
1761 operandStackSize += operandStackSize;
1762 tmp = realloc(operandStack, sizeof(RENode *) * operandStackSize);
1763 if (!tmp)
1764 goto out;
1765 operandStack = tmp;
1766 }
1767 operandStack[operandSP++] = operand;
1768 break;
1769 }
1770 }
1771
1772 /* At the end; process remaining operators. */
1773restartOperator:
1774 if (state->cp == state->cpend) {
1775 while (operatorSP) {
1776 --operatorSP;
1777 if (!ProcessOp(state, &operatorStack[operatorSP],
1778 operandStack, operandSP))
1779 goto out;
1780 --operandSP;
1781 }
1782 assert(operandSP == 1);
1783 state->result = operandStack[0];
1784 result = TRUE;
1785 goto out;
1786 }
1787
1788 switch (*state->cp) {
1789 case '|':
1790 /* Process any stacked 'concat' operators */
1791 ++state->cp;
1792 while (operatorSP &&
1793 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1794 --operatorSP;
1795 if (!ProcessOp(state, &operatorStack[operatorSP],
1796 operandStack, operandSP)) {
1797 goto out;
1798 }
1799 --operandSP;
1800 }
1801 op = REOP_ALT;
1802 goto pushOperator;
1803
1804 case ')':
1805 /*
1806 * If there's no stacked open parenthesis, throw syntax error.
1807 */
1808 for (i = operatorSP - 1; ; i--) {
1809 if (i < 0) {
1810 ReportRegExpError(state, JSREPORT_ERROR,
1811 JSMSG_UNMATCHED_RIGHT_PAREN);
1812 goto out;
1813 }
1814 if (operatorStack[i].op == REOP_ASSERT ||
1815 operatorStack[i].op == REOP_ASSERT_NOT ||
1816 operatorStack[i].op == REOP_LPARENNON ||
1817 operatorStack[i].op == REOP_LPAREN) {
1818 break;
1819 }
1820 }
1821 ++state->cp;
1822
1823 /* Process everything on the stack until the open parenthesis. */
1824 for (;;) {
1825 assert(operatorSP);
1826 --operatorSP;
1827 switch (operatorStack[operatorSP].op) {
1828 case REOP_ASSERT:
1829 case REOP_ASSERT_NOT:
1830 case REOP_LPAREN:
1831 operand = NewRENode(state, operatorStack[operatorSP].op);
1832 if (!operand)
1833 goto out;
1834 operand->u.parenIndex =
1835 operatorStack[operatorSP].parenIndex;
1836 assert(operandSP);
1837 operand->kid = operandStack[operandSP - 1];
1838 operandStack[operandSP - 1] = operand;
1839 if (state->treeDepth == TREE_DEPTH_MAX) {
1840 ReportRegExpError(state, JSREPORT_ERROR,
1841 JSMSG_REGEXP_TOO_COMPLEX);
1842 goto out;
1843 }
1844 ++state->treeDepth;
1845 /* FALL THROUGH */
1846
1847 case REOP_LPARENNON:
1848 state->result = operandStack[operandSP - 1];
1849 if (!ParseQuantifier(state))
1850 goto out;
1851 operandStack[operandSP - 1] = state->result;
1852 goto restartOperator;
1853 default:
1854 if (!ProcessOp(state, &operatorStack[operatorSP],
1855 operandStack, operandSP))
1856 goto out;
1857 --operandSP;
1858 break;
1859 }
1860 }
1861 break;
1862
1863 case '{':
1864 {
1865 const WCHAR *errp = state->cp;
1866
1867 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1868 /*
1869 * This didn't even scan correctly as a quantifier, so we should
1870 * treat it as flat.
1871 */
1872 op = REOP_CONCAT;
1873 goto pushOperator;
1874 }
1875
1876 state->cp = errp;
1877 /* FALL THROUGH */
1878 }
1879
1880 case '+':
1881 case '*':
1882 case '?':
1883 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1884 state->cp);
1885 result = FALSE;
1886 goto out;
1887
1888 default:
1889 /* Anything else is the start of the next term. */
1890 op = REOP_CONCAT;
1891pushOperator:
1892 if (operatorSP == operatorStackSize) {
1893 REOpData *tmp;
1894 operatorStackSize += operatorStackSize;
1895 tmp = realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1896 if (!tmp)
1897 goto out;
1898 operatorStack = tmp;
1899 }
1900 operatorStack[operatorSP].op = op;
1901 operatorStack[operatorSP].errPos = state->cp;
1902 operatorStack[operatorSP++].parenIndex = parenIndex;
1903 break;
1904 }
1905 }
1906out:
1907 free(operatorStack);
1908 free(operandStack);
1909 return result;
1910}
1911
1912/*
1913 * Save the current state of the match - the position in the input
1914 * text as well as the position in the bytecode. The state of any
1915 * parent expressions is also saved (preceding state).
1916 * Contents of parenCount parentheses from parenIndex are also saved.
1917 */
1918static REBackTrackData *
1921 size_t parenIndex, size_t parenCount)
1922{
1923 size_t i;
1925 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1926
1927 size_t sz = sizeof(REBackTrackData) +
1928 gData->stateStackTop * sizeof(REProgState) +
1929 parenCount * sizeof(RECapture);
1930
1931 ptrdiff_t btsize = gData->backTrackStackSize;
1932 ptrdiff_t btincr = ((char *)result + sz) -
1933 ((char *)gData->backTrackStack + btsize);
1934
1935 TRACE("\tBT_Push: %Iu,%Iu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1936
1937 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1938 if (btincr > 0) {
1939 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1940
1941 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1942 btincr = ((btincr+btsize-1)/btsize)*btsize;
1943 gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1944 if (!gData->backTrackStack) {
1945 throw_error(gData->cx, E_OUTOFMEMORY, L"");
1946 gData->ok = FALSE;
1947 return NULL;
1948 }
1949 gData->backTrackStackSize = btsize + btincr;
1950 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1951 }
1952 gData->backTrackSP = result;
1953 result->sz = gData->cursz;
1954 gData->cursz = sz;
1955
1956 result->backtrack_op = op;
1957 result->backtrack_pc = target;
1958 result->cp = cp;
1959 result->parenCount = parenCount;
1960 result->parenIndex = parenIndex;
1961
1962 result->saveStateStackTop = gData->stateStackTop;
1963 assert(gData->stateStackTop);
1964 memcpy(result + 1, gData->stateStack,
1965 sizeof(REProgState) * result->saveStateStackTop);
1966
1967 if (parenCount != 0) {
1968 memcpy((char *)(result + 1) +
1969 sizeof(REProgState) * result->saveStateStackTop,
1970 &x->parens[parenIndex],
1971 sizeof(RECapture) * parenCount);
1972 for (i = 0; i != parenCount; i++)
1973 x->parens[parenIndex + i].index = -1;
1974 }
1975
1976 return result;
1977}
1978
1979static inline match_state_t *
1980FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
1981 size_t length)
1982{
1983 size_t i;
1984 assert(gData->cpend >= x->cp);
1985 if (length > (size_t)(gData->cpend - x->cp))
1986 return NULL;
1987 for (i = 0; i != length; i++) {
1988 if (towupper(matchChars[i]) != towupper(x->cp[i]))
1989 return NULL;
1990 }
1991 x->cp += length;
1992 return x;
1993}
1994
1995/*
1996 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
1997 * 2. If E is not a character then go to step 6.
1998 * 3. Let ch be E's character.
1999 * 4. Let A be a one-element RECharSet containing the character ch.
2000 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2001 * 6. E must be an integer. Let n be that integer.
2002 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2003 * 8. Return an internal Matcher closure that takes two arguments, a State x
2004 * and a Continuation c, and performs the following:
2005 * 1. Let cap be x's captures internal array.
2006 * 2. Let s be cap[n].
2007 * 3. If s is undefined, then call c(x) and return its result.
2008 * 4. Let e be x's endIndex.
2009 * 5. Let len be s's length.
2010 * 6. Let f be e+len.
2011 * 7. If f>InputLength, return failure.
2012 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2013 * such that Canonicalize(s[i]) is not the same character as
2014 * Canonicalize(Input [e+i]), then return failure.
2015 * 9. Let y be the State (f, cap).
2016 * 10. Call c(y) and return its result.
2017 */
2018static match_state_t *
2019BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
2020{
2021 size_t len, i;
2022 const WCHAR *parenContent;
2023 RECapture *cap = &x->parens[parenIndex];
2024
2025 if (cap->index == -1)
2026 return x;
2027
2028 len = cap->length;
2029 if (x->cp + len > gData->cpend)
2030 return NULL;
2031
2032 parenContent = &gData->cpbegin[cap->index];
2033 if (gData->regexp->flags & REG_FOLD) {
2034 for (i = 0; i < len; i++) {
2035 if (towupper(parenContent[i]) != towupper(x->cp[i]))
2036 return NULL;
2037 }
2038 } else {
2039 for (i = 0; i < len; i++) {
2040 if (parenContent[i] != x->cp[i])
2041 return NULL;
2042 }
2043 }
2044 x->cp += len;
2045 return x;
2046}
2047
2048/* Add a single character to the RECharSet */
2049static void
2051{
2052 UINT byteIndex = (UINT)(c >> 3);
2053 assert(c <= cs->length);
2054 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2055}
2056
2057
2058/* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2059static void
2061{
2062 UINT i;
2063
2064 UINT byteIndex1 = c1 >> 3;
2065 UINT byteIndex2 = c2 >> 3;
2066
2067 assert(c2 <= cs->length && c1 <= c2);
2068
2069 c1 &= 0x7;
2070 c2 &= 0x7;
2071
2072 if (byteIndex1 == byteIndex2) {
2073 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2074 } else {
2075 cs->u.bits[byteIndex1] |= 0xFF << c1;
2076 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2077 cs->u.bits[i] = 0xFF;
2078 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2079 }
2080}
2081
2082/* Compile the source of the class into a RECharSet */
2083static BOOL
2085{
2086 const WCHAR *src, *end;
2087 BOOL inRange = FALSE;
2088 WCHAR rangeStart = 0;
2089 UINT byteLength, n;
2090 WCHAR c, thisCh;
2091 INT nDigits, i;
2092
2093 assert(!charSet->converted);
2094 /*
2095 * Assert that startIndex and length points to chars inside [] inside
2096 * source string.
2097 */
2098 assert(1 <= charSet->u.src.startIndex);
2099 assert(charSet->u.src.startIndex < gData->regexp->source_len);
2100 assert(charSet->u.src.length <= gData->regexp->source_len
2101 - 1 - charSet->u.src.startIndex);
2102
2103 charSet->converted = TRUE;
2104 src = gData->regexp->source + charSet->u.src.startIndex;
2105
2106 end = src + charSet->u.src.length;
2107
2108 assert(src[-1] == '[' && end[0] == ']');
2109
2110 byteLength = (charSet->length >> 3) + 1;
2111 charSet->u.bits = malloc(byteLength);
2112 if (!charSet->u.bits) {
2113 throw_error(gData->cx, E_OUTOFMEMORY, L"");
2114 gData->ok = FALSE;
2115 return FALSE;
2116 }
2117 memset(charSet->u.bits, 0, byteLength);
2118
2119 if (src == end)
2120 return TRUE;
2121
2122 if (*src == '^') {
2123 assert(charSet->sense == FALSE);
2124 ++src;
2125 } else {
2126 assert(charSet->sense == TRUE);
2127 }
2128
2129 while (src != end) {
2130 switch (*src) {
2131 case '\\':
2132 ++src;
2133 c = *src++;
2134 switch (c) {
2135 case 'b':
2136 thisCh = 0x8;
2137 break;
2138 case 'f':
2139 thisCh = 0xC;
2140 break;
2141 case 'n':
2142 thisCh = 0xA;
2143 break;
2144 case 'r':
2145 thisCh = 0xD;
2146 break;
2147 case 't':
2148 thisCh = 0x9;
2149 break;
2150 case 'v':
2151 thisCh = 0xB;
2152 break;
2153 case 'c':
2154 if (src < end && JS_ISWORD(*src)) {
2155 thisCh = (WCHAR)(*src++ & 0x1F);
2156 } else {
2157 --src;
2158 thisCh = '\\';
2159 }
2160 break;
2161 case 'x':
2162 nDigits = 2;
2163 goto lexHex;
2164 case 'u':
2165 nDigits = 4;
2166 lexHex:
2167 n = 0;
2168 for (i = 0; (i < nDigits) && (src < end); i++) {
2169 UINT digit;
2170 c = *src++;
2171 if (!isASCIIHexDigit(c, &digit)) {
2172 /*
2173 * Back off to accepting the original '\'
2174 * as a literal
2175 */
2176 src -= i + 1;
2177 n = '\\';
2178 break;
2179 }
2180 n = (n << 4) | digit;
2181 }
2182 thisCh = (WCHAR)n;
2183 break;
2184 case '0':
2185 case '1':
2186 case '2':
2187 case '3':
2188 case '4':
2189 case '5':
2190 case '6':
2191 case '7':
2192 /*
2193 * This is a non-ECMA extension - decimal escapes (in this
2194 * case, octal!) are supposed to be an error inside class
2195 * ranges, but supported here for backwards compatibility.
2196 */
2197 n = JS7_UNDEC(c);
2198 c = *src;
2199 if ('0' <= c && c <= '7') {
2200 src++;
2201 n = 8 * n + JS7_UNDEC(c);
2202 c = *src;
2203 if ('0' <= c && c <= '7') {
2204 src++;
2205 i = 8 * n + JS7_UNDEC(c);
2206 if (i <= 0377)
2207 n = i;
2208 else
2209 src--;
2210 }
2211 }
2212 thisCh = (WCHAR)n;
2213 break;
2214
2215 case 'd':
2216 AddCharacterRangeToCharSet(charSet, '0', '9');
2217 continue; /* don't need range processing */
2218 case 'D':
2219 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2221 (WCHAR)('9' + 1),
2222 (WCHAR)charSet->length);
2223 continue;
2224 case 's':
2225 for (i = (INT)charSet->length; i >= 0; i--)
2226 if (iswspace(i))
2227 AddCharacterToCharSet(charSet, (WCHAR)i);
2228 continue;
2229 case 'S':
2230 for (i = (INT)charSet->length; i >= 0; i--)
2231 if (!iswspace(i))
2232 AddCharacterToCharSet(charSet, (WCHAR)i);
2233 continue;
2234 case 'w':
2235 for (i = (INT)charSet->length; i >= 0; i--)
2236 if (JS_ISWORD(i))
2237 AddCharacterToCharSet(charSet, (WCHAR)i);
2238 continue;
2239 case 'W':
2240 for (i = (INT)charSet->length; i >= 0; i--)
2241 if (!JS_ISWORD(i))
2242 AddCharacterToCharSet(charSet, (WCHAR)i);
2243 continue;
2244 default:
2245 thisCh = c;
2246 break;
2247
2248 }
2249 break;
2250
2251 default:
2252 thisCh = *src++;
2253 break;
2254
2255 }
2256 if (inRange) {
2257 if (gData->regexp->flags & REG_FOLD) {
2258 assert(rangeStart <= thisCh);
2259 for (i = rangeStart; i <= thisCh; i++) {
2260 WCHAR uch, dch;
2261
2262 AddCharacterToCharSet(charSet, i);
2263 uch = towupper(i);
2264 dch = towlower(i);
2265 if (i != uch)
2266 AddCharacterToCharSet(charSet, uch);
2267 if (i != dch)
2268 AddCharacterToCharSet(charSet, dch);
2269 }
2270 } else {
2271 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2272 }
2273 inRange = FALSE;
2274 } else {
2275 if (gData->regexp->flags & REG_FOLD) {
2276 AddCharacterToCharSet(charSet, towupper(thisCh));
2277 AddCharacterToCharSet(charSet, towlower(thisCh));
2278 } else {
2279 AddCharacterToCharSet(charSet, thisCh);
2280 }
2281 if (src < end - 1) {
2282 if (*src == '-') {
2283 ++src;
2284 inRange = TRUE;
2285 rangeStart = thisCh;
2286 }
2287 }
2288 }
2289 }
2290 return TRUE;
2291}
2292
2293static BOOL
2295{
2296 size_t limit = gData->stateStackLimit;
2297 size_t sz = sizeof(REProgState) * limit;
2298
2299 gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2300 if (!gData->stateStack) {
2301 throw_error(gData->cx, E_OUTOFMEMORY, L"");
2302 gData->ok = FALSE;
2303 return FALSE;
2304 }
2305 gData->stateStackLimit = limit + limit;
2306 return TRUE;
2307}
2308
2309#define PUSH_STATE_STACK(data) \
2310 do { \
2311 ++(data)->stateStackTop; \
2312 if ((data)->stateStackTop == (data)->stateStackLimit && \
2313 !ReallocStateStack((data))) { \
2314 return NULL; \
2315 } \
2316 }while(0)
2317
2318/*
2319 * Apply the current op against the given input to see if it's going to match
2320 * or fail. Return false if we don't get a match, true if we do. If updatecp is
2321 * true, then update the current state's cp. Always update startpc to the next
2322 * op.
2323 */
2324static inline match_state_t *
2326 jsbytecode **startpc, BOOL updatecp)
2327{
2329 WCHAR matchCh;
2330 size_t parenIndex;
2331 size_t offset, length, index;
2332 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2333 const WCHAR *source;
2334 const WCHAR *startcp = x->cp;
2335 WCHAR ch;
2336 RECharSet *charSet;
2337
2338 const char *opname = reop_names[op];
2339 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2340 (int)gData->stateStackTop * 2, "", opname);
2341
2342 switch (op) {
2343 case REOP_EMPTY:
2344 result = x;
2345 break;
2346 case REOP_BOL:
2347 if (x->cp != gData->cpbegin) {
2348 if (
2349 !(gData->regexp->flags & REG_MULTILINE)) {
2350 break;
2351 }
2352 if (!RE_IS_LINE_TERM(x->cp[-1]))
2353 break;
2354 }
2355 result = x;
2356 break;
2357 case REOP_EOL:
2358 if (x->cp != gData->cpend) {
2359 if (
2360 !(gData->regexp->flags & REG_MULTILINE)) {
2361 break;
2362 }
2363 if (!RE_IS_LINE_TERM(*x->cp))
2364 break;
2365 }
2366 result = x;
2367 break;
2368 case REOP_WBDRY:
2369 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2370 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2371 result = x;
2372 }
2373 break;
2374 case REOP_WNONBDRY:
2375 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2376 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2377 result = x;
2378 }
2379 break;
2380 case REOP_DOT:
2381 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2382 result = x;
2383 result->cp++;
2384 }
2385 break;
2386 case REOP_DIGIT:
2387 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2388 result = x;
2389 result->cp++;
2390 }
2391 break;
2392 case REOP_NONDIGIT:
2393 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2394 result = x;
2395 result->cp++;
2396 }
2397 break;
2398 case REOP_ALNUM:
2399 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2400 result = x;
2401 result->cp++;
2402 }
2403 break;
2404 case REOP_NONALNUM:
2405 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2406 result = x;
2407 result->cp++;
2408 }
2409 break;
2410 case REOP_SPACE:
2411 if (x->cp != gData->cpend && iswspace(*x->cp)) {
2412 result = x;
2413 result->cp++;
2414 }
2415 break;
2416 case REOP_NONSPACE:
2417 if (x->cp != gData->cpend && !iswspace(*x->cp)) {
2418 result = x;
2419 result->cp++;
2420 }
2421 break;
2422 case REOP_BACKREF:
2423 pc = ReadCompactIndex(pc, &parenIndex);
2424 assert(parenIndex < gData->regexp->parenCount);
2425 result = BackrefMatcher(gData, x, parenIndex);
2426 break;
2427 case REOP_FLAT:
2428 pc = ReadCompactIndex(pc, &offset);
2429 assert(offset < gData->regexp->source_len);
2430 pc = ReadCompactIndex(pc, &length);
2431 assert(1 <= length);
2432 assert(length <= gData->regexp->source_len - offset);
2433 if (length <= (size_t)(gData->cpend - x->cp)) {
2434 source = gData->regexp->source + offset;
2435 TRACE("%s\n", debugstr_wn(source, length));
2436 for (index = 0; index != length; index++) {
2437 if (source[index] != x->cp[index])
2438 return NULL;
2439 }
2440 x->cp += length;
2441 result = x;
2442 }
2443 break;
2444 case REOP_FLAT1:
2445 matchCh = *pc++;
2446 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2447 if (x->cp != gData->cpend && *x->cp == matchCh) {
2448 result = x;
2449 result->cp++;
2450 }
2451 break;
2452 case REOP_FLATi:
2453 pc = ReadCompactIndex(pc, &offset);
2454 assert(offset < gData->regexp->source_len);
2455 pc = ReadCompactIndex(pc, &length);
2456 assert(1 <= length);
2457 assert(length <= gData->regexp->source_len - offset);
2458 source = gData->regexp->source;
2459 result = FlatNIMatcher(gData, x, source + offset, length);
2460 break;
2461 case REOP_FLAT1i:
2462 matchCh = *pc++;
2463 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2464 result = x;
2465 result->cp++;
2466 }
2467 break;
2468 case REOP_UCFLAT1:
2469 matchCh = GET_ARG(pc);
2470 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2471 pc += ARG_LEN;
2472 if (x->cp != gData->cpend && *x->cp == matchCh) {
2473 result = x;
2474 result->cp++;
2475 }
2476 break;
2477 case REOP_UCFLAT1i:
2478 matchCh = GET_ARG(pc);
2479 pc += ARG_LEN;
2480 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2481 result = x;
2482 result->cp++;
2483 }
2484 break;
2485 case REOP_CLASS:
2486 pc = ReadCompactIndex(pc, &index);
2487 assert(index < gData->regexp->classCount);
2488 if (x->cp != gData->cpend) {
2489 charSet = &gData->regexp->classList[index];
2490 assert(charSet->converted);
2491 ch = *x->cp;
2492 index = ch >> 3;
2493 if (charSet->length != 0 &&
2494 ch <= charSet->length &&
2495 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2496 result = x;
2497 result->cp++;
2498 }
2499 }
2500 break;
2501 case REOP_NCLASS:
2502 pc = ReadCompactIndex(pc, &index);
2503 assert(index < gData->regexp->classCount);
2504 if (x->cp != gData->cpend) {
2505 charSet = &gData->regexp->classList[index];
2506 assert(charSet->converted);
2507 ch = *x->cp;
2508 index = ch >> 3;
2509 if (charSet->length == 0 ||
2510 ch > charSet->length ||
2511 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2512 result = x;
2513 result->cp++;
2514 }
2515 }
2516 break;
2517
2518 default:
2519 assert(FALSE);
2520 }
2521 if (result) {
2522 if (!updatecp)
2523 x->cp = startcp;
2524 *startpc = pc;
2525 TRACE(" *\n");
2526 return result;
2527 }
2528 x->cp = startcp;
2529 return NULL;
2530}
2531
2532static inline match_state_t *
2534{
2536 REBackTrackData *backTrackData;
2537 jsbytecode *nextpc, *testpc;
2538 REOp nextop;
2539 RECapture *cap;
2540 REProgState *curState;
2541 const WCHAR *startcp;
2542 size_t parenIndex, k;
2543 size_t parenSoFar = 0;
2544
2545 WCHAR matchCh1, matchCh2;
2546 RECharSet *charSet;
2547
2548 BOOL anchor;
2549 jsbytecode *pc = gData->regexp->program;
2550 REOp op = (REOp) *pc++;
2551
2552 /*
2553 * If the first node is a simple match, step the index into the string
2554 * until that match is made, or fail if it can't be found at all.
2555 */
2556 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & REG_STICKY)) {
2557 anchor = FALSE;
2558 while (x->cp <= gData->cpend) {
2559 nextpc = pc; /* reset back to start each time */
2560 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2561 if (result) {
2562 anchor = TRUE;
2563 x = result;
2564 pc = nextpc; /* accept skip to next opcode */
2565 op = (REOp) *pc++;
2566 assert(op < REOP_LIMIT);
2567 break;
2568 }
2569 gData->skipped++;
2570 x->cp++;
2571 }
2572 if (!anchor)
2573 goto bad;
2574 }
2575
2576 for (;;) {
2577 const char *opname = reop_names[op];
2578 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2579 (int)gData->stateStackTop * 2, "", opname);
2580
2581 if (REOP_IS_SIMPLE(op)) {
2582 result = SimpleMatch(gData, x, op, &pc, TRUE);
2583 } else {
2584 curState = &gData->stateStack[gData->stateStackTop];
2585 switch (op) {
2586 case REOP_END:
2587 goto good;
2588 case REOP_ALTPREREQ2:
2589 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2590 pc += ARG_LEN;
2591 matchCh2 = GET_ARG(pc);
2592 pc += ARG_LEN;
2593 k = GET_ARG(pc);
2594 pc += ARG_LEN;
2595
2596 if (x->cp != gData->cpend) {
2597 if (*x->cp == matchCh2)
2598 goto doAlt;
2599
2600 charSet = &gData->regexp->classList[k];
2601 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2602 goto bad;
2603 matchCh1 = *x->cp;
2604 k = matchCh1 >> 3;
2605 if ((charSet->length == 0 ||
2606 matchCh1 > charSet->length ||
2607 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2608 charSet->sense) {
2609 goto doAlt;
2610 }
2611 }
2612 result = NULL;
2613 break;
2614
2615 case REOP_ALTPREREQ:
2616 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2617 pc += ARG_LEN;
2618 matchCh1 = GET_ARG(pc);
2619 pc += ARG_LEN;
2620 matchCh2 = GET_ARG(pc);
2621 pc += ARG_LEN;
2622 if (x->cp == gData->cpend ||
2623 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2624 result = NULL;
2625 break;
2626 }
2627 /* else fall through... */
2628
2629 case REOP_ALT:
2630 doAlt:
2631 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2632 pc += ARG_LEN; /* start of this alternate */
2633 curState->parenSoFar = parenSoFar;
2634 PUSH_STATE_STACK(gData);
2635 op = (REOp) *pc++;
2636 startcp = x->cp;
2637 if (REOP_IS_SIMPLE(op)) {
2638 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2639 op = (REOp) *nextpc++;
2640 pc = nextpc;
2641 continue;
2642 }
2643 result = x;
2644 op = (REOp) *pc++;
2645 }
2646 nextop = (REOp) *nextpc++;
2647 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2648 goto bad;
2649 continue;
2650
2651 /*
2652 * Occurs at (successful) end of REOP_ALT,
2653 */
2654 case REOP_JUMP:
2655 /*
2656 * If we have not gotten a result here, it is because of an
2657 * empty match. Do the same thing REOP_EMPTY would do.
2658 */
2659 if (!result)
2660 result = x;
2661
2662 --gData->stateStackTop;
2663 pc += GET_OFFSET(pc);
2664 op = (REOp) *pc++;
2665 continue;
2666
2667 /*
2668 * Occurs at last (successful) end of REOP_ALT,
2669 */
2670 case REOP_ENDALT:
2671 /*
2672 * If we have not gotten a result here, it is because of an
2673 * empty match. Do the same thing REOP_EMPTY would do.
2674 */
2675 if (!result)
2676 result = x;
2677
2678 --gData->stateStackTop;
2679 op = (REOp) *pc++;
2680 continue;
2681
2682 case REOP_LPAREN:
2683 pc = ReadCompactIndex(pc, &parenIndex);
2684 TRACE("[ %Iu ]\n", (ULONG_PTR)parenIndex);
2685 assert(parenIndex < gData->regexp->parenCount);
2686 if (parenIndex + 1 > parenSoFar)
2687 parenSoFar = parenIndex + 1;
2688 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2689 x->parens[parenIndex].length = 0;
2690 op = (REOp) *pc++;
2691 continue;
2692
2693 case REOP_RPAREN:
2694 {
2695 ptrdiff_t delta;
2696
2697 pc = ReadCompactIndex(pc, &parenIndex);
2698 assert(parenIndex < gData->regexp->parenCount);
2699 cap = &x->parens[parenIndex];
2700 delta = x->cp - (gData->cpbegin + cap->index);
2701 cap->length = (delta < 0) ? 0 : (size_t) delta;
2702 op = (REOp) *pc++;
2703
2704 if (!result)
2705 result = x;
2706 continue;
2707 }
2708 case REOP_ASSERT:
2709 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2710 pc += ARG_LEN; /* start of ASSERT child */
2711 op = (REOp) *pc++;
2712 testpc = pc;
2713 if (REOP_IS_SIMPLE(op) &&
2714 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2715 result = NULL;
2716 break;
2717 }
2718 curState->u.assertion.top =
2719 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2720 curState->u.assertion.sz = gData->cursz;
2721 curState->index = x->cp - gData->cpbegin;
2722 curState->parenSoFar = parenSoFar;
2723 PUSH_STATE_STACK(gData);
2725 nextpc, x, x->cp, 0, 0)) {
2726 goto bad;
2727 }
2728 continue;
2729
2730 case REOP_ASSERT_NOT:
2731 nextpc = pc + GET_OFFSET(pc);
2732 pc += ARG_LEN;
2733 op = (REOp) *pc++;
2734 testpc = pc;
2735 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2736 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2737 *testpc == REOP_ASSERTNOTTEST) {
2738 result = NULL;
2739 break;
2740 }
2741 curState->u.assertion.top
2742 = (char *)gData->backTrackSP -
2743 (char *)gData->backTrackStack;
2744 curState->u.assertion.sz = gData->cursz;
2745 curState->index = x->cp - gData->cpbegin;
2746 curState->parenSoFar = parenSoFar;
2747 PUSH_STATE_STACK(gData);
2749 nextpc, x, x->cp, 0, 0)) {
2750 goto bad;
2751 }
2752 continue;
2753
2754 case REOP_ASSERTTEST:
2755 --gData->stateStackTop;
2756 --curState;
2757 x->cp = gData->cpbegin + curState->index;
2758 gData->backTrackSP =
2759 (REBackTrackData *) ((char *)gData->backTrackStack +
2760 curState->u.assertion.top);
2761 gData->cursz = curState->u.assertion.sz;
2762 if (result)
2763 result = x;
2764 break;
2765
2766 case REOP_ASSERTNOTTEST:
2767 --gData->stateStackTop;
2768 --curState;
2769 x->cp = gData->cpbegin + curState->index;
2770 gData->backTrackSP =
2771 (REBackTrackData *) ((char *)gData->backTrackStack +
2772 curState->u.assertion.top);
2773 gData->cursz = curState->u.assertion.sz;
2774 result = (!result) ? x : NULL;
2775 break;
2776 case REOP_STAR:
2777 curState->u.quantifier.min = 0;
2778 curState->u.quantifier.max = (UINT)-1;
2779 goto quantcommon;
2780 case REOP_PLUS:
2781 curState->u.quantifier.min = 1;
2782 curState->u.quantifier.max = (UINT)-1;
2783 goto quantcommon;
2784 case REOP_OPT:
2785 curState->u.quantifier.min = 0;
2786 curState->u.quantifier.max = 1;
2787 goto quantcommon;
2788 case REOP_QUANT:
2789 pc = ReadCompactIndex(pc, &k);
2790 curState->u.quantifier.min = k;
2791 pc = ReadCompactIndex(pc, &k);
2792 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2793 curState->u.quantifier.max = k - 1;
2794 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2795 quantcommon:
2796 if (curState->u.quantifier.max == 0) {
2797 pc = pc + GET_OFFSET(pc);
2798 op = (REOp) *pc++;
2799 result = x;
2800 continue;
2801 }
2802 /* Step over <next> */
2803 nextpc = pc + ARG_LEN;
2804 op = (REOp) *nextpc++;
2805 startcp = x->cp;
2806 if (REOP_IS_SIMPLE(op)) {
2807 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2808 if (curState->u.quantifier.min == 0)
2809 result = x;
2810 else
2811 result = NULL;
2812 pc = pc + GET_OFFSET(pc);
2813 break;
2814 }
2815 op = (REOp) *nextpc++;
2816 result = x;
2817 }
2818 curState->index = startcp - gData->cpbegin;
2819 curState->continue_op = REOP_REPEAT;
2820 curState->continue_pc = pc;
2821 curState->parenSoFar = parenSoFar;
2822 PUSH_STATE_STACK(gData);
2823 if (curState->u.quantifier.min == 0 &&
2824 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2825 0, 0)) {
2826 goto bad;
2827 }
2828 pc = nextpc;
2829 continue;
2830
2831 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2832 pc = curState[-1].continue_pc;
2833 op = (REOp) curState[-1].continue_op;
2834
2835 if (!result)
2836 result = x;
2837 continue;
2838
2839 case REOP_REPEAT:
2840 --curState;
2841 do {
2842 --gData->stateStackTop;
2843 if (!result) {
2844 /* Failed, see if we have enough children. */
2845 if (curState->u.quantifier.min == 0)
2846 goto repeatDone;
2847 goto break_switch;
2848 }
2849 if (curState->u.quantifier.min == 0 &&
2850 x->cp == gData->cpbegin + curState->index) {
2851 /* matched an empty string, that'll get us nowhere */
2852 result = NULL;
2853 goto break_switch;
2854 }
2855 if (curState->u.quantifier.min != 0)
2856 curState->u.quantifier.min--;
2857 if (curState->u.quantifier.max != (UINT) -1)
2858 curState->u.quantifier.max--;
2859 if (curState->u.quantifier.max == 0)
2860 goto repeatDone;
2861 nextpc = pc + ARG_LEN;
2862 nextop = (REOp) *nextpc;
2863 startcp = x->cp;
2864 if (REOP_IS_SIMPLE(nextop)) {
2865 nextpc++;
2866 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2867 if (curState->u.quantifier.min == 0)
2868 goto repeatDone;
2869 result = NULL;
2870 goto break_switch;
2871 }
2872 result = x;
2873 }
2874 curState->index = startcp - gData->cpbegin;
2875 PUSH_STATE_STACK(gData);
2876 if (curState->u.quantifier.min == 0 &&
2878 pc, x, startcp,
2879 curState->parenSoFar,
2880 parenSoFar -
2881 curState->parenSoFar)) {
2882 goto bad;
2883 }
2884 } while (*nextpc == REOP_ENDCHILD);
2885 pc = nextpc;
2886 op = (REOp) *pc++;
2887 parenSoFar = curState->parenSoFar;
2888 continue;
2889
2890 repeatDone:
2891 result = x;
2892 pc += GET_OFFSET(pc);
2893 goto break_switch;
2894
2895 case REOP_MINIMALSTAR:
2896 curState->u.quantifier.min = 0;
2897 curState->u.quantifier.max = (UINT)-1;
2898 goto minimalquantcommon;
2899 case REOP_MINIMALPLUS:
2900 curState->u.quantifier.min = 1;
2901 curState->u.quantifier.max = (UINT)-1;
2902 goto minimalquantcommon;
2903 case REOP_MINIMALOPT:
2904 curState->u.quantifier.min = 0;
2905 curState->u.quantifier.max = 1;
2906 goto minimalquantcommon;
2907 case REOP_MINIMALQUANT:
2908 pc = ReadCompactIndex(pc, &k);
2909 curState->u.quantifier.min = k;
2910 pc = ReadCompactIndex(pc, &k);
2911 /* See REOP_QUANT comments about k - 1. */
2912 curState->u.quantifier.max = k - 1;
2913 assert(curState->u.quantifier.min
2914 <= curState->u.quantifier.max);
2915 minimalquantcommon:
2916 curState->index = x->cp - gData->cpbegin;
2917 curState->parenSoFar = parenSoFar;
2918 PUSH_STATE_STACK(gData);
2919 if (curState->u.quantifier.min != 0) {
2920 curState->continue_op = REOP_MINIMALREPEAT;
2921 curState->continue_pc = pc;
2922 /* step over <next> */
2923 pc += OFFSET_LEN;
2924 op = (REOp) *pc++;
2925 } else {
2927 pc, x, x->cp, 0, 0)) {
2928 goto bad;
2929 }
2930 --gData->stateStackTop;
2931 pc = pc + GET_OFFSET(pc);
2932 op = (REOp) *pc++;
2933 }
2934 continue;
2935
2936 case REOP_MINIMALREPEAT:
2937 --gData->stateStackTop;
2938 --curState;
2939
2940 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2941#define PREPARE_REPEAT() \
2942 do { \
2943 curState->index = x->cp - gData->cpbegin; \
2944 curState->continue_op = REOP_MINIMALREPEAT; \
2945 curState->continue_pc = pc; \
2946 pc += ARG_LEN; \
2947 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2948 x->parens[k].index = -1; \
2949 PUSH_STATE_STACK(gData); \
2950 op = (REOp) *pc++; \
2951 assert(op < REOP_LIMIT); \
2952 }while(0)
2953
2954 if (!result) {
2955 TRACE(" -\n");
2956 /*
2957 * Non-greedy failure - try to consume another child.
2958 */
2959 if (curState->u.quantifier.max == (UINT) -1 ||
2960 curState->u.quantifier.max > 0) {
2962 continue;
2963 }
2964 /* Don't need to adjust pc since we're going to pop. */
2965 break;
2966 }
2967 if (curState->u.quantifier.min == 0 &&
2968 x->cp == gData->cpbegin + curState->index) {
2969 /* Matched an empty string, that'll get us nowhere. */
2970 result = NULL;
2971 break;
2972 }
2973 if (curState->u.quantifier.min != 0)
2974 curState->u.quantifier.min--;
2975 if (curState->u.quantifier.max != (UINT) -1)
2976 curState->u.quantifier.max--;
2977 if (curState->u.quantifier.min != 0) {
2979 continue;
2980 }
2981 curState->index = x->cp - gData->cpbegin;
2982 curState->parenSoFar = parenSoFar;
2983 PUSH_STATE_STACK(gData);
2985 pc, x, x->cp,
2986 curState->parenSoFar,
2987 parenSoFar - curState->parenSoFar)) {
2988 goto bad;
2989 }
2990 --gData->stateStackTop;
2991 pc = pc + GET_OFFSET(pc);
2992 op = (REOp) *pc++;
2993 assert(op < REOP_LIMIT);
2994 continue;
2995 default:
2996 assert(FALSE);
2997 result = NULL;
2998 }
2999 break_switch:;
3000 }
3001
3002 /*
3003 * If the match failed and there's a backtrack option, take it.
3004 * Otherwise this is a complete and utter failure.
3005 */
3006 if (!result) {
3007 if (gData->cursz == 0)
3008 return NULL;
3009
3010 /* Potentially detect explosive regex here. */
3011 gData->backTrackCount++;
3012 if (gData->backTrackLimit &&
3013 gData->backTrackCount >= gData->backTrackLimit) {
3014 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3015 JSMSG_REGEXP_TOO_COMPLEX);
3016 gData->ok = FALSE;
3017 return NULL;
3018 }
3019
3020 backTrackData = gData->backTrackSP;
3021 gData->cursz = backTrackData->sz;
3022 gData->backTrackSP =
3023 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3024 x->cp = backTrackData->cp;
3025 pc = backTrackData->backtrack_pc;
3026 op = (REOp) backTrackData->backtrack_op;
3027 assert(op < REOP_LIMIT);
3028 gData->stateStackTop = backTrackData->saveStateStackTop;
3029 assert(gData->stateStackTop);
3030
3031 memcpy(gData->stateStack, backTrackData + 1,
3032 sizeof(REProgState) * backTrackData->saveStateStackTop);
3033 curState = &gData->stateStack[gData->stateStackTop - 1];
3034
3035 if (backTrackData->parenCount) {
3036 memcpy(&x->parens[backTrackData->parenIndex],
3037 (char *)(backTrackData + 1) +
3038 sizeof(REProgState) * backTrackData->saveStateStackTop,
3039 sizeof(RECapture) * backTrackData->parenCount);
3040 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3041 } else {
3042 for (k = curState->parenSoFar; k < parenSoFar; k++)
3043 x->parens[k].index = -1;
3044 parenSoFar = curState->parenSoFar;
3045 }
3046
3047 TRACE("\tBT_Pop: %Id,%Id\n",
3048 (ULONG_PTR)backTrackData->parenIndex,
3049 (ULONG_PTR)backTrackData->parenCount);
3050 continue;
3051 }
3052 x = result;
3053
3054 /*
3055 * Continue with the expression.
3056 */
3057 op = (REOp)*pc++;
3058 assert(op < REOP_LIMIT);
3059 }
3060
3061bad:
3062 TRACE("\n");
3063 return NULL;
3064
3065good:
3066 TRACE("\n");
3067 return x;
3068}
3069
3071{
3073 const WCHAR *cp = x->cp;
3074 const WCHAR *cp2;
3075 UINT j;
3076
3077 /*
3078 * Have to include the position beyond the last character
3079 * in order to detect end-of-input/line condition.
3080 */
3081 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3082 gData->skipped = cp2 - cp;
3083 x->cp = cp2;
3084 for (j = 0; j < gData->regexp->parenCount; j++)
3085 x->parens[j].index = -1;
3086 result = ExecuteREBytecode(gData, x);
3087 if (!gData->ok || result || (gData->regexp->flags & REG_STICKY))
3088 return result;
3089 gData->backTrackSP = gData->backTrackStack;
3090 gData->cursz = 0;
3091 gData->stateStackTop = 0;
3092 cp2 = cp + gData->skipped;
3093 }
3094 return NULL;
3095}
3096
3098{
3099 UINT i;
3100
3103 if (!gData->backTrackStack)
3104 goto bad;
3105
3106 gData->backTrackSP = gData->backTrackStack;
3107 gData->cursz = 0;
3108 gData->backTrackCount = 0;
3109 gData->backTrackLimit = 0;
3110
3112 gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3113 if (!gData->stateStack)
3114 goto bad;
3115
3116 gData->stateStackTop = 0;
3117 gData->cx = cx;
3118 gData->pool = pool;
3119 gData->regexp = re;
3120 gData->ok = TRUE;
3121
3122 for (i = 0; i < re->classCount; i++) {
3123 if (!re->classList[i].converted &&
3124 !ProcessCharSet(gData, &re->classList[i])) {
3125 return E_FAIL;
3126 }
3127 }
3128
3129 return S_OK;
3130
3131bad:
3132 gData->ok = FALSE;
3133 return E_OUTOFMEMORY;
3134}
3135
3138{
3140 REGlobalData gData;
3142 const WCHAR *str_beg = result->cp;
3143 HRESULT hres;
3144
3145 assert(result->cp != NULL);
3146
3147 gData.cpbegin = str;
3148 gData.cpend = str+str_len;
3149 gData.start = result->cp-str;
3150 gData.skipped = 0;
3151 gData.pool = pool;
3152
3153 hres = InitMatch(regexp, cx, pool, &gData);
3154 if(FAILED(hres)) {
3155 WARN("InitMatch failed\n");
3156 heap_pool_clear(mark);
3157 return hres;
3158 }
3159
3160 res = MatchRegExp(&gData, result);
3161 heap_pool_clear(mark);
3162 if(!gData.ok) {
3163 WARN("MatchRegExp failed\n");
3164 return E_FAIL;
3165 }
3166
3167 if(!res) {
3168 result->match_len = 0;
3169 return S_FALSE;
3170 }
3171
3172 result->match_len = (result->cp-str_beg) - gData.skipped;
3173 result->paren_count = regexp->parenCount;
3174 return S_OK;
3175}
3176
3178{
3179 if (re->classList) {
3180 UINT i;
3181 for (i = 0; i < re->classCount; i++) {
3182 if (re->classList[i].converted)
3183 free(re->classList[i].u.bits);
3184 re->classList[i].u.bits = NULL;
3185 }
3186 free(re->classList);
3187 }
3188 free(re);
3189}
3190
3192 DWORD str_len, WORD flags, BOOL flat)
3193{
3194 regexp_t *re;
3195 heap_pool_t *mark;
3197 size_t resize;
3198 jsbytecode *endPC;
3199 UINT i;
3200 size_t len;
3201
3202 re = NULL;
3203 mark = heap_pool_mark(pool);
3204 len = str_len;
3205
3206 state.context = cx;
3207 state.pool = pool;
3208 state.cp = str;
3209 if (!state.cp)
3210 goto out;
3211 state.cpbegin = state.cp;
3212 state.cpend = state.cp + len;
3213 state.flags = flags;
3214 state.parenCount = 0;
3215 state.classCount = 0;
3216 state.progLength = 0;
3217 state.treeDepth = 0;
3218 state.classBitmapsMem = 0;
3219 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3220 state.classCache[i].start = NULL;
3221
3222 if (len != 0 && flat) {
3223 state.result = NewRENode(&state, REOP_FLAT);
3224 if (!state.result)
3225 goto out;
3226 state.result->u.flat.chr = *state.cpbegin;
3227 state.result->u.flat.length = len;
3228 state.result->kid = (void *) state.cpbegin;
3229 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3230 state.progLength += 1 + GetCompactIndexWidth(0)
3232 } else {
3233 if (!ParseRegExp(&state))
3234 goto out;
3235 }
3236 resize = offsetof(regexp_t, program) + state.progLength + 1;
3237 re = malloc(resize);
3238 if (!re)
3239 goto out;
3240
3241 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3242 re->classCount = state.classCount;
3243 if (re->classCount) {
3244 re->classList = malloc(re->classCount * sizeof(RECharSet));
3245 if (!re->classList) {
3246 regexp_destroy(re);
3247 re = NULL;
3248 goto out;
3249 }
3250 for (i = 0; i < re->classCount; i++)
3251 re->classList[i].converted = FALSE;
3252 } else {
3253 re->classList = NULL;
3254 }
3255 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3256 if (!endPC) {
3257 regexp_destroy(re);
3258 re = NULL;
3259 goto out;
3260 }
3261 *endPC++ = REOP_END;
3262 /*
3263 * Check whether size was overestimated and shrink using realloc.
3264 * This is safe since no pointers to newly parsed regexp or its parts
3265 * besides re exist here.
3266 */
3267 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3268 regexp_t *tmp;
3269 assert((size_t)(endPC - re->program) < state.progLength + 1);
3270 resize = offsetof(regexp_t, program) + (endPC - re->program);
3271 tmp = realloc(re, resize);
3272 if (tmp)
3273 re = tmp;
3274 }
3275
3276 re->flags = flags;
3277 re->parenCount = state.parenCount;
3278 re->source = str;
3279 re->source_len = str_len;
3280
3281out:
3282 heap_pool_clear(mark);
3283 return re;
3284}
static int state
Definition: maze.c:121
#define WINE_DEFAULT_DEBUG_CHANNEL(t)
Definition: precomp.h:23
#define index(s, c)
Definition: various.h:29
#define WARN(fmt,...)
Definition: precomp.h:61
#define E_OUTOFMEMORY
Definition: ddrawi.h:100
#define E_FAIL
Definition: ddrawi.h:102
#define realloc
Definition: debug_ros.c:6
#define free
Definition: debug_ros.c:5
#define malloc
Definition: debug_ros.c:4
#define NULL
Definition: types.h:112
#define TRUE
Definition: types.h:120
#define FALSE
Definition: types.h:117
UINT op
Definition: effect.c:236
unsigned char uch
Definition: zlib.h:47
static void cleanup(void)
Definition: main.c:1335
HRESULT throw_error(script_ctx_t *ctx, HRESULT error, const WCHAR *str)
Definition: error.c:398
#define REG_FOLD
Definition: regexp.h:36
#define REG_MULTILINE
Definition: regexp.h:38
#define REG_STICKY
Definition: regexp.h:39
BYTE jsbytecode
Definition: regexp.h:54
unsigned char ch[4][2]
Definition: console.c:118
#define assert(_expr)
Definition: assert.h:32
int ptrdiff_t
Definition: corecrt.h:194
#define L(x)
Definition: resources.c:13
unsigned short WORD
Definition: ntddk_ex.h:93
unsigned int BOOL
Definition: ntddk_ex.h:94
unsigned long DWORD
Definition: ntddk_ex.h:95
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
GLuint GLuint end
Definition: gl.h:1545
GLdouble GLdouble t
Definition: gl.h:2047
GLint GLint GLsizei width
Definition: gl.h:1546
GLdouble n
Definition: glext.h:7729
GLuint res
Definition: glext.h:9613
GLenum src
Definition: glext.h:6340
GLintptr offset
Definition: glext.h:5920
const GLubyte * c
Definition: glext.h:8905
GLuint index
Definition: glext.h:6031
GLenum GLenum GLvoid GLvoid GLvoid * span
Definition: glext.h:5664
GLuint program
Definition: glext.h:6723
GLint limit
Definition: glext.h:10326
GLbitfield flags
Definition: glext.h:7161
GLuint GLsizei GLsizei * length
Definition: glext.h:6040
GLuint64EXT * result
Definition: glext.h:11304
GLuint GLuint num
Definition: glext.h:9618
GLenum GLsizei len
Definition: glext.h:6722
GLenum cap
Definition: glext.h:9639
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
#define cs
Definition: i386-dis.c:442
#define S_OK
Definition: intsafe.h:52
#define FAILED(hr)
Definition: intsafe.h:51
#define JSREG_FIND_PAREN_COUNT
Definition: regexp.c:893
#define JSMSG_OUT_OF_MEMORY
Definition: regexp.c:77
static size_t GetCompactIndexWidth(size_t index)
Definition: regexp.c:361
#define JS7_UNDEC(c)
Definition: regexp.c:90
#define RE_IS_LETTER(c)
Definition: regexp.c:82
static REBackTrackData * PushBackTrackState(REGlobalData *gData, REOp op, jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount)
Definition: regexp.c:1919
static match_state_t * FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length)
Definition: regexp.c:1980
static jsbytecode * ReadCompactIndex(jsbytecode *pc, size_t *result)
Definition: regexp.c:383
static BOOL ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
Definition: regexp.c:2084
#define JS_COUNT_OPERATION(a, b)
Definition: regexp.c:48
static BOOL ParseRegExp(CompilerState *)
Definition: regexp.c:1648
#define REOP_IS_SIMPLE(op)
Definition: regexp.c:145
#define ReportRegExpError(a, b, c)
Definition: regexp.c:44
#define JSMSG_MIN_TOO_BIG
Definition: regexp.c:74
#define JUMP_OFFSET_LO(off)
Definition: regexp.c:451
struct REProgState REProgState
#define RE_IS_LINE_TERM(c)
Definition: regexp.c:84
static jsbytecode * EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t)
Definition: regexp.c:473
BYTE JSPackedBool
Definition: regexp.c:51
static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
Definition: regexp.c:3097
static UINT GetDecimalValue(WCHAR c, UINT max, UINT(*findMax)(CompilerState *state), CompilerState *state)
Definition: regexp.c:943
static UINT FindParenCount(CompilerState *state)
Definition: regexp.c:904
#define OVERFLOW_VALUE
Definition: regexp.c:901
#define OFFSET_LEN
Definition: regexp.c:335
static INT ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
Definition: regexp.c:1161
#define GET_ARG(pc)
Definition: regexp.c:331
struct REGlobalData REGlobalData
#define GET_OFFSET(pc)
Definition: regexp.c:337
#define JSMSG_MAX_TOO_BIG
Definition: regexp.c:75
HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, match_state_t *result)
Definition: regexp.c:3136
regexp_t * regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, WORD flags, BOOL flat)
Definition: regexp.c:3191
#define INITIAL_BACKTRACK
Definition: regexp.c:230
static BOOL CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src, const WCHAR *end)
Definition: regexp.c:967
struct RECharSet RECharSet
static BOOL isASCIIHexDigit(WCHAR c, UINT *digit)
Definition: regexp.c:426
#define JUMP_OFFSET_HI(off)
Definition: regexp.c:450
#define TREE_DEPTH_MAX
Definition: regexp.c:345
static void AddCharacterToCharSet(RECharSet *cs, WCHAR c)
Definition: regexp.c:2050
static match_state_t * MatchRegExp(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:3070
#define JSMSG_OUT_OF_ORDER
Definition: regexp.c:76
struct CompilerState CompilerState
#define OFFSET_MAX
Definition: regexp.c:336
#define PREPARE_REPEAT()
#define JS_ISWORD(c)
Definition: regexp.c:87
#define CLASS_BITMAPS_MEM_LIMIT
Definition: regexp.c:351
static match_state_t * BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
Definition: regexp.c:2019
#define SET_ARG(pc, arg)
Definition: regexp.c:332
static BOOL ParseTerm(CompilerState *state)
Definition: regexp.c:1339
#define JS_ReportErrorNumber(a, b, c, d)
Definition: regexp.c:46
void regexp_destroy(regexp_t *re)
Definition: regexp.c:3177
#define INITIAL_STACK_SIZE
Definition: regexp.c:1645
static match_state_t * ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:2533
#define PUSH_STATE_STACK(data)
Definition: regexp.c:2309
struct EmitStateStackEntry EmitStateStackEntry
REOp
Definition: regexp.c:92
@ REOP_LPAREN
Definition: regexp.c:121
@ REOP_SPACE
Definition: regexp.c:103
@ REOP_WBDRY
Definition: regexp.c:96
@ REOP_UCFLAT
Definition: regexp.c:112
@ REOP_ALTPREREQ
Definition: regexp.c:137
@ REOP_REPEAT
Definition: regexp.c:135
@ REOP_QUANT
Definition: regexp.c:117
@ REOP_UCFLAT1i
Definition: regexp.c:111
@ REOP_ASSERTTEST
Definition: regexp.c:128
@ REOP_EMPTY
Definition: regexp.c:93
@ REOP_OPT
Definition: regexp.c:120
@ REOP_DOTSTAR
Definition: regexp.c:124
@ REOP_FLAT1i
Definition: regexp.c:109
@ REOP_MINIMALPLUS
Definition: regexp.c:131
@ REOP_JUMP
Definition: regexp.c:123
@ REOP_ALT
Definition: regexp.c:116
@ REOP_NONSPACE
Definition: regexp.c:104
@ REOP_DOT
Definition: regexp.c:98
@ REOP_BACKREF
Definition: regexp.c:105
@ REOP_BOL
Definition: regexp.c:94
@ REOP_MINIMALREPEAT
Definition: regexp.c:136
@ REOP_ENDCHILD
Definition: regexp.c:134
@ REOP_LIMIT
Definition: regexp.c:142
@ REOP_NCLASS
Definition: regexp.c:115
@ REOP_ASSERT
Definition: regexp.c:126
@ REOP_ALNUM
Definition: regexp.c:101
@ REOP_WNONBDRY
Definition: regexp.c:97
@ REOP_MINIMALOPT
Definition: regexp.c:132
@ REOP_END
Definition: regexp.c:141
@ REOP_UCFLATi
Definition: regexp.c:113
@ REOP_CLASS
Definition: regexp.c:114
@ REOP_MINIMALQUANT
Definition: regexp.c:133
@ REOP_RPAREN
Definition: regexp.c:122
@ REOP_DIGIT
Definition: regexp.c:99
@ REOP_NONDIGIT
Definition: regexp.c:100
@ REOP_ENDALT
Definition: regexp.c:139
@ REOP_ALTPREREQ2
Definition: regexp.c:138
@ REOP_FLAT1
Definition: regexp.c:107
@ REOP_ASSERTNOTTEST
Definition: regexp.c:129
@ REOP_EOL
Definition: regexp.c:95
@ REOP_ASSERT_NOT
Definition: regexp.c:127
@ REOP_FLATi
Definition: regexp.c:108
@ REOP_PLUS
Definition: regexp.c:119
@ REOP_CONCAT
Definition: regexp.c:140
@ REOP_FLAT
Definition: regexp.c:106
@ REOP_MINIMALSTAR
Definition: regexp.c:130
@ REOP_STAR
Definition: regexp.c:118
@ REOP_NONALNUM
Definition: regexp.c:102
@ REOP_LPARENNON
Definition: regexp.c:125
@ REOP_UCFLAT1
Definition: regexp.c:110
#define CLASS_CACHE_SIZE
Definition: regexp.c:290
static match_state_t * SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp)
Definition: regexp.c:2325
#define JSREG_FIND_PAREN_ERROR
Definition: regexp.c:894
static BOOL SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
Definition: regexp.c:454
struct REBackTrackData REBackTrackData
#define ReportRegExpErrorHelper(a, b, c, d)
Definition: regexp.c:45
static void AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
Definition: regexp.c:2060
static BOOL ParseQuantifier(CompilerState *state)
Definition: regexp.c:1215
#define ARG_LEN
Definition: regexp.c:330
static BOOL ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, INT operandSP)
Definition: regexp.c:803
#define INITIAL_STATESTACK
Definition: regexp.c:229
static BOOL ReallocStateStack(REGlobalData *gData)
Definition: regexp.c:2294
#define JS7_ISDEC(c)
Definition: regexp.c:89
static RENode * NewRENode(CompilerState *state, REOp op)
Definition: regexp.c:407
static const char * reop_names[]
Definition: regexp.c:147
static jsbytecode * WriteCompactIndex(jsbytecode *pc, size_t index)
Definition: regexp.c:370
void heap_pool_clear(heap_pool_t *)
Definition: jsutils.c:141
heap_pool_t * heap_pool_mark(heap_pool_t *)
Definition: jsutils.c:177
void * heap_pool_grow(heap_pool_t *, void *, DWORD, DWORD)
Definition: jsutils.c:125
#define SCRIPTLANGUAGEVERSION_ES5
Definition: jscript.h:53
void * heap_pool_alloc(heap_pool_t *, DWORD) __WINE_ALLOC_SIZE(2)
Definition: jsutils.c:72
#define JS_E_REGEXP_SYNTAX
Definition: jscript.h:564
#define JS_E_UNEXPECTED_QUANTIFIER
Definition: jscript.h:565
#define c
Definition: ke_i.h:80
#define debugstr_wn
Definition: kernel32.h:33
if(dx< 0)
Definition: linetemp.h:194
POINT cp
Definition: magnifier.c:59
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
HRESULT hres
Definition: protocol.c:465
CONDITION_VARIABLE cv
Definition: sync.c:1887
#define shift
Definition: input.c:3280
#define min(a, b)
Definition: monoChain.cc:55
int k
Definition: mpi.c:3369
unsigned int UINT
Definition: ndis.h:50
short WCHAR
Definition: pedump.c:58
#define cap
Definition: glfuncs.h:226
_Out_opt_ int * cx
Definition: commctrl.h:585
static unsigned __int64 next
Definition: rand_nt.c:6
#define err(...)
const WCHAR * str
static calc_node_t temp
Definition: rpn_ieee.c:38
#define offsetof(TYPE, MEMBER)
#define iswspace(_c)
Definition: ctype.h:669
#define memset(x, y, z)
Definition: compat.h:39
#define towlower(c)
Definition: wctype.h:97
#define towupper(c)
Definition: wctype.h:99
#define TRACE(s)
Definition: solgame.cpp:4
RENode * result
Definition: regexp.c:301
size_t classBitmapsMem
Definition: regexp.c:302
script_ctx_t * context
Definition: regexp.c:293
struct CompilerState::@456 classCache[CLASS_CACHE_SIZE]
const WCHAR * cpend
Definition: regexp.c:295
heap_pool_t * pool
Definition: regexp.c:310
const WCHAR * start
Definition: regexp.c:304
size_t index
Definition: regexp.c:306
const WCHAR * cp
Definition: regexp.c:296
size_t treeDepth
Definition: regexp.c:299
size_t parenCount
Definition: regexp.c:297
size_t length
Definition: regexp.c:305
size_t classCount
Definition: regexp.c:298
WORD flags
Definition: regexp.c:308
size_t progLength
Definition: regexp.c:300
const WCHAR * cpbegin
Definition: regexp.c:294
RENode * continueNode
Definition: regexp.c:319
jsbytecode * endTermFixup
Definition: regexp.c:318
jsbytecode continueOp
Definition: regexp.c:320
jsbytecode * nextAltFixup
Definition: regexp.c:316
jsbytecode * altHead
Definition: regexp.c:315
jsbytecode * nextTermFixup
Definition: regexp.c:317
JSPackedBool jumpToJumpFlag
Definition: regexp.c:321
jsbytecode * backtrack_pc
Definition: regexp.c:219
size_t parenCount
Definition: regexp.c:223
size_t sz
Definition: regexp.c:218
const WCHAR * cp
Definition: regexp.c:221
size_t parenIndex
Definition: regexp.c:222
jsbytecode backtrack_op
Definition: regexp.c:220
size_t saveStateStackTop
Definition: regexp.c:224
BYTE * bits
Definition: regexp.c:66
WORD length
Definition: regexp.c:64
JSPackedBool converted
Definition: regexp.c:62
struct RECharSet::@446::@447 src
size_t startIndex
Definition: regexp.c:68
JSPackedBool sense
Definition: regexp.c:63
size_t length
Definition: regexp.c:69
union RECharSet::@446 u
size_t start
Definition: regexp.c:236
size_t backTrackLimit
Definition: regexp.c:250
const WCHAR * cpbegin
Definition: regexp.c:238
const WCHAR * cpend
Definition: regexp.c:239
size_t backTrackStackSize
Definition: regexp.c:247
REProgState * stateStack
Definition: regexp.c:241
REBackTrackData * backTrackSP
Definition: regexp.c:246
size_t cursz
Definition: regexp.c:248
REBackTrackData * backTrackStack
Definition: regexp.c:245
regexp_t * regexp
Definition: regexp.c:234
size_t backTrackCount
Definition: regexp.c:249
BOOL ok
Definition: regexp.c:235
size_t stateStackTop
Definition: regexp.c:242
heap_pool_t * pool
Definition: regexp.c:252
ptrdiff_t skipped
Definition: regexp.c:237
size_t stateStackLimit
Definition: regexp.c:243
void * cx
Definition: regexp.c:233
Definition: regexp.c:258
UINT max
Definition: regexp.c:268
REOp op
Definition: regexp.c:259
RENode * kid2
Definition: regexp.c:283
size_t startIndex
Definition: regexp.c:272
JSPackedBool sense
Definition: regexp.c:276
WCHAR ch1
Definition: regexp.c:284
RENode * next
Definition: regexp.c:260
WCHAR chr
Definition: regexp.c:279
size_t index
Definition: regexp.c:274
struct RENode::@451::@453 ucclass
size_t kidlen
Definition: regexp.c:273
struct RENode::@451::@454 flat
UINT min
Definition: regexp.c:267
void * kid
Definition: regexp.c:261
size_t parenIndex
Definition: regexp.c:265
void * kid2
Definition: regexp.c:263
JSPackedBool greedy
Definition: regexp.c:269
struct RENode::@451::@455 altprereq
size_t length
Definition: regexp.c:280
WCHAR ch2
Definition: regexp.c:285
union RENode::@451 u
WORD bmsize
Definition: regexp.c:275
struct RENode::@451::@452 range
INT num
Definition: regexp.c:264
const WCHAR * errPos
Definition: regexp.c:446
REOp op
Definition: regexp.c:445
size_t parenIndex
Definition: regexp.c:447
size_t top
Definition: regexp.c:211
jsbytecode * continue_pc
Definition: regexp.c:201
UINT max
Definition: regexp.c:208
struct REProgState::@448::@450 assertion
UINT min
Definition: regexp.c:207
size_t sz
Definition: regexp.c:212
size_t parenSoFar
Definition: regexp.c:204
jsbytecode continue_op
Definition: regexp.c:202
union REProgState::@448 u
ptrdiff_t index
Definition: regexp.c:203
struct REProgState::@448::@449 quantifier
jsbytecode program[1]
Definition: regexp.h:63
struct RECharSet * classList
Definition: regexp.h:60
const WCHAR * source
Definition: regexp.h:61
DWORD source_len
Definition: regexp.h:62
WORD flags
Definition: regexp.h:57
size_t parenCount
Definition: regexp.h:58
size_t classCount
Definition: regexp.h:59
Definition: tools.h:99
#define max(a, b)
Definition: svc.c:63
#define str_len
Definition: treelist.c:89
int32_t INT
Definition: typedefs.h:58
uint32_t ULONG_PTR
Definition: typedefs.h:65
Definition: pdh_main.c:96
BYTE JSPackedBool
Definition: regexp.c:53
REOp
Definition: regexp.c:94
#define CLASS_CACHE_SIZE
Definition: regexp.c:292
wchar_t tm const _CrtWcstime_Writes_and_advances_ptr_ count wchar_t ** out
Definition: wcsftime.cpp:383
#define S_FALSE
Definition: winerror.h:3451
unsigned char BYTE
Definition: xxhash.c:193