ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

regexp.c
Go to the documentation of this file.
00001 /*
00002  * Copyright 2008 Jacek Caban for CodeWeavers
00003  *
00004  * This library is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU Lesser General Public
00006  * License as published by the Free Software Foundation; either
00007  * version 2.1 of the License, or (at your option) any later version.
00008  *
00009  * This library is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012  * Lesser General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU Lesser General Public
00015  * License along with this library; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
00017  */
00018 
00019 /*
00020  * Code in this file is based on files:
00021  * js/src/jsregexp.h
00022  * js/src/jsregexp.c
00023  * from Mozilla project, released under LGPL 2.1 or later.
00024  *
00025  * The Original Code is Mozilla Communicator client code, released
00026  * March 31, 1998.
00027  *
00028  * The Initial Developer of the Original Code is
00029  * Netscape Communications Corporation.
00030  * Portions created by the Initial Developer are Copyright (C) 1998
00031  * the Initial Developer. All Rights Reserved.
00032  */
00033 
00034 #include <assert.h>
00035 #include <math.h>
00036 
00037 #include "jscript.h"
00038 
00039 #include "wine/debug.h"
00040 
00041 WINE_DEFAULT_DEBUG_CHANNEL(jscript);
00042 
00043 #define JSREG_FOLD      0x01    /* fold uppercase to lowercase */
00044 #define JSREG_GLOB      0x02    /* global exec, creates array of matches */
00045 #define JSREG_MULTILINE 0x04    /* treat ^ and $ as begin and end of line */
00046 #define JSREG_STICKY    0x08    /* only match starting at lastIndex */
00047 
00048 typedef BYTE JSPackedBool;
00049 typedef BYTE jsbytecode;
00050 
00051 /*
00052  * This struct holds a bitmap representation of a class from a regexp.
00053  * There's a list of these referenced by the classList field in the JSRegExp
00054  * struct below. The initial state has startIndex set to the offset in the
00055  * original regexp source of the beginning of the class contents. The first
00056  * use of the class converts the source representation into a bitmap.
00057  *
00058  */
00059 typedef struct RECharSet {
00060     JSPackedBool    converted;
00061     JSPackedBool    sense;
00062     WORD            length;
00063     union {
00064         BYTE        *bits;
00065         struct {
00066             size_t  startIndex;
00067             size_t  length;
00068         } src;
00069     } u;
00070 } RECharSet;
00071 
00072 typedef struct {
00073     WORD         flags;         /* flags, see jsapi.h's JSREG_* defines */
00074     size_t       parenCount;    /* number of parenthesized submatches */
00075     size_t       classCount;    /* count [...] bitmaps */
00076     RECharSet    *classList;    /* list of [...] bitmaps */
00077     BSTR         source;        /* locked source string, sans // */
00078     jsbytecode   program[1];    /* regular expression bytecode */
00079 } JSRegExp;
00080 
00081 typedef struct {
00082     DispatchEx dispex;
00083 
00084     JSRegExp *jsregexp;
00085     BSTR str;
00086     INT last_index;
00087     VARIANT last_index_var;
00088 } RegExpInstance;
00089 
00090 static const WCHAR sourceW[] = {'s','o','u','r','c','e',0};
00091 static const WCHAR globalW[] = {'g','l','o','b','a','l',0};
00092 static const WCHAR ignoreCaseW[] = {'i','g','n','o','r','e','C','a','s','e',0};
00093 static const WCHAR multilineW[] = {'m','u','l','t','i','l','i','n','e',0};
00094 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0};
00095 static const WCHAR toStringW[] = {'t','o','S','t','r','i','n','g',0};
00096 static const WCHAR execW[] = {'e','x','e','c',0};
00097 static const WCHAR testW[] = {'t','e','s','t',0};
00098 
00099 static const WCHAR leftContextW[] =
00100     {'l','e','f','t','C','o','n','t','e','x','t',0};
00101 static const WCHAR rightContextW[] =
00102     {'r','i','g','h','t','C','o','n','t','e','x','t',0};
00103 
00104 static const WCHAR undefinedW[] = {'u','n','d','e','f','i','n','e','d',0};
00105 static const WCHAR emptyW[] = {0};
00106 
00107 /* FIXME: Better error handling */
00108 #define ReportRegExpError(a,b,c)
00109 #define ReportRegExpErrorHelper(a,b,c,d)
00110 #define JS_ReportErrorNumber(a,b,c,d)
00111 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
00112 #define js_ReportOutOfScriptQuota(a)
00113 #define JS_ReportOutOfMemory(a)
00114 #define JS_COUNT_OPERATION(a,b)
00115 
00116 #define JSMSG_MIN_TOO_BIG 47
00117 #define JSMSG_MAX_TOO_BIG 48
00118 #define JSMSG_OUT_OF_ORDER 49
00119 #define JSMSG_OUT_OF_MEMORY 137
00120 
00121 #define LINE_SEPARATOR  0x2028
00122 #define PARA_SEPARATOR  0x2029
00123 
00124 #define RE_IS_LETTER(c)     (((c >= 'A') && (c <= 'Z')) ||                    \
00125                              ((c >= 'a') && (c <= 'z')) )
00126 #define RE_IS_LINE_TERM(c)  ((c == '\n') || (c == '\r') ||                    \
00127                              (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
00128 
00129 #define JS_ISWORD(c)    ((c) < 128 && (isalnum(c) || (c) == '_'))
00130 
00131 #define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
00132 #define JS7_UNDEC(c)    ((c) - '0')
00133 
00134 typedef enum REOp {
00135     REOP_EMPTY,
00136     REOP_BOL,
00137     REOP_EOL,
00138     REOP_WBDRY,
00139     REOP_WNONBDRY,
00140     REOP_DOT,
00141     REOP_DIGIT,
00142     REOP_NONDIGIT,
00143     REOP_ALNUM,
00144     REOP_NONALNUM,
00145     REOP_SPACE,
00146     REOP_NONSPACE,
00147     REOP_BACKREF,
00148     REOP_FLAT,
00149     REOP_FLAT1,
00150     REOP_FLATi,
00151     REOP_FLAT1i,
00152     REOP_UCFLAT1,
00153     REOP_UCFLAT1i,
00154     REOP_UCFLAT,
00155     REOP_UCFLATi,
00156     REOP_CLASS,
00157     REOP_NCLASS,
00158     REOP_ALT,
00159     REOP_QUANT,
00160     REOP_STAR,
00161     REOP_PLUS,
00162     REOP_OPT,
00163     REOP_LPAREN,
00164     REOP_RPAREN,
00165     REOP_JUMP,
00166     REOP_DOTSTAR,
00167     REOP_LPARENNON,
00168     REOP_ASSERT,
00169     REOP_ASSERT_NOT,
00170     REOP_ASSERTTEST,
00171     REOP_ASSERTNOTTEST,
00172     REOP_MINIMALSTAR,
00173     REOP_MINIMALPLUS,
00174     REOP_MINIMALOPT,
00175     REOP_MINIMALQUANT,
00176     REOP_ENDCHILD,
00177     REOP_REPEAT,
00178     REOP_MINIMALREPEAT,
00179     REOP_ALTPREREQ,
00180     REOP_ALTPREREQ2,
00181     REOP_ENDALT,
00182     REOP_CONCAT,
00183     REOP_END,
00184     REOP_LIMIT /* META: no operator >= to this */
00185 } REOp;
00186 
00187 #define REOP_IS_SIMPLE(op)  ((op) <= REOP_NCLASS)
00188 
00189 static const char *reop_names[] = {
00190     "empty",
00191     "bol",
00192     "eol",
00193     "wbdry",
00194     "wnonbdry",
00195     "dot",
00196     "digit",
00197     "nondigit",
00198     "alnum",
00199     "nonalnum",
00200     "space",
00201     "nonspace",
00202     "backref",
00203     "flat",
00204     "flat1",
00205     "flati",
00206     "flat1i",
00207     "ucflat1",
00208     "ucflat1i",
00209     "ucflat",
00210     "ucflati",
00211     "class",
00212     "nclass",
00213     "alt",
00214     "quant",
00215     "star",
00216     "plus",
00217     "opt",
00218     "lparen",
00219     "rparen",
00220     "jump",
00221     "dotstar",
00222     "lparennon",
00223     "assert",
00224     "assert_not",
00225     "asserttest",
00226     "assertnottest",
00227     "minimalstar",
00228     "minimalplus",
00229     "minimalopt",
00230     "minimalquant",
00231     "endchild",
00232     "repeat",
00233     "minimalrepeat",
00234     "altprereq",
00235     "alrprereq2",
00236     "endalt",
00237     "concat",
00238     "end",
00239     NULL
00240 };
00241 
00242 typedef struct RECapture {
00243     ptrdiff_t index;           /* start of contents, -1 for empty  */
00244     size_t length;             /* length of capture */
00245 } RECapture;
00246 
00247 typedef struct REMatchState {
00248     const WCHAR *cp;
00249     RECapture parens[1];      /* first of 're->parenCount' captures,
00250                                  allocated at end of this struct */
00251 } REMatchState;
00252 
00253 typedef struct REProgState {
00254     jsbytecode *continue_pc;        /* current continuation data */
00255     jsbytecode continue_op;
00256     ptrdiff_t index;                /* progress in text */
00257     size_t parenSoFar;              /* highest indexed paren started */
00258     union {
00259         struct {
00260             UINT min;               /* current quantifier limits */
00261             UINT max;
00262         } quantifier;
00263         struct {
00264             size_t top;             /* backtrack stack state */
00265             size_t sz;
00266         } assertion;
00267     } u;
00268 } REProgState;
00269 
00270 typedef struct REBackTrackData {
00271     size_t sz;                      /* size of previous stack entry */
00272     jsbytecode *backtrack_pc;       /* where to backtrack to */
00273     jsbytecode backtrack_op;
00274     const WCHAR *cp;                /* index in text of match at backtrack */
00275     size_t parenIndex;              /* start index of saved paren contents */
00276     size_t parenCount;              /* # of saved paren contents */
00277     size_t saveStateStackTop;       /* number of parent states */
00278     /* saved parent states follow */
00279     /* saved paren contents follow */
00280 } REBackTrackData;
00281 
00282 #define INITIAL_STATESTACK  100
00283 #define INITIAL_BACKTRACK   8000
00284 
00285 typedef struct REGlobalData {
00286     script_ctx_t *cx;
00287     JSRegExp *regexp;               /* the RE in execution */
00288     BOOL ok;                        /* runtime error (out_of_memory only?) */
00289     size_t start;                   /* offset to start at */
00290     ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
00291     const WCHAR    *cpbegin;        /* text base address */
00292     const WCHAR    *cpend;          /* text limit address */
00293 
00294     REProgState *stateStack;        /* stack of state of current parents */
00295     size_t stateStackTop;
00296     size_t stateStackLimit;
00297 
00298     REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
00299     REBackTrackData *backTrackSP;
00300     size_t backTrackStackSize;
00301     size_t cursz;                   /* size of current stack entry */
00302     size_t backTrackCount;          /* how many times we've backtracked */
00303     size_t backTrackLimit;          /* upper limit on backtrack states */
00304 
00305     jsheap_t *pool;                 /* It's faster to use one malloc'd pool
00306                                        than to malloc/free the three items
00307                                        that are allocated from this pool */
00308 } REGlobalData;
00309 
00310 typedef struct RENode RENode;
00311 struct RENode {
00312     REOp            op;         /* r.e. op bytecode */
00313     RENode          *next;      /* next in concatenation order */
00314     void            *kid;       /* first operand */
00315     union {
00316         void        *kid2;      /* second operand */
00317         INT         num;        /* could be a number */
00318         size_t      parenIndex; /* or a parenthesis index */
00319         struct {                /* or a quantifier range */
00320             UINT  min;
00321             UINT  max;
00322             JSPackedBool greedy;
00323         } range;
00324         struct {                /* or a character class */
00325             size_t  startIndex;
00326             size_t  kidlen;     /* length of string at kid, in jschars */
00327             size_t  index;      /* index into class list */
00328             WORD  bmsize;       /* bitmap size, based on max char code */
00329             JSPackedBool sense;
00330         } ucclass;
00331         struct {                /* or a literal sequence */
00332             WCHAR   chr;        /* of one character */
00333             size_t  length;     /* or many (via the kid) */
00334         } flat;
00335         struct {
00336             RENode  *kid2;      /* second operand from ALT */
00337             WCHAR   ch1;        /* match char for ALTPREREQ */
00338             WCHAR   ch2;        /* ditto, or class index for ALTPREREQ2 */
00339         } altprereq;
00340     } u;
00341 };
00342 
00343 #define CLASS_CACHE_SIZE    4
00344 
00345 typedef struct CompilerState {
00346     script_ctx_t    *context;
00347     const WCHAR     *cpbegin;
00348     const WCHAR     *cpend;
00349     const WCHAR     *cp;
00350     size_t          parenCount;
00351     size_t          classCount;   /* number of [] encountered */
00352     size_t          treeDepth;    /* maximum depth of parse tree */
00353     size_t          progLength;   /* estimated bytecode length */
00354     RENode          *result;
00355     size_t          classBitmapsMem; /* memory to hold all class bitmaps */
00356     struct {
00357         const WCHAR *start;         /* small cache of class strings */
00358         size_t length;              /* since they're often the same */
00359         size_t index;
00360     } classCache[CLASS_CACHE_SIZE];
00361     WORD          flags;
00362 } CompilerState;
00363 
00364 typedef struct EmitStateStackEntry {
00365     jsbytecode      *altHead;       /* start of REOP_ALT* opcode */
00366     jsbytecode      *nextAltFixup;  /* fixup pointer to next-alt offset */
00367     jsbytecode      *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
00368     jsbytecode      *endTermFixup;  /* fixup ptr. to REOPT_ALTPREREQ* offset */
00369     RENode          *continueNode;  /* original REOP_ALT* node being stacked */
00370     jsbytecode      continueOp;     /* REOP_JUMP or REOP_ENDALT continuation */
00371     JSPackedBool    jumpToJumpFlag; /* true if we've patched jump-to-jump to
00372                                        avoid 16-bit unsigned offset overflow */
00373 } EmitStateStackEntry;
00374 
00375 /*
00376  * Immediate operand sizes and getter/setters.  Unlike the ones in jsopcode.h,
00377  * the getters and setters take the pc of the offset, not of the opcode before
00378  * the offset.
00379  */
00380 #define ARG_LEN             2
00381 #define GET_ARG(pc)         ((WORD)(((pc)[0] << 8) | (pc)[1]))
00382 #define SET_ARG(pc, arg)    ((pc)[0] = (jsbytecode) ((arg) >> 8),       \
00383                              (pc)[1] = (jsbytecode) (arg))
00384 
00385 #define OFFSET_LEN          ARG_LEN
00386 #define OFFSET_MAX          ((1 << (ARG_LEN * 8)) - 1)
00387 #define GET_OFFSET(pc)      GET_ARG(pc)
00388 
00389 static BOOL ParseRegExp(CompilerState*);
00390 
00391 /*
00392  * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
00393  * For sanity, we limit it to 2^24 bytes.
00394  */
00395 #define TREE_DEPTH_MAX  ((1 << 24) / sizeof(EmitStateStackEntry))
00396 
00397 /*
00398  * The maximum memory that can be allocated for class bitmaps.
00399  * For sanity, we limit it to 2^24 bytes.
00400  */
00401 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
00402 
00403 /*
00404  * Functions to get size and write/read bytecode that represent small indexes
00405  * compactly.
00406  * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
00407  * indicates that the following byte brings more bits to the index. Otherwise
00408  * this is the last byte in the index bytecode representing highest index bits.
00409  */
00410 static size_t
00411 GetCompactIndexWidth(size_t index)
00412 {
00413     size_t width;
00414 
00415     for (width = 1; (index >>= 7) != 0; ++width) { }
00416     return width;
00417 }
00418 
00419 static inline jsbytecode *
00420 WriteCompactIndex(jsbytecode *pc, size_t index)
00421 {
00422     size_t next;
00423 
00424     while ((next = index >> 7) != 0) {
00425         *pc++ = (jsbytecode)(index | 0x80);
00426         index = next;
00427     }
00428     *pc++ = (jsbytecode)index;
00429     return pc;
00430 }
00431 
00432 static inline jsbytecode *
00433 ReadCompactIndex(jsbytecode *pc, size_t *result)
00434 {
00435     size_t nextByte;
00436 
00437     nextByte = *pc++;
00438     if ((nextByte & 0x80) == 0) {
00439         /*
00440          * Short-circuit the most common case when compact index <= 127.
00441          */
00442         *result = nextByte;
00443     } else {
00444         size_t shift = 7;
00445         *result = 0x7F & nextByte;
00446         do {
00447             nextByte = *pc++;
00448             *result |= (nextByte & 0x7F) << shift;
00449             shift += 7;
00450         } while ((nextByte & 0x80) != 0);
00451     }
00452     return pc;
00453 }
00454 
00455 /* Construct and initialize an RENode, returning NULL for out-of-memory */
00456 static RENode *
00457 NewRENode(CompilerState *state, REOp op)
00458 {
00459     RENode *ren;
00460 
00461     ren = jsheap_alloc(&state->context->tmp_heap, sizeof(*ren));
00462     if (!ren) {
00463         /* js_ReportOutOfScriptQuota(cx); */
00464         return NULL;
00465     }
00466     ren->op = op;
00467     ren->next = NULL;
00468     ren->kid = NULL;
00469     return ren;
00470 }
00471 
00472 /*
00473  * Validates and converts hex ascii value.
00474  */
00475 static BOOL
00476 isASCIIHexDigit(WCHAR c, UINT *digit)
00477 {
00478     UINT cv = c;
00479 
00480     if (cv < '0')
00481         return FALSE;
00482     if (cv <= '9') {
00483         *digit = cv - '0';
00484         return TRUE;
00485     }
00486     cv |= 0x20;
00487     if (cv >= 'a' && cv <= 'f') {
00488         *digit = cv - 'a' + 10;
00489         return TRUE;
00490     }
00491     return FALSE;
00492 }
00493 
00494 typedef struct {
00495     REOp op;
00496     const WCHAR *errPos;
00497     size_t parenIndex;
00498 } REOpData;
00499 
00500 #define JUMP_OFFSET_HI(off)     ((jsbytecode)((off) >> 8))
00501 #define JUMP_OFFSET_LO(off)     ((jsbytecode)(off))
00502 
00503 static BOOL
00504 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
00505 {
00506     ptrdiff_t offset = target - jump;
00507 
00508     /* Check that target really points forward. */
00509     assert(offset >= 2);
00510     if ((size_t)offset > OFFSET_MAX)
00511         return FALSE;
00512 
00513     jump[0] = JUMP_OFFSET_HI(offset);
00514     jump[1] = JUMP_OFFSET_LO(offset);
00515     return TRUE;
00516 }
00517 
00518 /*
00519  * Generate bytecode for the tree rooted at t using an explicit stack instead
00520  * of recursion.
00521  */
00522 static jsbytecode *
00523 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
00524                jsbytecode *pc, RENode *t)
00525 {
00526     EmitStateStackEntry *emitStateSP, *emitStateStack;
00527     RECharSet *charSet;
00528     REOp op;
00529 
00530     if (treeDepth == 0) {
00531         emitStateStack = NULL;
00532     } else {
00533         emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
00534         if (!emitStateStack)
00535             return NULL;
00536     }
00537     emitStateSP = emitStateStack;
00538     op = t->op;
00539     assert(op < REOP_LIMIT);
00540 
00541     for (;;) {
00542         *pc++ = op;
00543         switch (op) {
00544           case REOP_EMPTY:
00545             --pc;
00546             break;
00547 
00548           case REOP_ALTPREREQ2:
00549           case REOP_ALTPREREQ:
00550             assert(emitStateSP);
00551             emitStateSP->altHead = pc - 1;
00552             emitStateSP->endTermFixup = pc;
00553             pc += OFFSET_LEN;
00554             SET_ARG(pc, t->u.altprereq.ch1);
00555             pc += ARG_LEN;
00556             SET_ARG(pc, t->u.altprereq.ch2);
00557             pc += ARG_LEN;
00558 
00559             emitStateSP->nextAltFixup = pc;    /* offset to next alternate */
00560             pc += OFFSET_LEN;
00561 
00562             emitStateSP->continueNode = t;
00563             emitStateSP->continueOp = REOP_JUMP;
00564             emitStateSP->jumpToJumpFlag = FALSE;
00565             ++emitStateSP;
00566             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
00567             t = t->kid;
00568             op = t->op;
00569             assert(op < REOP_LIMIT);
00570             continue;
00571 
00572           case REOP_JUMP:
00573             emitStateSP->nextTermFixup = pc;    /* offset to following term */
00574             pc += OFFSET_LEN;
00575             if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
00576                 goto jump_too_big;
00577             emitStateSP->continueOp = REOP_ENDALT;
00578             ++emitStateSP;
00579             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
00580             t = t->u.kid2;
00581             op = t->op;
00582             assert(op < REOP_LIMIT);
00583             continue;
00584 
00585           case REOP_ENDALT:
00586             /*
00587              * If we already patched emitStateSP->nextTermFixup to jump to
00588              * a nearer jump, to avoid 16-bit immediate offset overflow, we
00589              * are done here.
00590              */
00591             if (emitStateSP->jumpToJumpFlag)
00592                 break;
00593 
00594             /*
00595              * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
00596              * REOP_ENDALT is executed only on successful match of the last
00597              * alternate in a group.
00598              */
00599             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
00600                 goto jump_too_big;
00601             if (t->op != REOP_ALT) {
00602                 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
00603                     goto jump_too_big;
00604             }
00605 
00606             /*
00607              * If the program is bigger than the REOP_JUMP offset range, then
00608              * we must check for alternates before this one that are part of
00609              * the same group, and fix up their jump offsets to target jumps
00610              * close enough to fit in a 16-bit unsigned offset immediate.
00611              */
00612             if ((size_t)(pc - re->program) > OFFSET_MAX &&
00613                 emitStateSP > emitStateStack) {
00614                 EmitStateStackEntry *esp, *esp2;
00615                 jsbytecode *alt, *jump;
00616                 ptrdiff_t span, header;
00617 
00618                 esp2 = emitStateSP;
00619                 alt = esp2->altHead;
00620                 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
00621                     if (esp->continueOp == REOP_ENDALT &&
00622                         !esp->jumpToJumpFlag &&
00623                         esp->nextTermFixup + OFFSET_LEN == alt &&
00624                         (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
00625                                        ? esp->endTermFixup
00626                                        : esp->nextTermFixup)) > OFFSET_MAX) {
00627                         alt = esp->altHead;
00628                         jump = esp->nextTermFixup;
00629 
00630                         /*
00631                          * The span must be 1 less than the distance from
00632                          * jump offset to jump offset, so we actually jump
00633                          * to a REOP_JUMP bytecode, not to its offset!
00634                          */
00635                         for (;;) {
00636                             assert(jump < esp2->nextTermFixup);
00637                             span = esp2->nextTermFixup - jump - 1;
00638                             if ((size_t)span <= OFFSET_MAX)
00639                                 break;
00640                             do {
00641                                 if (--esp2 == esp)
00642                                     goto jump_too_big;
00643                             } while (esp2->continueOp != REOP_ENDALT);
00644                         }
00645 
00646                         jump[0] = JUMP_OFFSET_HI(span);
00647                         jump[1] = JUMP_OFFSET_LO(span);
00648 
00649                         if (esp->continueNode->op != REOP_ALT) {
00650                             /*
00651                              * We must patch the offset at esp->endTermFixup
00652                              * as well, for the REOP_ALTPREREQ{,2} opcodes.
00653                              * If we're unlucky and endTermFixup is more than
00654                              * OFFSET_MAX bytes from its target, we cheat by
00655                              * jumping 6 bytes to the jump whose offset is at
00656                              * esp->nextTermFixup, which has the same target.
00657                              */
00658                             jump = esp->endTermFixup;
00659                             header = esp->nextTermFixup - jump;
00660                             span += header;
00661                             if ((size_t)span > OFFSET_MAX)
00662                                 span = header;
00663 
00664                             jump[0] = JUMP_OFFSET_HI(span);
00665                             jump[1] = JUMP_OFFSET_LO(span);
00666                         }
00667 
00668                         esp->jumpToJumpFlag = TRUE;
00669                     }
00670                 }
00671             }
00672             break;
00673 
00674           case REOP_ALT:
00675             assert(emitStateSP);
00676             emitStateSP->altHead = pc - 1;
00677             emitStateSP->nextAltFixup = pc;     /* offset to next alternate */
00678             pc += OFFSET_LEN;
00679             emitStateSP->continueNode = t;
00680             emitStateSP->continueOp = REOP_JUMP;
00681             emitStateSP->jumpToJumpFlag = FALSE;
00682             ++emitStateSP;
00683             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
00684             t = t->kid;
00685             op = t->op;
00686             assert(op < REOP_LIMIT);
00687             continue;
00688 
00689           case REOP_FLAT:
00690             /*
00691              * Coalesce FLATs if possible and if it would not increase bytecode
00692              * beyond preallocated limit. The latter happens only when bytecode
00693              * size for coalesced string with offset p and length 2 exceeds 6
00694              * bytes preallocated for 2 single char nodes, i.e. when
00695              * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
00696              * GetCompactIndexWidth(p) > 4.
00697              * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
00698              * nodes strictly decreases bytecode size, the check has to be
00699              * done only for the first coalescing.
00700              */
00701             if (t->kid &&
00702                 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
00703             {
00704                 while (t->next &&
00705                        t->next->op == REOP_FLAT &&
00706                        (WCHAR*)t->kid + t->u.flat.length ==
00707                        t->next->kid) {
00708                     t->u.flat.length += t->next->u.flat.length;
00709                     t->next = t->next->next;
00710                 }
00711             }
00712             if (t->kid && t->u.flat.length > 1) {
00713                 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
00714                 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
00715                 pc = WriteCompactIndex(pc, t->u.flat.length);
00716             } else if (t->u.flat.chr < 256) {
00717                 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
00718                 *pc++ = (jsbytecode) t->u.flat.chr;
00719             } else {
00720                 pc[-1] = (state->flags & JSREG_FOLD)
00721                          ? REOP_UCFLAT1i
00722                          : REOP_UCFLAT1;
00723                 SET_ARG(pc, t->u.flat.chr);
00724                 pc += ARG_LEN;
00725             }
00726             break;
00727 
00728           case REOP_LPAREN:
00729             assert(emitStateSP);
00730             pc = WriteCompactIndex(pc, t->u.parenIndex);
00731             emitStateSP->continueNode = t;
00732             emitStateSP->continueOp = REOP_RPAREN;
00733             ++emitStateSP;
00734             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
00735             t = t->kid;
00736             op = t->op;
00737             continue;
00738 
00739           case REOP_RPAREN:
00740             pc = WriteCompactIndex(pc, t->u.parenIndex);
00741             break;
00742 
00743           case REOP_BACKREF:
00744             pc = WriteCompactIndex(pc, t->u.parenIndex);
00745             break;
00746 
00747           case REOP_ASSERT:
00748             assert(emitStateSP);
00749             emitStateSP->nextTermFixup = pc;
00750             pc += OFFSET_LEN;
00751             emitStateSP->continueNode = t;
00752             emitStateSP->continueOp = REOP_ASSERTTEST;
00753             ++emitStateSP;
00754             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
00755             t = t->kid;
00756             op = t->op;
00757             continue;
00758 
00759           case REOP_ASSERTTEST:
00760           case REOP_ASSERTNOTTEST:
00761             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
00762                 goto jump_too_big;
00763             break;
00764 
00765           case REOP_ASSERT_NOT:
00766             assert(emitStateSP);
00767             emitStateSP->nextTermFixup = pc;
00768             pc += OFFSET_LEN;
00769             emitStateSP->continueNode = t;
00770             emitStateSP->continueOp = REOP_ASSERTNOTTEST;
00771             ++emitStateSP;
00772             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
00773             t = t->kid;
00774             op = t->op;
00775             continue;
00776 
00777           case REOP_QUANT:
00778             assert(emitStateSP);
00779             if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
00780                 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
00781             } else if (t->u.range.min == 0 && t->u.range.max == 1) {
00782                 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
00783             } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
00784                 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
00785             } else {
00786                 if (!t->u.range.greedy)
00787                     pc[-1] = REOP_MINIMALQUANT;
00788                 pc = WriteCompactIndex(pc, t->u.range.min);
00789                 /*
00790                  * Write max + 1 to avoid using size_t(max) + 1 bytes
00791                  * for (UINT)-1 sentinel.
00792                  */
00793                 pc = WriteCompactIndex(pc, t->u.range.max + 1);
00794             }
00795             emitStateSP->nextTermFixup = pc;
00796             pc += OFFSET_LEN;
00797             emitStateSP->continueNode = t;
00798             emitStateSP->continueOp = REOP_ENDCHILD;
00799             ++emitStateSP;
00800             assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
00801             t = t->kid;
00802             op = t->op;
00803             continue;
00804 
00805           case REOP_ENDCHILD:
00806             if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
00807                 goto jump_too_big;
00808             break;
00809 
00810           case REOP_CLASS:
00811             if (!t->u.ucclass.sense)
00812                 pc[-1] = REOP_NCLASS;
00813             pc = WriteCompactIndex(pc, t->u.ucclass.index);
00814             charSet = &re->classList[t->u.ucclass.index];
00815             charSet->converted = FALSE;
00816             charSet->length = t->u.ucclass.bmsize;
00817             charSet->u.src.startIndex = t->u.ucclass.startIndex;
00818             charSet->u.src.length = t->u.ucclass.kidlen;
00819             charSet->sense = t->u.ucclass.sense;
00820             break;
00821 
00822           default:
00823             break;
00824         }
00825 
00826         t = t->next;
00827         if (t) {
00828             op = t->op;
00829         } else {
00830             if (emitStateSP == emitStateStack)
00831                 break;
00832             --emitStateSP;
00833             t = emitStateSP->continueNode;
00834             op = (REOp) emitStateSP->continueOp;
00835         }
00836     }
00837 
00838   cleanup:
00839     heap_free(emitStateStack);
00840     return pc;
00841 
00842   jump_too_big:
00843     ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
00844     pc = NULL;
00845     goto cleanup;
00846 }
00847 
00848 /*
00849  * Process the op against the two top operands, reducing them to a single
00850  * operand in the penultimate slot. Update progLength and treeDepth.
00851  */
00852 static BOOL
00853 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
00854           INT operandSP)
00855 {
00856     RENode *result;
00857 
00858     switch (opData->op) {
00859       case REOP_ALT:
00860         result = NewRENode(state, REOP_ALT);
00861         if (!result)
00862             return FALSE;
00863         result->kid = operandStack[operandSP - 2];
00864         result->u.kid2 = operandStack[operandSP - 1];
00865         operandStack[operandSP - 2] = result;
00866 
00867         if (state->treeDepth == TREE_DEPTH_MAX) {
00868             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
00869             return FALSE;
00870         }
00871         ++state->treeDepth;
00872 
00873         /*
00874          * Look at both alternates to see if there's a FLAT or a CLASS at
00875          * the start of each. If so, use a prerequisite match.
00876          */
00877         if (((RENode *) result->kid)->op == REOP_FLAT &&
00878             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
00879             (state->flags & JSREG_FOLD) == 0) {
00880             result->op = REOP_ALTPREREQ;
00881             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
00882             result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
00883             /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
00884                                             JUMP, <end> ... ENDALT */
00885             state->progLength += 13;
00886         }
00887         else
00888         if (((RENode *) result->kid)->op == REOP_CLASS &&
00889             ((RENode *) result->kid)->u.ucclass.index < 256 &&
00890             ((RENode *) result->u.kid2)->op == REOP_FLAT &&
00891             (state->flags & JSREG_FOLD) == 0) {
00892             result->op = REOP_ALTPREREQ2;
00893             result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
00894             result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
00895             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
00896                                             JUMP, <end> ... ENDALT */
00897             state->progLength += 13;
00898         }
00899         else
00900         if (((RENode *) result->kid)->op == REOP_FLAT &&
00901             ((RENode *) result->u.kid2)->op == REOP_CLASS &&
00902             ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
00903             (state->flags & JSREG_FOLD) == 0) {
00904             result->op = REOP_ALTPREREQ2;
00905             result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
00906             result->u.altprereq.ch2 =
00907                 ((RENode *) result->u.kid2)->u.ucclass.index;
00908             /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
00909                                           JUMP, <end> ... ENDALT */
00910             state->progLength += 13;
00911         }
00912         else {
00913             /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
00914             state->progLength += 7;
00915         }
00916         break;
00917 
00918       case REOP_CONCAT:
00919         result = operandStack[operandSP - 2];
00920         while (result->next)
00921             result = result->next;
00922         result->next = operandStack[operandSP - 1];
00923         break;
00924 
00925       case REOP_ASSERT:
00926       case REOP_ASSERT_NOT:
00927       case REOP_LPARENNON:
00928       case REOP_LPAREN:
00929         /* These should have been processed by a close paren. */
00930         ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
00931                                 opData->errPos);
00932         return FALSE;
00933 
00934       default:;
00935     }
00936     return TRUE;
00937 }
00938 
00939 /*
00940  * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
00941  * its being on the stack, and to propagate errors to its callers.
00942  */
00943 #define JSREG_FIND_PAREN_COUNT  0x8000
00944 #define JSREG_FIND_PAREN_ERROR  0x4000
00945 
00946 /*
00947  * Magic return value from FindParenCount and GetDecimalValue, to indicate
00948  * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
00949  * its findMax parameter is non-null.
00950  */
00951 #define OVERFLOW_VALUE          ((UINT)-1)
00952 
00953 static UINT
00954 FindParenCount(CompilerState *state)
00955 {
00956     CompilerState temp;
00957     int i;
00958 
00959     if (state->flags & JSREG_FIND_PAREN_COUNT)
00960         return OVERFLOW_VALUE;
00961 
00962     /*
00963      * Copy state into temp, flag it so we never report an invalid backref,
00964      * and reset its members to parse the entire regexp.  This is obviously
00965      * suboptimal, but GetDecimalValue calls us only if a backref appears to
00966      * refer to a forward parenthetical, which is rare.
00967      */
00968     temp = *state;
00969     temp.flags |= JSREG_FIND_PAREN_COUNT;
00970     temp.cp = temp.cpbegin;
00971     temp.parenCount = 0;
00972     temp.classCount = 0;
00973     temp.progLength = 0;
00974     temp.treeDepth = 0;
00975     temp.classBitmapsMem = 0;
00976     for (i = 0; i < CLASS_CACHE_SIZE; i++)
00977         temp.classCache[i].start = NULL;
00978 
00979     if (!ParseRegExp(&temp)) {
00980         state->flags |= JSREG_FIND_PAREN_ERROR;
00981         return OVERFLOW_VALUE;
00982     }
00983     return temp.parenCount;
00984 }
00985 
00986 /*
00987  * Extract and return a decimal value at state->cp.  The initial character c
00988  * has already been read.  Return OVERFLOW_VALUE if the result exceeds max.
00989  * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
00990  * state->flags to discover whether an error occurred under findMax.
00991  */
00992 static UINT
00993 GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state),
00994                 CompilerState *state)
00995 {
00996     UINT value = JS7_UNDEC(c);
00997     BOOL overflow = (value > max && (!findMax || value > findMax(state)));
00998 
00999     /* The following restriction allows simpler overflow checks. */
01000     assert(max <= ((UINT)-1 - 9) / 10);
01001     while (state->cp < state->cpend) {
01002         c = *state->cp;
01003         if (!JS7_ISDEC(c))
01004             break;
01005         value = 10 * value + JS7_UNDEC(c);
01006         if (!overflow && value > max && (!findMax || value > findMax(state)))
01007             overflow = TRUE;
01008         ++state->cp;
01009     }
01010     return overflow ? OVERFLOW_VALUE : value;
01011 }
01012 
01013 /*
01014  * Calculate the total size of the bitmap required for a class expression.
01015  */
01016 static BOOL
01017 CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src,
01018                     const WCHAR *end)
01019 {
01020     UINT max = 0;
01021     BOOL inRange = FALSE;
01022     WCHAR c, rangeStart = 0;
01023     UINT n, digit, nDigits, i;
01024 
01025     target->u.ucclass.bmsize = 0;
01026     target->u.ucclass.sense = TRUE;
01027 
01028     if (src == end)
01029         return TRUE;
01030 
01031     if (*src == '^') {
01032         ++src;
01033         target->u.ucclass.sense = FALSE;
01034     }
01035 
01036     while (src != end) {
01037         BOOL canStartRange = TRUE;
01038         UINT localMax = 0;
01039 
01040         switch (*src) {
01041           case '\\':
01042             ++src;
01043             c = *src++;
01044             switch (c) {
01045               case 'b':
01046                 localMax = 0x8;
01047                 break;
01048               case 'f':
01049                 localMax = 0xC;
01050                 break;
01051               case 'n':
01052                 localMax = 0xA;
01053                 break;
01054               case 'r':
01055                 localMax = 0xD;
01056                 break;
01057               case 't':
01058                 localMax = 0x9;
01059                 break;
01060               case 'v':
01061                 localMax = 0xB;
01062                 break;
01063               case 'c':
01064                 if (src < end && RE_IS_LETTER(*src)) {
01065                     localMax = (UINT) (*src++) & 0x1F;
01066                 } else {
01067                     --src;
01068                     localMax = '\\';
01069                 }
01070                 break;
01071               case 'x':
01072                 nDigits = 2;
01073                 goto lexHex;
01074               case 'u':
01075                 nDigits = 4;
01076 lexHex:
01077                 n = 0;
01078                 for (i = 0; (i < nDigits) && (src < end); i++) {
01079                     c = *src++;
01080                     if (!isASCIIHexDigit(c, &digit)) {
01081                         /*
01082                          * Back off to accepting the original
01083                          *'\' as a literal.
01084                          */
01085                         src -= i + 1;
01086                         n = '\\';
01087                         break;
01088                     }
01089                     n = (n << 4) | digit;
01090                 }
01091                 localMax = n;
01092                 break;
01093               case 'd':
01094                 canStartRange = FALSE;
01095                 if (inRange) {
01096                     JS_ReportErrorNumber(state->context,
01097                                          js_GetErrorMessage, NULL,
01098                                          JSMSG_BAD_CLASS_RANGE);
01099                     return FALSE;
01100                 }
01101                 localMax = '9';
01102                 break;
01103               case 'D':
01104               case 's':
01105               case 'S':
01106               case 'w':
01107               case 'W':
01108                 canStartRange = FALSE;
01109                 if (inRange) {
01110                     JS_ReportErrorNumber(state->context,
01111                                          js_GetErrorMessage, NULL,
01112                                          JSMSG_BAD_CLASS_RANGE);
01113                     return FALSE;
01114                 }
01115                 max = 65535;
01116 
01117                 /*
01118                  * If this is the start of a range, ensure that it's less than
01119                  * the end.
01120                  */
01121                 localMax = 0;
01122                 break;
01123               case '0':
01124               case '1':
01125               case '2':
01126               case '3':
01127               case '4':
01128               case '5':
01129               case '6':
01130               case '7':
01131                 /*
01132                  *  This is a non-ECMA extension - decimal escapes (in this
01133                  *  case, octal!) are supposed to be an error inside class
01134                  *  ranges, but supported here for backwards compatibility.
01135                  *
01136                  */
01137                 n = JS7_UNDEC(c);
01138                 c = *src;
01139                 if ('0' <= c && c <= '7') {
01140                     src++;
01141                     n = 8 * n + JS7_UNDEC(c);
01142                     c = *src;
01143                     if ('0' <= c && c <= '7') {
01144                         src++;
01145                         i = 8 * n + JS7_UNDEC(c);
01146                         if (i <= 0377)
01147                             n = i;
01148                         else
01149                             src--;
01150                     }
01151                 }
01152                 localMax = n;
01153                 break;
01154 
01155               default:
01156                 localMax = c;
01157                 break;
01158             }
01159             break;
01160           default:
01161             localMax = *src++;
01162             break;
01163         }
01164 
01165         if (inRange) {
01166             /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
01167             if (rangeStart > localMax) {
01168                 JS_ReportErrorNumber(state->context,
01169                                      js_GetErrorMessage, NULL,
01170                                      JSMSG_BAD_CLASS_RANGE);
01171                 return FALSE;
01172             }
01173             inRange = FALSE;
01174         } else {
01175             if (canStartRange && src < end - 1) {
01176                 if (*src == '-') {
01177                     ++src;
01178                     inRange = TRUE;
01179                     rangeStart = (WCHAR)localMax;
01180                     continue;
01181                 }
01182             }
01183             if (state->flags & JSREG_FOLD)
01184                 rangeStart = localMax;   /* one run of the uc/dc loop below */
01185         }
01186 
01187         if (state->flags & JSREG_FOLD) {
01188             WCHAR maxch = localMax;
01189 
01190             for (i = rangeStart; i <= localMax; i++) {
01191                 WCHAR uch, dch;
01192 
01193                 uch = toupperW(i);
01194                 dch = tolowerW(i);
01195                 if(maxch < uch)
01196                     maxch = uch;
01197                 if(maxch < dch)
01198                     maxch = dch;
01199             }
01200             localMax = maxch;
01201         }
01202 
01203         if (localMax > max)
01204             max = localMax;
01205     }
01206     target->u.ucclass.bmsize = max;
01207     return TRUE;
01208 }
01209 
01210 static INT
01211 ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
01212 {
01213     UINT min, max;
01214     WCHAR c;
01215     const WCHAR *errp = state->cp++;
01216 
01217     c = *state->cp;
01218     if (JS7_ISDEC(c)) {
01219         ++state->cp;
01220         min = GetDecimalValue(c, 0xFFFF, NULL, state);
01221         c = *state->cp;
01222 
01223         if (!ignoreValues && min == OVERFLOW_VALUE)
01224             return JSMSG_MIN_TOO_BIG;
01225 
01226         if (c == ',') {
01227             c = *++state->cp;
01228             if (JS7_ISDEC(c)) {
01229                 ++state->cp;
01230                 max = GetDecimalValue(c, 0xFFFF, NULL, state);
01231                 c = *state->cp;
01232                 if (!ignoreValues && max == OVERFLOW_VALUE)
01233                     return JSMSG_MAX_TOO_BIG;
01234                 if (!ignoreValues && min > max)
01235                     return JSMSG_OUT_OF_ORDER;
01236             } else {
01237                 max = (UINT)-1;
01238             }
01239         } else {
01240             max = min;
01241         }
01242         if (c == '}') {
01243             state->result = NewRENode(state, REOP_QUANT);
01244             if (!state->result)
01245                 return JSMSG_OUT_OF_MEMORY;
01246             state->result->u.range.min = min;
01247             state->result->u.range.max = max;
01248             /*
01249              * QUANT, <min>, <max>, <next> ... <ENDCHILD>
01250              * where <max> is written as compact(max+1) to make
01251              * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
01252              */
01253             state->progLength += (1 + GetCompactIndexWidth(min)
01254                                   + GetCompactIndexWidth(max + 1)
01255                                   +3);
01256             return 0;
01257         }
01258     }
01259 
01260     state->cp = errp;
01261     return -1;
01262 }
01263 
01264 static BOOL
01265 ParseQuantifier(CompilerState *state)
01266 {
01267     RENode *term;
01268     term = state->result;
01269     if (state->cp < state->cpend) {
01270         switch (*state->cp) {
01271           case '+':
01272             state->result = NewRENode(state, REOP_QUANT);
01273             if (!state->result)
01274                 return FALSE;
01275             state->result->u.range.min = 1;
01276             state->result->u.range.max = (UINT)-1;
01277             /* <PLUS>, <next> ... <ENDCHILD> */
01278             state->progLength += 4;
01279             goto quantifier;
01280           case '*':
01281             state->result = NewRENode(state, REOP_QUANT);
01282             if (!state->result)
01283                 return FALSE;
01284             state->result->u.range.min = 0;
01285             state->result->u.range.max = (UINT)-1;
01286             /* <STAR>, <next> ... <ENDCHILD> */
01287             state->progLength += 4;
01288             goto quantifier;
01289           case '?':
01290             state->result = NewRENode(state, REOP_QUANT);
01291             if (!state->result)
01292                 return FALSE;
01293             state->result->u.range.min = 0;
01294             state->result->u.range.max = 1;
01295             /* <OPT>, <next> ... <ENDCHILD> */
01296             state->progLength += 4;
01297             goto quantifier;
01298           case '{':       /* balance '}' */
01299           {
01300             INT err;
01301 
01302             err = ParseMinMaxQuantifier(state, FALSE);
01303             if (err == 0)
01304                 goto quantifier;
01305             if (err == -1)
01306                 return TRUE;
01307 
01308             ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
01309             return FALSE;
01310           }
01311           default:;
01312         }
01313     }
01314     return TRUE;
01315 
01316 quantifier:
01317     if (state->treeDepth == TREE_DEPTH_MAX) {
01318         ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
01319         return FALSE;
01320     }
01321 
01322     ++state->treeDepth;
01323     ++state->cp;
01324     state->result->kid = term;
01325     if (state->cp < state->cpend && *state->cp == '?') {
01326         ++state->cp;
01327         state->result->u.range.greedy = FALSE;
01328     } else {
01329         state->result->u.range.greedy = TRUE;
01330     }
01331     return TRUE;
01332 }
01333 
01334 /*
01335  *  item:       assertion               An item is either an assertion or
01336  *              quantatom               a quantified atom.
01337  *
01338  *  assertion:  '^'                     Assertions match beginning of string
01339  *                                      (or line if the class static property
01340  *                                      RegExp.multiline is true).
01341  *              '$'                     End of string (or line if the class
01342  *                                      static property RegExp.multiline is
01343  *                                      true).
01344  *              '\b'                    Word boundary (between \w and \W).
01345  *              '\B'                    Word non-boundary.
01346  *
01347  *  quantatom:  atom                    An unquantified atom.
01348  *              quantatom '{' n ',' m '}'
01349  *                                      Atom must occur between n and m times.
01350  *              quantatom '{' n ',' '}' Atom must occur at least n times.
01351  *              quantatom '{' n '}'     Atom must occur exactly n times.
01352  *              quantatom '*'           Zero or more times (same as {0,}).
01353  *              quantatom '+'           One or more times (same as {1,}).
01354  *              quantatom '?'           Zero or one time (same as {0,1}).
01355  *
01356  *              any of which can be optionally followed by '?' for ungreedy
01357  *
01358  *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
01359  *                                      can be addressed using a backreference,
01360  *                                      see '\' n below).
01361  *              '.'                     Matches any char except '\n'.
01362  *              '[' classlist ']'       A character class.
01363  *              '[' '^' classlist ']'   A negated character class.
01364  *              '\f'                    Form Feed.
01365  *              '\n'                    Newline (Line Feed).
01366  *              '\r'                    Carriage Return.
01367  *              '\t'                    Horizontal Tab.
01368  *              '\v'                    Vertical Tab.
01369  *              '\d'                    A digit (same as [0-9]).
01370  *              '\D'                    A non-digit.
01371  *              '\w'                    A word character, [0-9a-z_A-Z].
01372  *              '\W'                    A non-word character.
01373  *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
01374  *              '\S'                    A non-whitespace character.
01375  *              '\' n                   A backreference to the nth (n decimal
01376  *                                      and positive) parenthesized expression.
01377  *              '\' octal               An octal escape sequence (octal must be
01378  *                                      two or three digits long, unless it is
01379  *                                      0 for the null character).
01380  *              '\x' hex                A hex escape (hex must be two digits).
01381  *              '\u' unicode            A unicode escape (must be four digits).
01382  *              '\c' ctrl               A control character, ctrl is a letter.
01383  *              '\' literalatomchar     Any character except one of the above
01384  *                                      that follow '\' in an atom.
01385  *              otheratomchar           Any character not first among the other
01386  *                                      atom right-hand sides.
01387  */
01388 static BOOL
01389 ParseTerm(CompilerState *state)
01390 {
01391     WCHAR c = *state->cp++;
01392     UINT nDigits;
01393     UINT num, tmp, n, i;
01394     const WCHAR *termStart;
01395 
01396     switch (c) {
01397     /* assertions and atoms */
01398       case '^':
01399         state->result = NewRENode(state, REOP_BOL);
01400         if (!state->result)
01401             return FALSE;
01402         state->progLength++;
01403         return TRUE;
01404       case '$':
01405         state->result = NewRENode(state, REOP_EOL);
01406         if (!state->result)
01407             return FALSE;
01408         state->progLength++;
01409         return TRUE;
01410       case '\\':
01411         if (state->cp >= state->cpend) {
01412             /* a trailing '\' is an error */
01413             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
01414             return FALSE;
01415         }
01416         c = *state->cp++;
01417         switch (c) {
01418         /* assertion escapes */
01419           case 'b' :
01420             state->result = NewRENode(state, REOP_WBDRY);
01421             if (!state->result)
01422                 return FALSE;
01423             state->progLength++;
01424             return TRUE;
01425           case 'B':
01426             state->result = NewRENode(state, REOP_WNONBDRY);
01427             if (!state->result)
01428                 return FALSE;
01429             state->progLength++;
01430             return TRUE;
01431           /* Decimal escape */
01432           case '0':
01433               /* Give a strict warning. See also the note below. */
01434               WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
01435      doOctal:
01436             num = 0;
01437             while (state->cp < state->cpend) {
01438                 c = *state->cp;
01439                 if (c < '0' || '7' < c)
01440                     break;
01441                 state->cp++;
01442                 tmp = 8 * num + (UINT)JS7_UNDEC(c);
01443                 if (tmp > 0377)
01444                     break;
01445                 num = tmp;
01446             }
01447             c = (WCHAR)num;
01448     doFlat:
01449             state->result = NewRENode(state, REOP_FLAT);
01450             if (!state->result)
01451                 return FALSE;
01452             state->result->u.flat.chr = c;
01453             state->result->u.flat.length = 1;
01454             state->progLength += 3;
01455             break;
01456           case '1':
01457           case '2':
01458           case '3':
01459           case '4':
01460           case '5':
01461           case '6':
01462           case '7':
01463           case '8':
01464           case '9':
01465             termStart = state->cp - 1;
01466             num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
01467             if (state->flags & JSREG_FIND_PAREN_ERROR)
01468                 return FALSE;
01469             if (num == OVERFLOW_VALUE) {
01470                 /* Give a strict mode warning. */
01471                 WARN("back-reference exceeds number of capturing parentheses\n");
01472 
01473                 /*
01474                  * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
01475                  * error here. However, for compatibility with IE, we treat the
01476                  * whole backref as flat if the first character in it is not a
01477                  * valid octal character, and as an octal escape otherwise.
01478                  */
01479                 state->cp = termStart;
01480                 if (c >= '8') {
01481                     /* Treat this as flat. termStart - 1 is the \. */
01482                     c = '\\';
01483                     goto asFlat;
01484                 }
01485 
01486                 /* Treat this as an octal escape. */
01487                 goto doOctal;
01488             }
01489             assert(1 <= num && num <= 0x10000);
01490             state->result = NewRENode(state, REOP_BACKREF);
01491             if (!state->result)
01492                 return FALSE;
01493             state->result->u.parenIndex = num - 1;
01494             state->progLength
01495                 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
01496             break;
01497           /* Control escape */
01498           case 'f':
01499             c = 0xC;
01500             goto doFlat;
01501           case 'n':
01502             c = 0xA;
01503             goto doFlat;
01504           case 'r':
01505             c = 0xD;
01506             goto doFlat;
01507           case 't':
01508             c = 0x9;
01509             goto doFlat;
01510           case 'v':
01511             c = 0xB;
01512             goto doFlat;
01513           /* Control letter */
01514           case 'c':
01515             if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
01516                 c = (WCHAR) (*state->cp++ & 0x1F);
01517             } else {
01518                 /* back off to accepting the original '\' as a literal */
01519                 --state->cp;
01520                 c = '\\';
01521             }
01522             goto doFlat;
01523           /* HexEscapeSequence */
01524           case 'x':
01525             nDigits = 2;
01526             goto lexHex;
01527           /* UnicodeEscapeSequence */
01528           case 'u':
01529             nDigits = 4;
01530 lexHex:
01531             n = 0;
01532             for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
01533                 UINT digit;
01534                 c = *state->cp++;
01535                 if (!isASCIIHexDigit(c, &digit)) {
01536                     /*
01537                      * Back off to accepting the original 'u' or 'x' as a
01538                      * literal.
01539                      */
01540                     state->cp -= i + 2;
01541                     n = *state->cp++;
01542                     break;
01543                 }
01544                 n = (n << 4) | digit;
01545             }
01546             c = (WCHAR) n;
01547             goto doFlat;
01548           /* Character class escapes */
01549           case 'd':
01550             state->result = NewRENode(state, REOP_DIGIT);
01551 doSimple:
01552             if (!state->result)
01553                 return FALSE;
01554             state->progLength++;
01555             break;
01556           case 'D':
01557             state->result = NewRENode(state, REOP_NONDIGIT);
01558             goto doSimple;
01559           case 's':
01560             state->result = NewRENode(state, REOP_SPACE);
01561             goto doSimple;
01562           case 'S':
01563             state->result = NewRENode(state, REOP_NONSPACE);
01564             goto doSimple;
01565           case 'w':
01566             state->result = NewRENode(state, REOP_ALNUM);
01567             goto doSimple;
01568           case 'W':
01569             state->result = NewRENode(state, REOP_NONALNUM);
01570             goto doSimple;
01571           /* IdentityEscape */
01572           default:
01573             state->result = NewRENode(state, REOP_FLAT);
01574             if (!state->result)
01575                 return FALSE;
01576             state->result->u.flat.chr = c;
01577             state->result->u.flat.length = 1;
01578             state->result->kid = (void *) (state->cp - 1);
01579             state->progLength += 3;
01580             break;
01581         }
01582         break;
01583       case '[':
01584         state->result = NewRENode(state, REOP_CLASS);
01585         if (!state->result)
01586             return FALSE;
01587         termStart = state->cp;
01588         state->result->u.ucclass.startIndex = termStart - state->cpbegin;
01589         for (;;) {
01590             if (state->cp == state->cpend) {
01591                 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
01592                                         JSMSG_UNTERM_CLASS, termStart);
01593 
01594                 return FALSE;
01595             }
01596             if (*state->cp == '\\') {
01597                 state->cp++;
01598                 if (state->cp != state->cpend)
01599                     state->cp++;
01600                 continue;
01601             }
01602             if (*state->cp == ']') {
01603                 state->result->u.ucclass.kidlen = state->cp - termStart;
01604                 break;
01605             }
01606             state->cp++;
01607         }
01608         for (i = 0; i < CLASS_CACHE_SIZE; i++) {
01609             if (!state->classCache[i].start) {
01610                 state->classCache[i].start = termStart;
01611                 state->classCache[i].length = state->result->u.ucclass.kidlen;
01612                 state->classCache[i].index = state->classCount;
01613                 break;
01614             }
01615             if (state->classCache[i].length ==
01616                 state->result->u.ucclass.kidlen) {
01617                 for (n = 0; ; n++) {
01618                     if (n == state->classCache[i].length) {
01619                         state->result->u.ucclass.index
01620                             = state->classCache[i].index;
01621                         goto claim;
01622                     }
01623                     if (state->classCache[i].start[n] != termStart[n])
01624                         break;
01625                 }
01626             }
01627         }
01628         state->result->u.ucclass.index = state->classCount++;
01629 
01630     claim:
01631         /*
01632          * Call CalculateBitmapSize now as we want any errors it finds
01633          * to be reported during the parse phase, not at execution.
01634          */
01635         if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
01636             return FALSE;
01637         /*
01638          * Update classBitmapsMem with number of bytes to hold bmsize bits,
01639          * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
01640          * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
01641          */
01642         n = (state->result->u.ucclass.bmsize >> 3) + 1;
01643         if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
01644             ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
01645             return FALSE;
01646         }
01647         state->classBitmapsMem += n;
01648         /* CLASS, <index> */
01649         state->progLength
01650             += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
01651         break;
01652 
01653       case '.':
01654         state->result = NewRENode(state, REOP_DOT);
01655         goto doSimple;
01656 
01657       case '{':
01658       {
01659         const WCHAR *errp = state->cp--;
01660         INT err;
01661 
01662         err = ParseMinMaxQuantifier(state, TRUE);
01663         state->cp = errp;
01664 
01665         if (err < 0)
01666             goto asFlat;
01667 
01668         /* FALL THROUGH */
01669       }
01670       case '*':
01671       case '+':
01672       case '?':
01673         ReportRegExpErrorHelper(state, JSREPORT_ERROR,
01674                                 JSMSG_BAD_QUANTIFIER, state->cp - 1);
01675         return FALSE;
01676       default:
01677 asFlat:
01678         state->result = NewRENode(state, REOP_FLAT);
01679         if (!state->result)
01680             return FALSE;
01681         state->result->u.flat.chr = c;
01682         state->result->u.flat.length = 1;
01683         state->result->kid = (void *) (state->cp - 1);
01684         state->progLength += 3;
01685         break;
01686     }
01687     return ParseQuantifier(state);
01688 }
01689 
01690 /*
01691  * Top-down regular expression grammar, based closely on Perl4.
01692  *
01693  *  regexp:     altern                  A regular expression is one or more
01694  *              altern '|' regexp       alternatives separated by vertical bar.
01695  */
01696 #define INITIAL_STACK_SIZE  128
01697 
01698 static BOOL
01699 ParseRegExp(CompilerState *state)
01700 {
01701     size_t parenIndex;
01702     RENode *operand;
01703     REOpData *operatorStack;
01704     RENode **operandStack;
01705     REOp op;
01706     INT i;
01707     BOOL result = FALSE;
01708 
01709     INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
01710     INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
01711 
01712     /* Watch out for empty regexp */
01713     if (state->cp == state->cpend) {
01714         state->result = NewRENode(state, REOP_EMPTY);
01715         return (state->result != NULL);
01716     }
01717 
01718     operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
01719     if (!operatorStack)
01720         return FALSE;
01721 
01722     operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
01723     if (!operandStack)
01724         goto out;
01725 
01726     for (;;) {
01727         parenIndex = state->parenCount;
01728         if (state->cp == state->cpend) {
01729             /*
01730              * If we are at the end of the regexp and we're short one or more
01731              * operands, the regexp must have the form /x|/ or some such, with
01732              * left parentheses making us short more than one operand.
01733              */
01734             if (operatorSP >= operandSP) {
01735                 operand = NewRENode(state, REOP_EMPTY);
01736                 if (!operand)
01737                     goto out;
01738                 goto pushOperand;
01739             }
01740         } else {
01741             switch (*state->cp) {
01742               case '(':
01743                 ++state->cp;
01744                 if (state->cp + 1 < state->cpend &&
01745                     *state->cp == '?' &&
01746                     (state->cp[1] == '=' ||
01747                      state->cp[1] == '!' ||
01748                      state->cp[1] == ':')) {
01749                     switch (state->cp[1]) {
01750                       case '=':
01751                         op = REOP_ASSERT;
01752                         /* ASSERT, <next>, ... ASSERTTEST */
01753                         state->progLength += 4;
01754                         break;
01755                       case '!':
01756                         op = REOP_ASSERT_NOT;
01757                         /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
01758                         state->progLength += 4;
01759                         break;
01760                       default:
01761                         op = REOP_LPARENNON;
01762                         break;
01763                     }
01764                     state->cp += 2;
01765                 } else {
01766                     op = REOP_LPAREN;
01767                     /* LPAREN, <index>, ... RPAREN, <index> */
01768                     state->progLength
01769                         += 2 * (1 + GetCompactIndexWidth(parenIndex));
01770                     state->parenCount++;
01771                     if (state->parenCount == 65535) {
01772                         ReportRegExpError(state, JSREPORT_ERROR,
01773                                           JSMSG_TOO_MANY_PARENS);
01774                         goto out;
01775                     }
01776                 }
01777                 goto pushOperator;
01778 
01779               case ')':
01780                 /*
01781                  * If there's no stacked open parenthesis, throw syntax error.
01782                  */
01783                 for (i = operatorSP - 1; ; i--) {
01784                     if (i < 0) {
01785                         ReportRegExpError(state, JSREPORT_ERROR,
01786                                           JSMSG_UNMATCHED_RIGHT_PAREN);
01787                         goto out;
01788                     }
01789                     if (operatorStack[i].op == REOP_ASSERT ||
01790                         operatorStack[i].op == REOP_ASSERT_NOT ||
01791                         operatorStack[i].op == REOP_LPARENNON ||
01792                         operatorStack[i].op == REOP_LPAREN) {
01793                         break;
01794                     }
01795                 }
01796                 /* FALL THROUGH */
01797 
01798               case '|':
01799                 /* Expected an operand before these, so make an empty one */
01800                 operand = NewRENode(state, REOP_EMPTY);
01801                 if (!operand)
01802                     goto out;
01803                 goto pushOperand;
01804 
01805               default:
01806                 if (!ParseTerm(state))
01807                     goto out;
01808                 operand = state->result;
01809 pushOperand:
01810                 if (operandSP == operandStackSize) {
01811                     RENode **tmp;
01812                     operandStackSize += operandStackSize;
01813                     tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
01814                     if (!tmp)
01815                         goto out;
01816                     operandStack = tmp;
01817                 }
01818                 operandStack[operandSP++] = operand;
01819                 break;
01820             }
01821         }
01822 
01823         /* At the end; process remaining operators. */
01824 restartOperator:
01825         if (state->cp == state->cpend) {
01826             while (operatorSP) {
01827                 --operatorSP;
01828                 if (!ProcessOp(state, &operatorStack[operatorSP],
01829                                operandStack, operandSP))
01830                     goto out;
01831                 --operandSP;
01832             }
01833             assert(operandSP == 1);
01834             state->result = operandStack[0];
01835             result = TRUE;
01836             goto out;
01837         }
01838 
01839         switch (*state->cp) {
01840           case '|':
01841             /* Process any stacked 'concat' operators */
01842             ++state->cp;
01843             while (operatorSP &&
01844                    operatorStack[operatorSP - 1].op == REOP_CONCAT) {
01845                 --operatorSP;
01846                 if (!ProcessOp(state, &operatorStack[operatorSP],
01847                                operandStack, operandSP)) {
01848                     goto out;
01849                 }
01850                 --operandSP;
01851             }
01852             op = REOP_ALT;
01853             goto pushOperator;
01854 
01855           case ')':
01856             /*
01857              * If there's no stacked open parenthesis, throw syntax error.
01858              */
01859             for (i = operatorSP - 1; ; i--) {
01860                 if (i < 0) {
01861                     ReportRegExpError(state, JSREPORT_ERROR,
01862                                       JSMSG_UNMATCHED_RIGHT_PAREN);
01863                     goto out;
01864                 }
01865                 if (operatorStack[i].op == REOP_ASSERT ||
01866                     operatorStack[i].op == REOP_ASSERT_NOT ||
01867                     operatorStack[i].op == REOP_LPARENNON ||
01868                     operatorStack[i].op == REOP_LPAREN) {
01869                     break;
01870                 }
01871             }
01872             ++state->cp;
01873 
01874             /* Process everything on the stack until the open parenthesis. */
01875             for (;;) {
01876                 assert(operatorSP);
01877                 --operatorSP;
01878                 switch (operatorStack[operatorSP].op) {
01879                   case REOP_ASSERT:
01880                   case REOP_ASSERT_NOT:
01881                   case REOP_LPAREN:
01882                     operand = NewRENode(state, operatorStack[operatorSP].op);
01883                     if (!operand)
01884                         goto out;
01885                     operand->u.parenIndex =
01886                         operatorStack[operatorSP].parenIndex;
01887                     assert(operandSP);
01888                     operand->kid = operandStack[operandSP - 1];
01889                     operandStack[operandSP - 1] = operand;
01890                     if (state->treeDepth == TREE_DEPTH_MAX) {
01891                         ReportRegExpError(state, JSREPORT_ERROR,
01892                                           JSMSG_REGEXP_TOO_COMPLEX);
01893                         goto out;
01894                     }
01895                     ++state->treeDepth;
01896                     /* FALL THROUGH */
01897 
01898                   case REOP_LPARENNON:
01899                     state->result = operandStack[operandSP - 1];
01900                     if (!ParseQuantifier(state))
01901                         goto out;
01902                     operandStack[operandSP - 1] = state->result;
01903                     goto restartOperator;
01904                   default:
01905                     if (!ProcessOp(state, &operatorStack[operatorSP],
01906                                    operandStack, operandSP))
01907                         goto out;
01908                     --operandSP;
01909                     break;
01910                 }
01911             }
01912             break;
01913 
01914           case '{':
01915           {
01916             const WCHAR *errp = state->cp;
01917 
01918             if (ParseMinMaxQuantifier(state, TRUE) < 0) {
01919                 /*
01920                  * This didn't even scan correctly as a quantifier, so we should
01921                  * treat it as flat.
01922                  */
01923                 op = REOP_CONCAT;
01924                 goto pushOperator;
01925             }
01926 
01927             state->cp = errp;
01928             /* FALL THROUGH */
01929           }
01930 
01931           case '+':
01932           case '*':
01933           case '?':
01934             ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
01935                                     state->cp);
01936             result = FALSE;
01937             goto out;
01938 
01939           default:
01940             /* Anything else is the start of the next term. */
01941             op = REOP_CONCAT;
01942 pushOperator:
01943             if (operatorSP == operatorStackSize) {
01944                 REOpData *tmp;
01945                 operatorStackSize += operatorStackSize;
01946                 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
01947                 if (!tmp)
01948                     goto out;
01949                 operatorStack = tmp;
01950             }
01951             operatorStack[operatorSP].op = op;
01952             operatorStack[operatorSP].errPos = state->cp;
01953             operatorStack[operatorSP++].parenIndex = parenIndex;
01954             break;
01955         }
01956     }
01957 out:
01958     heap_free(operatorStack);
01959     heap_free(operandStack);
01960     return result;
01961 }
01962 
01963 /*
01964  * Save the current state of the match - the position in the input
01965  * text as well as the position in the bytecode. The state of any
01966  * parent expressions is also saved (preceding state).
01967  * Contents of parenCount parentheses from parenIndex are also saved.
01968  */
01969 static REBackTrackData *
01970 PushBackTrackState(REGlobalData *gData, REOp op,
01971                    jsbytecode *target, REMatchState *x, const WCHAR *cp,
01972                    size_t parenIndex, size_t parenCount)
01973 {
01974     size_t i;
01975     REBackTrackData *result =
01976         (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
01977 
01978     size_t sz = sizeof(REBackTrackData) +
01979                 gData->stateStackTop * sizeof(REProgState) +
01980                 parenCount * sizeof(RECapture);
01981 
01982     ptrdiff_t btsize = gData->backTrackStackSize;
01983     ptrdiff_t btincr = ((char *)result + sz) -
01984                        ((char *)gData->backTrackStack + btsize);
01985 
01986     TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
01987 
01988     JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
01989     if (btincr > 0) {
01990         ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
01991 
01992         JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
01993         btincr = ((btincr+btsize-1)/btsize)*btsize;
01994         gData->backTrackStack = jsheap_grow(gData->pool, gData->backTrackStack, btsize, btincr);
01995         if (!gData->backTrackStack) {
01996             js_ReportOutOfScriptQuota(gData->cx);
01997             gData->ok = FALSE;
01998             return NULL;
01999         }
02000         gData->backTrackStackSize = btsize + btincr;
02001         result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
02002     }
02003     gData->backTrackSP = result;
02004     result->sz = gData->cursz;
02005     gData->cursz = sz;
02006 
02007     result->backtrack_op = op;
02008     result->backtrack_pc = target;
02009     result->cp = cp;
02010     result->parenCount = parenCount;
02011     result->parenIndex = parenIndex;
02012 
02013     result->saveStateStackTop = gData->stateStackTop;
02014     assert(gData->stateStackTop);
02015     memcpy(result + 1, gData->stateStack,
02016            sizeof(REProgState) * result->saveStateStackTop);
02017 
02018     if (parenCount != 0) {
02019         memcpy((char *)(result + 1) +
02020                sizeof(REProgState) * result->saveStateStackTop,
02021                &x->parens[parenIndex],
02022                sizeof(RECapture) * parenCount);
02023         for (i = 0; i != parenCount; i++)
02024             x->parens[parenIndex + i].index = -1;
02025     }
02026 
02027     return result;
02028 }
02029 
02030 static inline REMatchState *
02031 FlatNIMatcher(REGlobalData *gData, REMatchState *x, WCHAR *matchChars,
02032               size_t length)
02033 {
02034     size_t i;
02035     assert(gData->cpend >= x->cp);
02036     if (length > (size_t)(gData->cpend - x->cp))
02037         return NULL;
02038     for (i = 0; i != length; i++) {
02039         if (toupperW(matchChars[i]) != toupperW(x->cp[i]))
02040             return NULL;
02041     }
02042     x->cp += length;
02043     return x;
02044 }
02045 
02046 /*
02047  * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
02048  * 2. If E is not a character then go to step 6.
02049  * 3. Let ch be E's character.
02050  * 4. Let A be a one-element RECharSet containing the character ch.
02051  * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
02052  * 6. E must be an integer. Let n be that integer.
02053  * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
02054  * 8. Return an internal Matcher closure that takes two arguments, a State x
02055  *    and a Continuation c, and performs the following:
02056  *     1. Let cap be x's captures internal array.
02057  *     2. Let s be cap[n].
02058  *     3. If s is undefined, then call c(x) and return its result.
02059  *     4. Let e be x's endIndex.
02060  *     5. Let len be s's length.
02061  *     6. Let f be e+len.
02062  *     7. If f>InputLength, return failure.
02063  *     8. If there exists an integer i between 0 (inclusive) and len (exclusive)
02064  *        such that Canonicalize(s[i]) is not the same character as
02065  *        Canonicalize(Input [e+i]), then return failure.
02066  *     9. Let y be the State (f, cap).
02067  *     10. Call c(y) and return its result.
02068  */
02069 static REMatchState *
02070 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
02071 {
02072     size_t len, i;
02073     const WCHAR *parenContent;
02074     RECapture *cap = &x->parens[parenIndex];
02075 
02076     if (cap->index == -1)
02077         return x;
02078 
02079     len = cap->length;
02080     if (x->cp + len > gData->cpend)
02081         return NULL;
02082 
02083     parenContent = &gData->cpbegin[cap->index];
02084     if (gData->regexp->flags & JSREG_FOLD) {
02085         for (i = 0; i < len; i++) {
02086             if (toupperW(parenContent[i]) != toupperW(x->cp[i]))
02087                 return NULL;
02088         }
02089     } else {
02090         for (i = 0; i < len; i++) {
02091             if (parenContent[i] != x->cp[i])
02092                 return NULL;
02093         }
02094     }
02095     x->cp += len;
02096     return x;
02097 }
02098 
02099 /* Add a single character to the RECharSet */
02100 static void
02101 AddCharacterToCharSet(RECharSet *cs, WCHAR c)
02102 {
02103     UINT byteIndex = (UINT)(c >> 3);
02104     assert(c <= cs->length);
02105     cs->u.bits[byteIndex] |= 1 << (c & 0x7);
02106 }
02107 
02108 
02109 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
02110 static void
02111 AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
02112 {
02113     UINT i;
02114 
02115     UINT byteIndex1 = c1 >> 3;
02116     UINT byteIndex2 = c2 >> 3;
02117 
02118     assert(c2 <= cs->length && c1 <= c2);
02119 
02120     c1 &= 0x7;
02121     c2 &= 0x7;
02122 
02123     if (byteIndex1 == byteIndex2) {
02124         cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
02125     } else {
02126         cs->u.bits[byteIndex1] |= 0xFF << c1;
02127         for (i = byteIndex1 + 1; i < byteIndex2; i++)
02128             cs->u.bits[i] = 0xFF;
02129         cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
02130     }
02131 }
02132 
02133 /* Compile the source of the class into a RECharSet */
02134 static BOOL
02135 ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
02136 {
02137     const WCHAR *src, *end;
02138     BOOL inRange = FALSE;
02139     WCHAR rangeStart = 0;
02140     UINT byteLength, n;
02141     WCHAR c, thisCh;
02142     INT nDigits, i;
02143 
02144     assert(!charSet->converted);
02145     /*
02146      * Assert that startIndex and length points to chars inside [] inside
02147      * source string.
02148      */
02149     assert(1 <= charSet->u.src.startIndex);
02150     assert(charSet->u.src.startIndex
02151               < SysStringLen(gData->regexp->source));
02152     assert(charSet->u.src.length <= SysStringLen(gData->regexp->source)
02153                                        - 1 - charSet->u.src.startIndex);
02154 
02155     charSet->converted = TRUE;
02156     src = gData->regexp->source + charSet->u.src.startIndex;
02157 
02158     end = src + charSet->u.src.length;
02159 
02160     assert(src[-1] == '[' && end[0] == ']');
02161 
02162     byteLength = (charSet->length >> 3) + 1;
02163     charSet->u.bits = heap_alloc(byteLength);
02164     if (!charSet->u.bits) {
02165         JS_ReportOutOfMemory(gData->cx);
02166         gData->ok = FALSE;
02167         return FALSE;
02168     }
02169     memset(charSet->u.bits, 0, byteLength);
02170 
02171     if (src == end)
02172         return TRUE;
02173 
02174     if (*src == '^') {
02175         assert(charSet->sense == FALSE);
02176         ++src;
02177     } else {
02178         assert(charSet->sense == TRUE);
02179     }
02180 
02181     while (src != end) {
02182         switch (*src) {
02183           case '\\':
02184             ++src;
02185             c = *src++;
02186             switch (c) {
02187               case 'b':
02188                 thisCh = 0x8;
02189                 break;
02190               case 'f':
02191                 thisCh = 0xC;
02192                 break;
02193               case 'n':
02194                 thisCh = 0xA;
02195                 break;
02196               case 'r':
02197                 thisCh = 0xD;
02198                 break;
02199               case 't':
02200                 thisCh = 0x9;
02201                 break;
02202               case 'v':
02203                 thisCh = 0xB;
02204                 break;
02205               case 'c':
02206                 if (src < end && JS_ISWORD(*src)) {
02207                     thisCh = (WCHAR)(*src++ & 0x1F);
02208                 } else {
02209                     --src;
02210                     thisCh = '\\';
02211                 }
02212                 break;
02213               case 'x':
02214                 nDigits = 2;
02215                 goto lexHex;
02216               case 'u':
02217                 nDigits = 4;
02218             lexHex:
02219                 n = 0;
02220                 for (i = 0; (i < nDigits) && (src < end); i++) {
02221                     UINT digit;
02222                     c = *src++;
02223                     if (!isASCIIHexDigit(c, &digit)) {
02224                         /*
02225                          * Back off to accepting the original '\'
02226                          * as a literal
02227                          */
02228                         src -= i + 1;
02229                         n = '\\';
02230                         break;
02231                     }
02232                     n = (n << 4) | digit;
02233                 }
02234                 thisCh = (WCHAR)n;
02235                 break;
02236               case '0':
02237               case '1':
02238               case '2':
02239               case '3':
02240               case '4':
02241               case '5':
02242               case '6':
02243               case '7':
02244                 /*
02245                  *  This is a non-ECMA extension - decimal escapes (in this
02246                  *  case, octal!) are supposed to be an error inside class
02247                  *  ranges, but supported here for backwards compatibility.
02248                  */
02249                 n = JS7_UNDEC(c);
02250                 c = *src;
02251                 if ('0' <= c && c <= '7') {
02252                     src++;
02253                     n = 8 * n + JS7_UNDEC(c);
02254                     c = *src;
02255                     if ('0' <= c && c <= '7') {
02256                         src++;
02257                         i = 8 * n + JS7_UNDEC(c);
02258                         if (i <= 0377)
02259                             n = i;
02260                         else
02261                             src--;
02262                     }
02263                 }
02264                 thisCh = (WCHAR)n;
02265                 break;
02266 
02267               case 'd':
02268                 AddCharacterRangeToCharSet(charSet, '0', '9');
02269                 continue;   /* don't need range processing */
02270               case 'D':
02271                 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
02272                 AddCharacterRangeToCharSet(charSet,
02273                                            (WCHAR)('9' + 1),
02274                                            (WCHAR)charSet->length);
02275                 continue;
02276               case 's':
02277                 for (i = (INT)charSet->length; i >= 0; i--)
02278                     if (isspaceW(i))
02279                         AddCharacterToCharSet(charSet, (WCHAR)i);
02280                 continue;
02281               case 'S':
02282                 for (i = (INT)charSet->length; i >= 0; i--)
02283                     if (!isspaceW(i))
02284                         AddCharacterToCharSet(charSet, (WCHAR)i);
02285                 continue;
02286               case 'w':
02287                 for (i = (INT)charSet->length; i >= 0; i--)
02288                     if (JS_ISWORD(i))
02289                         AddCharacterToCharSet(charSet, (WCHAR)i);
02290                 continue;
02291               case 'W':
02292                 for (i = (INT)charSet->length; i >= 0; i--)
02293                     if (!JS_ISWORD(i))
02294                         AddCharacterToCharSet(charSet, (WCHAR)i);
02295                 continue;
02296               default:
02297                 thisCh = c;
02298                 break;
02299 
02300             }
02301             break;
02302 
02303           default:
02304             thisCh = *src++;
02305             break;
02306 
02307         }
02308         if (inRange) {
02309             if (gData->regexp->flags & JSREG_FOLD) {
02310                 int i;
02311 
02312                 assert(rangeStart <= thisCh);
02313                 for (i = rangeStart; i <= thisCh; i++) {
02314                     WCHAR uch, dch;
02315 
02316                     AddCharacterToCharSet(charSet, i);
02317                     uch = toupperW(i);
02318                     dch = tolowerW(i);
02319                     if (i != uch)
02320                         AddCharacterToCharSet(charSet, uch);
02321                     if (i != dch)
02322                         AddCharacterToCharSet(charSet, dch);
02323                 }
02324             } else {
02325                 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
02326             }
02327             inRange = FALSE;
02328         } else {
02329             if (gData->regexp->flags & JSREG_FOLD) {
02330                 AddCharacterToCharSet(charSet, toupperW(thisCh));
02331                 AddCharacterToCharSet(charSet, tolowerW(thisCh));
02332             } else {
02333                 AddCharacterToCharSet(charSet, thisCh);
02334             }
02335             if (src < end - 1) {
02336                 if (*src == '-') {
02337                     ++src;
02338                     inRange = TRUE;
02339                     rangeStart = thisCh;
02340                 }
02341             }
02342         }
02343     }
02344     return TRUE;
02345 }
02346 
02347 static BOOL
02348 ReallocStateStack(REGlobalData *gData)
02349 {
02350     size_t limit = gData->stateStackLimit;
02351     size_t sz = sizeof(REProgState) * limit;
02352 
02353     gData->stateStack = jsheap_grow(gData->pool, gData->stateStack, sz, sz);
02354     if (!gData->stateStack) {
02355         js_ReportOutOfScriptQuota(gData->cx);
02356         gData->ok = FALSE;
02357         return FALSE;
02358     }
02359     gData->stateStackLimit = limit + limit;
02360     return TRUE;
02361 }
02362 
02363 #define PUSH_STATE_STACK(data)                                                \
02364     do {                                                                      \
02365         ++(data)->stateStackTop;                                              \
02366         if ((data)->stateStackTop == (data)->stateStackLimit &&               \
02367             !ReallocStateStack((data))) {                                     \
02368             return NULL;                                                      \
02369         }                                                                     \
02370     }while(0)
02371 
02372 /*
02373  * Apply the current op against the given input to see if it's going to match
02374  * or fail. Return false if we don't get a match, true if we do. If updatecp is
02375  * true, then update the current state's cp. Always update startpc to the next
02376  * op.
02377  */
02378 static inline REMatchState *
02379 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
02380             jsbytecode **startpc, BOOL updatecp)
02381 {
02382     REMatchState *result = NULL;
02383     WCHAR matchCh;
02384     size_t parenIndex;
02385     size_t offset, length, index;
02386     jsbytecode *pc = *startpc;  /* pc has already been incremented past op */
02387     WCHAR *source;
02388     const WCHAR *startcp = x->cp;
02389     WCHAR ch;
02390     RECharSet *charSet;
02391 
02392     const char *opname = reop_names[op];
02393     TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
02394           (int)gData->stateStackTop * 2, "", opname);
02395 
02396     switch (op) {
02397       case REOP_EMPTY:
02398         result = x;
02399         break;
02400       case REOP_BOL:
02401         if (x->cp != gData->cpbegin) {
02402             if (
02403                 !(gData->regexp->flags & JSREG_MULTILINE)) {
02404                 break;
02405             }
02406             if (!RE_IS_LINE_TERM(x->cp[-1]))
02407                 break;
02408         }
02409         result = x;
02410         break;
02411       case REOP_EOL:
02412         if (x->cp != gData->cpend) {
02413             if (
02414                 !(gData->regexp->flags & JSREG_MULTILINE)) {
02415                 break;
02416             }
02417             if (!RE_IS_LINE_TERM(*x->cp))
02418                 break;
02419         }
02420         result = x;
02421         break;
02422       case REOP_WBDRY:
02423         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
02424             !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
02425             result = x;
02426         }
02427         break;
02428       case REOP_WNONBDRY:
02429         if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
02430             (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
02431             result = x;
02432         }
02433         break;
02434       case REOP_DOT:
02435         if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
02436             result = x;
02437             result->cp++;
02438         }
02439         break;
02440       case REOP_DIGIT:
02441         if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
02442             result = x;
02443             result->cp++;
02444         }
02445         break;
02446       case REOP_NONDIGIT:
02447         if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
02448             result = x;
02449             result->cp++;
02450         }
02451         break;
02452       case REOP_ALNUM:
02453         if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
02454             result = x;
02455             result->cp++;
02456         }
02457         break;
02458       case REOP_NONALNUM:
02459         if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
02460             result = x;
02461             result->cp++;
02462         }
02463         break;
02464       case REOP_SPACE:
02465         if (x->cp != gData->cpend && isspaceW(*x->cp)) {
02466             result = x;
02467             result->cp++;
02468         }
02469         break;
02470       case REOP_NONSPACE:
02471         if (x->cp != gData->cpend && !isspaceW(*x->cp)) {
02472             result = x;
02473             result->cp++;
02474         }
02475         break;
02476       case REOP_BACKREF:
02477         pc = ReadCompactIndex(pc, &parenIndex);
02478         assert(parenIndex < gData->regexp->parenCount);
02479         result = BackrefMatcher(gData, x, parenIndex);
02480         break;
02481       case REOP_FLAT:
02482         pc = ReadCompactIndex(pc, &offset);
02483         assert(offset < SysStringLen(gData->regexp->source));
02484         pc = ReadCompactIndex(pc, &length);
02485         assert(1 <= length);
02486         assert(length <= SysStringLen(gData->regexp->source) - offset);
02487         if (length <= (size_t)(gData->cpend - x->cp)) {
02488             source = gData->regexp->source + offset;
02489             TRACE("%s\n", debugstr_wn(source, length));
02490             for (index = 0; index != length; index++) {
02491                 if (source[index] != x->cp[index])
02492                     return NULL;
02493             }
02494             x->cp += length;
02495             result = x;
02496         }
02497         break;
02498       case REOP_FLAT1:
02499         matchCh = *pc++;
02500         TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
02501         if (x->cp != gData->cpend && *x->cp == matchCh) {
02502             result = x;
02503             result->cp++;
02504         }
02505         break;
02506       case REOP_FLATi:
02507         pc = ReadCompactIndex(pc, &offset);
02508         assert(offset < SysStringLen(gData->regexp->source));
02509         pc = ReadCompactIndex(pc, &length);
02510         assert(1 <= length);
02511         assert(length <= SysStringLen(gData->regexp->source) - offset);
02512         source = gData->regexp->source;
02513         result = FlatNIMatcher(gData, x, source + offset, length);
02514         break;
02515       case REOP_FLAT1i:
02516         matchCh = *pc++;
02517         if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
02518             result = x;
02519             result->cp++;
02520         }
02521         break;
02522       case REOP_UCFLAT1:
02523         matchCh = GET_ARG(pc);
02524         TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
02525         pc += ARG_LEN;
02526         if (x->cp != gData->cpend && *x->cp == matchCh) {
02527             result = x;
02528             result->cp++;
02529         }
02530         break;
02531       case REOP_UCFLAT1i:
02532         matchCh = GET_ARG(pc);
02533         pc += ARG_LEN;
02534         if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
02535             result = x;
02536             result->cp++;
02537         }
02538         break;
02539       case REOP_CLASS:
02540         pc = ReadCompactIndex(pc, &index);
02541         assert(index < gData->regexp->classCount);
02542         if (x->cp != gData->cpend) {
02543             charSet = &gData->regexp->classList[index];
02544             assert(charSet->converted);
02545             ch = *x->cp;
02546             index = ch >> 3;
02547             if (charSet->length != 0 &&
02548                 ch <= charSet->length &&
02549                 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
02550                 result = x;
02551                 result->cp++;
02552             }
02553         }
02554         break;
02555       case REOP_NCLASS:
02556         pc = ReadCompactIndex(pc, &index);
02557         assert(index < gData->regexp->classCount);
02558         if (x->cp != gData->cpend) {
02559             charSet = &gData->regexp->classList[index];
02560             assert(charSet->converted);
02561             ch = *x->cp;
02562             index = ch >> 3;
02563             if (charSet->length == 0 ||
02564                 ch > charSet->length ||
02565                 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
02566                 result = x;
02567                 result->cp++;
02568             }
02569         }
02570         break;
02571 
02572       default:
02573         assert(FALSE);
02574     }
02575     if (result) {
02576         if (!updatecp)
02577             x->cp = startcp;
02578         *startpc = pc;
02579         TRACE(" *\n");
02580         return result;
02581     }
02582     x->cp = startcp;
02583     return NULL;
02584 }
02585 
02586 static inline REMatchState *
02587 ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
02588 {
02589     REMatchState *result = NULL;
02590     REBackTrackData *backTrackData;
02591     jsbytecode *nextpc, *testpc;
02592     REOp nextop;
02593     RECapture *cap;
02594     REProgState *curState;
02595     const WCHAR *startcp;
02596     size_t parenIndex, k;
02597     size_t parenSoFar = 0;
02598 
02599     WCHAR matchCh1, matchCh2;
02600     RECharSet *charSet;
02601 
02602     BOOL anchor;
02603     jsbytecode *pc = gData->regexp->program;
02604     REOp op = (REOp) *pc++;
02605 
02606     /*
02607      * If the first node is a simple match, step the index into the string
02608      * until that match is made, or fail if it can't be found at all.
02609      */
02610     if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) {
02611         anchor = FALSE;
02612         while (x->cp <= gData->cpend) {
02613             nextpc = pc;    /* reset back to start each time */
02614             result = SimpleMatch(gData, x, op, &nextpc, TRUE);
02615             if (result) {
02616                 anchor = TRUE;
02617                 x = result;
02618                 pc = nextpc;    /* accept skip to next opcode */
02619                 op = (REOp) *pc++;
02620                 assert(op < REOP_LIMIT);
02621                 break;
02622             }
02623             gData->skipped++;
02624             x->cp++;
02625         }
02626         if (!anchor)
02627             goto bad;
02628     }
02629 
02630     for (;;) {
02631         const char *opname = reop_names[op];
02632         TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
02633               (int)gData->stateStackTop * 2, "", opname);
02634 
02635         if (REOP_IS_SIMPLE(op)) {
02636             result = SimpleMatch(gData, x, op, &pc, TRUE);
02637         } else {
02638             curState = &gData->stateStack[gData->stateStackTop];
02639             switch (op) {
02640               case REOP_END:
02641                 goto good;
02642               case REOP_ALTPREREQ2:
02643                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
02644                 pc += ARG_LEN;
02645                 matchCh2 = GET_ARG(pc);
02646                 pc += ARG_LEN;
02647                 k = GET_ARG(pc);
02648                 pc += ARG_LEN;
02649 
02650                 if (x->cp != gData->cpend) {
02651                     if (*x->cp == matchCh2)
02652                         goto doAlt;
02653 
02654                     charSet = &gData->regexp->classList[k];
02655                     if (!charSet->converted && !ProcessCharSet(gData, charSet))
02656                         goto bad;
02657                     matchCh1 = *x->cp;
02658                     k = matchCh1 >> 3;
02659                     if ((charSet->length == 0 ||
02660                          matchCh1 > charSet->length ||
02661                          !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
02662                         charSet->sense) {
02663                         goto doAlt;
02664                     }
02665                 }
02666                 result = NULL;
02667                 break;
02668 
02669               case REOP_ALTPREREQ:
02670                 nextpc = pc + GET_OFFSET(pc);   /* start of next op */
02671                 pc += ARG_LEN;
02672                 matchCh1 = GET_ARG(pc);
02673                 pc += ARG_LEN;
02674                 matchCh2 = GET_ARG(pc);
02675                 pc += ARG_LEN;
02676                 if (x->cp == gData->cpend ||
02677                     (*x->cp != matchCh1 && *x->cp != matchCh2)) {
02678                     result = NULL;
02679                     break;
02680                 }
02681                 /* else false thru... */
02682 
02683               case REOP_ALT:
02684               doAlt:
02685                 nextpc = pc + GET_OFFSET(pc);   /* start of next alternate */
02686                 pc += ARG_LEN;                  /* start of this alternate */
02687                 curState->parenSoFar = parenSoFar;
02688                 PUSH_STATE_STACK(gData);
02689                 op = (REOp) *pc++;
02690                 startcp = x->cp;
02691                 if (REOP_IS_SIMPLE(op)) {
02692                     if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
02693                         op = (REOp) *nextpc++;
02694                         pc = nextpc;
02695                         continue;
02696                     }
02697                     result = x;
02698                     op = (REOp) *pc++;
02699                 }
02700                 nextop = (REOp) *nextpc++;
02701                 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
02702                     goto bad;
02703                 continue;
02704 
02705               /*
02706                * Occurs at (successful) end of REOP_ALT,
02707                */
02708               case REOP_JUMP:
02709                 /*
02710                  * If we have not gotten a result here, it is because of an
02711                  * empty match.  Do the same thing REOP_EMPTY would do.
02712                  */
02713                 if (!result)
02714                     result = x;
02715 
02716                 --gData->stateStackTop;
02717                 pc += GET_OFFSET(pc);
02718                 op = (REOp) *pc++;
02719                 continue;
02720 
02721               /*
02722                * Occurs at last (successful) end of REOP_ALT,
02723                */
02724               case REOP_ENDALT:
02725                 /*
02726                  * If we have not gotten a result here, it is because of an
02727                  * empty match.  Do the same thing REOP_EMPTY would do.
02728                  */
02729                 if (!result)
02730                     result = x;
02731 
02732                 --gData->stateStackTop;
02733                 op = (REOp) *pc++;
02734                 continue;
02735 
02736               case REOP_LPAREN:
02737                 pc = ReadCompactIndex(pc, &parenIndex);
02738                 TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
02739                 assert(parenIndex < gData->regexp->parenCount);
02740                 if (parenIndex + 1 > parenSoFar)
02741                     parenSoFar = parenIndex + 1;
02742                 x->parens[parenIndex].index = x->cp - gData->cpbegin;
02743                 x->parens[parenIndex].length = 0;
02744                 op = (REOp) *pc++;
02745                 continue;
02746 
02747               case REOP_RPAREN:
02748               {
02749                 ptrdiff_t delta;
02750 
02751                 pc = ReadCompactIndex(pc, &parenIndex);
02752                 assert(parenIndex < gData->regexp->parenCount);
02753                 cap = &x->parens[parenIndex];
02754                 delta = x->cp - (gData->cpbegin + cap->index);
02755                 cap->length = (delta < 0) ? 0 : (size_t) delta;
02756                 op = (REOp) *pc++;
02757 
02758                 if (!result)
02759                     result = x;
02760                 continue;
02761               }
02762               case REOP_ASSERT:
02763                 nextpc = pc + GET_OFFSET(pc);  /* start of term after ASSERT */
02764                 pc += ARG_LEN;                 /* start of ASSERT child */
02765                 op = (REOp) *pc++;
02766                 testpc = pc;
02767                 if (REOP_IS_SIMPLE(op) &&
02768                     !SimpleMatch(gData, x, op, &testpc, FALSE)) {
02769                     result = NULL;
02770                     break;
02771                 }
02772                 curState->u.assertion.top =
02773                     (char *)gData->backTrackSP - (char *)gData->backTrackStack;
02774                 curState->u.assertion.sz = gData->cursz;
02775                 curState->index = x->cp - gData->cpbegin;
02776                 curState->parenSoFar = parenSoFar;
02777                 PUSH_STATE_STACK(gData);
02778                 if (!PushBackTrackState(gData, REOP_ASSERTTEST,
02779                                         nextpc, x, x->cp, 0, 0)) {
02780                     goto bad;
02781                 }
02782                 continue;
02783 
02784               case REOP_ASSERT_NOT:
02785                 nextpc = pc + GET_OFFSET(pc);
02786                 pc += ARG_LEN;
02787                 op = (REOp) *pc++;
02788                 testpc = pc;
02789                 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
02790                     SimpleMatch(gData, x, op, &testpc, FALSE) &&
02791                     *testpc == REOP_ASSERTNOTTEST) {
02792                     result = NULL;
02793                     break;
02794                 }
02795                 curState->u.assertion.top
02796                     = (char *)gData->backTrackSP -
02797                       (char *)gData->backTrackStack;
02798                 curState->u.assertion.sz = gData->cursz;
02799                 curState->index = x->cp - gData->cpbegin;
02800                 curState->parenSoFar = parenSoFar;
02801                 PUSH_STATE_STACK(gData);
02802                 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
02803                                         nextpc, x, x->cp, 0, 0)) {
02804                     goto bad;
02805                 }
02806                 continue;
02807 
02808               case REOP_ASSERTTEST:
02809                 --gData->stateStackTop;
02810                 --curState;
02811                 x->cp = gData->cpbegin + curState->index;
02812                 gData->backTrackSP =
02813                     (REBackTrackData *) ((char *)gData->backTrackStack +
02814                                          curState->u.assertion.top);
02815                 gData->cursz = curState->u.assertion.sz;
02816                 if (result)
02817                     result = x;
02818                 break;
02819 
02820               case REOP_ASSERTNOTTEST:
02821                 --gData->stateStackTop;
02822                 --curState;
02823                 x->cp = gData->cpbegin + curState->index;
02824                 gData->backTrackSP =
02825                     (REBackTrackData *) ((char *)gData->backTrackStack +
02826                                          curState->u.assertion.top);
02827                 gData->cursz = curState->u.assertion.sz;
02828                 result = (!result) ? x : NULL;
02829                 break;
02830               case REOP_STAR:
02831                 curState->u.quantifier.min = 0;
02832                 curState->u.quantifier.max = (UINT)-1;
02833                 goto quantcommon;
02834               case REOP_PLUS:
02835                 curState->u.quantifier.min = 1;
02836                 curState->u.quantifier.max = (UINT)-1;
02837                 goto quantcommon;
02838               case REOP_OPT:
02839                 curState->u.quantifier.min = 0;
02840                 curState->u.quantifier.max = 1;
02841                 goto quantcommon;
02842               case REOP_QUANT:
02843                 pc = ReadCompactIndex(pc, &k);
02844                 curState->u.quantifier.min = k;
02845                 pc = ReadCompactIndex(pc, &k);
02846                 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
02847                 curState->u.quantifier.max = k - 1;
02848                 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
02849               quantcommon:
02850                 if (curState->u.quantifier.max == 0) {
02851                     pc = pc + GET_OFFSET(pc);
02852                     op = (REOp) *pc++;
02853                     result = x;
02854                     continue;
02855                 }
02856                 /* Step over <next> */
02857                 nextpc = pc + ARG_LEN;
02858                 op = (REOp) *nextpc++;
02859                 startcp = x->cp;
02860                 if (REOP_IS_SIMPLE(op)) {
02861                     if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
02862                         if (curState->u.quantifier.min == 0)
02863                             result = x;
02864                         else
02865                             result = NULL;
02866                         pc = pc + GET_OFFSET(pc);
02867                         break;
02868                     }
02869                     op = (REOp) *nextpc++;
02870                     result = x;
02871                 }
02872                 curState->index = startcp - gData->cpbegin;
02873                 curState->continue_op = REOP_REPEAT;
02874                 curState->continue_pc = pc;
02875                 curState->parenSoFar = parenSoFar;
02876                 PUSH_STATE_STACK(gData);
02877                 if (curState->u.quantifier.min == 0 &&
02878                     !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
02879                                         0, 0)) {
02880                     goto bad;
02881                 }
02882                 pc = nextpc;
02883                 continue;
02884 
02885               case REOP_ENDCHILD: /* marks the end of a quantifier child */
02886                 pc = curState[-1].continue_pc;
02887                 op = (REOp) curState[-1].continue_op;
02888 
02889                 if (!result)
02890                     result = x;
02891                 continue;
02892 
02893               case REOP_REPEAT:
02894                 --curState;
02895                 do {
02896                     --gData->stateStackTop;
02897                     if (!result) {
02898                         /* Failed, see if we have enough children. */
02899                         if (curState->u.quantifier.min == 0)
02900                             goto repeatDone;
02901                         goto break_switch;
02902                     }
02903                     if (curState->u.quantifier.min == 0 &&
02904                         x->cp == gData->cpbegin + curState->index) {
02905                         /* matched an empty string, that'll get us nowhere */
02906                         result = NULL;
02907                         goto break_switch;
02908                     }
02909                     if (curState->u.quantifier.min != 0)
02910                         curState->u.quantifier.min--;
02911                     if (curState->u.quantifier.max != (UINT) -1)
02912                         curState->u.quantifier.max--;
02913                     if (curState->u.quantifier.max == 0)
02914                         goto repeatDone;
02915                     nextpc = pc + ARG_LEN;
02916                     nextop = (REOp) *nextpc;
02917                     startcp = x->cp;
02918                     if (REOP_IS_SIMPLE(nextop)) {
02919                         nextpc++;
02920                         if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
02921                             if (curState->u.quantifier.min == 0)
02922                                 goto repeatDone;
02923                             result = NULL;
02924                             goto break_switch;
02925                         }
02926                         result = x;
02927                     }
02928                     curState->index = startcp - gData->cpbegin;
02929                     PUSH_STATE_STACK(gData);
02930                     if (curState->u.quantifier.min == 0 &&
02931                         !PushBackTrackState(gData, REOP_REPEAT,
02932                                             pc, x, startcp,
02933                                             curState->parenSoFar,
02934                                             parenSoFar -
02935                                             curState->parenSoFar)) {
02936                         goto bad;
02937                     }
02938                 } while (*nextpc == REOP_ENDCHILD);
02939                 pc = nextpc;
02940                 op = (REOp) *pc++;
02941                 parenSoFar = curState->parenSoFar;
02942                 continue;
02943 
02944               repeatDone:
02945                 result = x;
02946                 pc += GET_OFFSET(pc);
02947                 goto break_switch;
02948 
02949               case REOP_MINIMALSTAR:
02950                 curState->u.quantifier.min = 0;
02951                 curState->u.quantifier.max = (UINT)-1;
02952                 goto minimalquantcommon;
02953               case REOP_MINIMALPLUS:
02954                 curState->u.quantifier.min = 1;
02955                 curState->u.quantifier.max = (UINT)-1;
02956                 goto minimalquantcommon;
02957               case REOP_MINIMALOPT:
02958                 curState->u.quantifier.min = 0;
02959                 curState->u.quantifier.max = 1;
02960                 goto minimalquantcommon;
02961               case REOP_MINIMALQUANT:
02962                 pc = ReadCompactIndex(pc, &k);
02963                 curState->u.quantifier.min = k;
02964                 pc = ReadCompactIndex(pc, &k);
02965                 /* See REOP_QUANT comments about k - 1. */
02966                 curState->u.quantifier.max = k - 1;
02967                 assert(curState->u.quantifier.min
02968                           <= curState->u.quantifier.max);
02969               minimalquantcommon:
02970                 curState->index = x->cp - gData->cpbegin;
02971                 curState->parenSoFar = parenSoFar;
02972                 PUSH_STATE_STACK(gData);
02973                 if (curState->u.quantifier.min != 0) {
02974                     curState->continue_op = REOP_MINIMALREPEAT;
02975                     curState->continue_pc = pc;
02976                     /* step over <next> */
02977                     pc += OFFSET_LEN;
02978                     op = (REOp) *pc++;
02979                 } else {
02980                     if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
02981                                             pc, x, x->cp, 0, 0)) {
02982                         goto bad;
02983                     }
02984                     --gData->stateStackTop;
02985                     pc = pc + GET_OFFSET(pc);
02986                     op = (REOp) *pc++;
02987                 }
02988                 continue;
02989 
02990               case REOP_MINIMALREPEAT:
02991                 --gData->stateStackTop;
02992                 --curState;
02993 
02994                 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
02995 #define PREPARE_REPEAT()                                                      \
02996     do {                                                                      \
02997         curState->index = x->cp - gData->cpbegin;                             \
02998         curState->continue_op = REOP_MINIMALREPEAT;                           \
02999         curState->continue_pc = pc;                                           \
03000         pc += ARG_LEN;                                                        \
03001         for (k = curState->parenSoFar; k < parenSoFar; k++)                   \
03002             x->parens[k].index = -1;                                          \
03003         PUSH_STATE_STACK(gData);                                              \
03004         op = (REOp) *pc++;                                                    \
03005         assert(op < REOP_LIMIT);                                              \
03006     }while(0)
03007 
03008                 if (!result) {
03009                     TRACE(" -\n");
03010                     /*
03011                      * Non-greedy failure - try to consume another child.
03012                      */
03013                     if (curState->u.quantifier.max == (UINT) -1 ||
03014                         curState->u.quantifier.max > 0) {
03015                         PREPARE_REPEAT();
03016                         continue;
03017                     }
03018                     /* Don't need to adjust pc since we're going to pop. */
03019                     break;
03020                 }
03021                 if (curState->u.quantifier.min == 0 &&
03022                     x->cp == gData->cpbegin + curState->index) {
03023                     /* Matched an empty string, that'll get us nowhere. */
03024                     result = NULL;
03025                     break;
03026                 }
03027                 if (curState->u.quantifier.min != 0)
03028                     curState->u.quantifier.min--;
03029                 if (curState->u.quantifier.max != (UINT) -1)
03030                     curState->u.quantifier.max--;
03031                 if (curState->u.quantifier.min != 0) {
03032                     PREPARE_REPEAT();
03033                     continue;
03034                 }
03035                 curState->index = x->cp - gData->cpbegin;
03036                 curState->parenSoFar = parenSoFar;
03037                 PUSH_STATE_STACK(gData);
03038                 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
03039                                         pc, x, x->cp,
03040                                         curState->parenSoFar,
03041                                         parenSoFar - curState->parenSoFar)) {
03042                     goto bad;
03043                 }
03044                 --gData->stateStackTop;
03045                 pc = pc + GET_OFFSET(pc);
03046                 op = (REOp) *pc++;
03047                 assert(op < REOP_LIMIT);
03048                 continue;
03049               default:
03050                 assert(FALSE);
03051                 result = NULL;
03052             }
03053           break_switch:;
03054         }
03055 
03056         /*
03057          *  If the match failed and there's a backtrack option, take it.
03058          *  Otherwise this is a complete and utter failure.
03059          */
03060         if (!result) {
03061             if (gData->cursz == 0)
03062                 return NULL;
03063 
03064             /* Potentially detect explosive regex here. */
03065             gData->backTrackCount++;
03066             if (gData->backTrackLimit &&
03067                 gData->backTrackCount >= gData->backTrackLimit) {
03068                 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
03069                                      JSMSG_REGEXP_TOO_COMPLEX);
03070                 gData->ok = FALSE;
03071                 return NULL;
03072             }
03073 
03074             backTrackData = gData->backTrackSP;
03075             gData->cursz = backTrackData->sz;
03076             gData->backTrackSP =
03077                 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
03078             x->cp = backTrackData->cp;
03079             pc = backTrackData->backtrack_pc;
03080             op = (REOp) backTrackData->backtrack_op;
03081             assert(op < REOP_LIMIT);
03082             gData->stateStackTop = backTrackData->saveStateStackTop;
03083             assert(gData->stateStackTop);
03084 
03085             memcpy(gData->stateStack, backTrackData + 1,
03086                    sizeof(REProgState) * backTrackData->saveStateStackTop);
03087             curState = &gData->stateStack[gData->stateStackTop - 1];
03088 
03089             if (backTrackData->parenCount) {
03090                 memcpy(&x->parens[backTrackData->parenIndex],
03091                        (char *)(backTrackData + 1) +
03092                        sizeof(REProgState) * backTrackData->saveStateStackTop,
03093                        sizeof(RECapture) * backTrackData->parenCount);
03094                 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
03095             } else {
03096                 for (k = curState->parenSoFar; k < parenSoFar; k++)
03097                     x->parens[k].index = -1;
03098                 parenSoFar = curState->parenSoFar;
03099             }
03100 
03101             TRACE("\tBT_Pop: %ld,%ld\n",
03102                      (ULONG_PTR)backTrackData->parenIndex,
03103                      (ULONG_PTR)backTrackData->parenCount);
03104             continue;
03105         }
03106         x = result;
03107 
03108         /*
03109          *  Continue with the expression.
03110          */
03111         op = (REOp)*pc++;
03112         assert(op < REOP_LIMIT);
03113     }
03114 
03115 bad:
03116     TRACE("\n");
03117     return NULL;
03118 
03119 good:
03120     TRACE("\n");
03121     return x;
03122 }
03123 
03124 static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
03125 {
03126     REMatchState *result;
03127     const WCHAR *cp = x->cp;
03128     const WCHAR *cp2;
03129     UINT j;
03130 
03131     /*
03132      * Have to include the position beyond the last character
03133      * in order to detect end-of-input/line condition.
03134      */
03135     for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
03136         gData->skipped = cp2 - cp;
03137         x->cp = cp2;
03138         for (j = 0; j < gData->regexp->parenCount; j++)
03139             x->parens[j].index = -1;
03140         result = ExecuteREBytecode(gData, x);
03141         if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY))
03142             return result;
03143         gData->backTrackSP = gData->backTrackStack;
03144         gData->cursz = 0;
03145         gData->stateStackTop = 0;
03146         cp2 = cp + gData->skipped;
03147     }
03148     return NULL;
03149 }
03150 
03151 #define MIN_BACKTRACK_LIMIT 400000
03152 
03153 static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
03154 {
03155     REMatchState *result;
03156     UINT i;
03157 
03158     gData->backTrackStackSize = INITIAL_BACKTRACK;
03159     gData->backTrackStack = jsheap_alloc(gData->pool, INITIAL_BACKTRACK);
03160     if (!gData->backTrackStack)
03161         goto bad;
03162 
03163     gData->backTrackSP = gData->backTrackStack;
03164     gData->cursz = 0;
03165     gData->backTrackCount = 0;
03166     gData->backTrackLimit = 0;
03167 
03168     gData->stateStackLimit = INITIAL_STATESTACK;
03169     gData->stateStack = jsheap_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
03170     if (!gData->stateStack)
03171         goto bad;
03172 
03173     gData->stateStackTop = 0;
03174     gData->cx = cx;
03175     gData->regexp = re;
03176     gData->ok = TRUE;
03177 
03178     result = jsheap_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
03179     if (!result)
03180         goto bad;
03181 
03182     for (i = 0; i < re->classCount; i++) {
03183         if (!re->classList[i].converted &&
03184             !ProcessCharSet(gData, &re->classList[i])) {
03185             return NULL;
03186         }
03187     }
03188 
03189     return result;
03190 
03191 bad:
03192     js_ReportOutOfScriptQuota(cx);
03193     gData->ok = FALSE;
03194     return NULL;
03195 }
03196 
03197 static void
03198 js_DestroyRegExp(JSRegExp *re)
03199 {
03200     if (re->classList) {
03201         UINT i;
03202         for (i = 0; i < re->classCount; i++) {
03203             if (re->classList[i].converted)
03204                 heap_free(re->classList[i].u.bits);
03205             re->classList[i].u.bits = NULL;
03206         }
03207         heap_free(re->classList);
03208     }
03209     heap_free(re);
03210 }
03211 
03212 static JSRegExp *
03213 js_NewRegExp(script_ctx_t *cx, BSTR str, UINT flags, BOOL flat)
03214 {
03215     JSRegExp *re;
03216     jsheap_t *mark;
03217     CompilerState state;
03218     size_t resize;
03219     jsbytecode *endPC;
03220     UINT i;
03221     size_t len;
03222 
03223     re = NULL;
03224     mark = jsheap_mark(&cx->tmp_heap);
03225     len = SysStringLen(str);
03226 
03227     state.context = cx;
03228     state.cp = str;
03229     if (!state.cp)
03230         goto out;
03231     state.cpbegin = state.cp;
03232     state.cpend = state.cp + len;
03233     state.flags = flags;
03234     state.parenCount = 0;
03235     state.classCount = 0;
03236     state.progLength = 0;
03237     state.treeDepth = 0;
03238     state.classBitmapsMem = 0;
03239     for (i = 0; i < CLASS_CACHE_SIZE; i++)
03240         state.classCache[i].start = NULL;
03241 
03242     if (len != 0 && flat) {
03243         state.result = NewRENode(&state, REOP_FLAT);
03244         if (!state.result)
03245             goto out;
03246         state.result->u.flat.chr = *state.cpbegin;
03247         state.result->u.flat.length = len;
03248         state.result->kid = (void *) state.cpbegin;
03249         /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
03250         state.progLength += 1 + GetCompactIndexWidth(0)
03251                           + GetCompactIndexWidth(len);
03252     } else {
03253         if (!ParseRegExp(&state))
03254             goto out;
03255     }
03256     resize = offsetof(JSRegExp, program) + state.progLength + 1;
03257     re = heap_alloc(resize);
03258     if (!re)
03259         goto out;
03260 
03261     assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
03262     re->classCount = state.classCount;
03263     if (re->classCount) {
03264         re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
03265         if (!re->classList) {
03266             js_DestroyRegExp(re);
03267             re = NULL;
03268             goto out;
03269         }
03270         for (i = 0; i < re->classCount; i++)
03271             re->classList[i].converted = FALSE;
03272     } else {
03273         re->classList = NULL;
03274     }
03275     endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
03276     if (!endPC) {
03277         js_DestroyRegExp(re);
03278         re = NULL;
03279         goto out;
03280     }
03281     *endPC++ = REOP_END;
03282     /*
03283      * Check whether size was overestimated and shrink using realloc.
03284      * This is safe since no pointers to newly parsed regexp or its parts
03285      * besides re exist here.
03286      */
03287     if ((size_t)(endPC - re->program) != state.progLength + 1) {
03288         JSRegExp *tmp;
03289         assert((size_t)(endPC - re->program) < state.progLength + 1);
03290         resize = offsetof(JSRegExp, program) + (endPC - re->program);
03291         tmp = heap_realloc(re, resize);
03292         if (tmp)
03293             re = tmp;
03294     }
03295 
03296     re->flags = flags;
03297     re->parenCount = state.parenCount;
03298     re->source = str;
03299 
03300 out:
03301     jsheap_clear(mark);
03302     return re;
03303 }
03304 
03305 static inline RegExpInstance *regexp_from_vdisp(vdisp_t *vdisp)
03306 {
03307     return (RegExpInstance*)vdisp->u.jsdisp;
03308 }
03309 
03310 static void set_last_index(RegExpInstance *This, DWORD last_index)
03311 {
03312     This->last_index = last_index;
03313     VariantClear(&This->last_index_var);
03314     num_set_val(&This->last_index_var, last_index);
03315 }
03316 
03317 static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, DWORD rem_flags,
03318         const WCHAR *str, DWORD len, const WCHAR **cp, match_result_t **parens, DWORD *parens_size,
03319         DWORD *parens_cnt, match_result_t *ret)
03320 {
03321     REMatchState *x, *result;
03322     REGlobalData gData;
03323     DWORD matchlen;
03324 
03325     gData.cpbegin = *cp;
03326     gData.cpend = str + len;
03327     gData.start = *cp-str;
03328     gData.skipped = 0;
03329     gData.pool = &ctx->tmp_heap;
03330 
03331     x = InitMatch(NULL, &gData, regexp->jsregexp, gData.cpend - gData.cpbegin);
03332     if(!x) {
03333         WARN("InitMatch failed\n");
03334         return E_FAIL;
03335     }
03336 
03337     x->cp = *cp;
03338     result = MatchRegExp(&gData, x);
03339     if(!gData.ok) {
03340         WARN("MatchRegExp failed\n");
03341         return E_FAIL;
03342     }
03343 
03344     if(!result) {
03345         if(rem_flags & REM_RESET_INDEX)
03346             set_last_index(regexp, 0);
03347         return S_FALSE;
03348     }
03349 
03350     if(parens) {
03351         if(regexp->jsregexp->parenCount > *parens_size) {
03352             match_result_t *new_parens;
03353 
03354             if(*parens)
03355                 new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
03356             else
03357                 new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
03358             if(!new_parens)
03359                 return E_OUTOFMEMORY;
03360 
03361             *parens = new_parens;
03362         }
03363     }
03364 
03365     /* FIXME: We often already have a copy of input string that we could use to store last match */
03366     if(!(rem_flags & REM_NO_CTX_UPDATE) &&
03367        (!ctx->last_match || len != SysStringLen(ctx->last_match) || strncmpW(ctx->last_match, str, len))) {
03368         BSTR last_match;
03369 
03370         last_match = SysAllocStringLen(str, len);
03371         if(!last_match)
03372             return E_OUTOFMEMORY;
03373         SysFreeString(ctx->last_match);
03374         ctx->last_match = last_match;
03375     }
03376 
03377     if(parens) {
03378         DWORD i;
03379 
03380         *parens_cnt = regexp->jsregexp->parenCount;
03381 
03382         for(i=0; i < regexp->jsregexp->parenCount; i++) {
03383             if(result->parens[i].index == -1) {
03384                 (*parens)[i].str = NULL;
03385                 (*parens)[i].len = 0;
03386             }else {
03387                 (*parens)[i].str = *cp + result->parens[i].index;
03388                 (*parens)[i].len = result->parens[i].length;
03389             }
03390         }
03391     }
03392 
03393     matchlen = (result->cp-*cp) - gData.skipped;
03394     *cp = result->cp;
03395     ret->str = result->cp-matchlen;
03396     ret->len = matchlen;
03397     set_last_index(regexp, result->cp-str);
03398 
03399     if(!(rem_flags & REM_NO_CTX_UPDATE)) {
03400         ctx->last_match_index = ret->str-str;
03401         ctx->last_match_length = matchlen;
03402     }
03403 
03404     return S_OK;
03405 }
03406 
03407 HRESULT regexp_match_next(script_ctx_t *ctx, DispatchEx *dispex, DWORD rem_flags, const WCHAR *str,
03408         DWORD len, const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt,
03409         match_result_t *ret)
03410 {
03411     RegExpInstance *regexp = (RegExpInstance*)dispex;
03412     jsheap_t *mark;
03413     HRESULT hres;
03414 
03415     if((rem_flags & REM_CHECK_GLOBAL) && !(regexp->jsregexp->flags & JSREG_GLOB))
03416         return S_FALSE;
03417 
03418     mark = jsheap_mark(&ctx->tmp_heap);
03419 
03420     hres = do_regexp_match_next(ctx, regexp, rem_flags, str, len, cp, parens, parens_size, parens_cnt, ret);
03421 
03422     jsheap_clear(mark);
03423     return hres;
03424 }
03425 
03426 HRESULT regexp_match(script_ctx_t *ctx, DispatchEx *dispex, const WCHAR *str, DWORD len, BOOL gflag,
03427         match_result_t **match_result, DWORD *result_cnt)
03428 {
03429     RegExpInstance *This = (RegExpInstance*)dispex;
03430     match_result_t *ret = NULL, cres;
03431     const WCHAR *cp = str;
03432     DWORD i=0, ret_size = 0;
03433     jsheap_t *mark;
03434     HRESULT hres;
03435 
03436     mark = jsheap_mark(&ctx->tmp_heap);
03437 
03438     while(1) {
03439         hres = do_regexp_match_next(ctx, This, 0, str, len, &cp, NULL, NULL, NULL, &cres);
03440         if(hres == S_FALSE) {
03441             hres = S_OK;
03442             break;
03443         }
03444 
03445         if(FAILED(hres))
03446             break;
03447 
03448         if(ret_size == i) {
03449             if(ret)
03450                 ret = heap_realloc(ret, (ret_size <<= 1) * sizeof(match_result_t));
03451             else
03452                 ret = heap_alloc((ret_size=4) * sizeof(match_result_t));
03453             if(!ret) {
03454                 hres = E_OUTOFMEMORY;
03455                 break;
03456             }
03457         }
03458 
03459         ret[i++] = cres;
03460 
03461         if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) {
03462             hres = S_OK;
03463             break;
03464         }
03465     }
03466 
03467     jsheap_clear(mark);
03468     if(FAILED(hres)) {
03469         heap_free(ret);
03470         return hres;
03471     }
03472 
03473     *match_result = ret;
03474     *result_cnt = i;
03475     return S_OK;
03476 }
03477 
03478 static HRESULT RegExp_source(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03479         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03480 {
03481     TRACE("\n");
03482 
03483     switch(flags) {
03484     case DISPATCH_PROPERTYGET: {
03485         RegExpInstance *This = regexp_from_vdisp(jsthis);
03486 
03487         V_VT(retv) = VT_BSTR;
03488         V_BSTR(retv) = SysAllocString(This->str);
03489         if(!V_BSTR(retv))
03490             return E_OUTOFMEMORY;
03491         break;
03492     }
03493     default:
03494         FIXME("Unimplemnted flags %x\n", flags);
03495         return E_NOTIMPL;
03496     }
03497 
03498     return S_OK;
03499 }
03500 
03501 static HRESULT RegExp_global(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03502         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03503 {
03504     FIXME("\n");
03505     return E_NOTIMPL;
03506 }
03507 
03508 static HRESULT RegExp_ignoreCase(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03509         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03510 {
03511     FIXME("\n");
03512     return E_NOTIMPL;
03513 }
03514 
03515 static HRESULT RegExp_multiline(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03516         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03517 {
03518     FIXME("\n");
03519     return E_NOTIMPL;
03520 }
03521 
03522 static INT index_from_var(script_ctx_t *ctx, VARIANT *v)
03523 {
03524     jsexcept_t ei;
03525     VARIANT num;
03526     HRESULT hres;
03527 
03528     memset(&ei, 0, sizeof(ei));
03529     hres = to_number(ctx, v, &ei, &num);
03530     if(FAILED(hres)) { /* FIXME: Move ignoring exceptions to to_promitive */
03531         VariantClear(&ei.var);
03532         return 0;
03533     }
03534 
03535     if(V_VT(&num) == VT_R8) {
03536         DOUBLE d = floor(V_R8(&num));
03537         return (DOUBLE)(INT)d == d ? d : 0;
03538     }
03539 
03540     return V_I4(&num);
03541 }
03542 
03543 static HRESULT RegExp_lastIndex(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03544         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03545 {
03546     TRACE("\n");
03547 
03548     switch(flags) {
03549     case DISPATCH_PROPERTYGET: {
03550         RegExpInstance *regexp = regexp_from_vdisp(jsthis);
03551 
03552         V_VT(retv) = VT_EMPTY;
03553         return VariantCopy(retv, &regexp->last_index_var);
03554     }
03555     case DISPATCH_PROPERTYPUT: {
03556         RegExpInstance *regexp = regexp_from_vdisp(jsthis);
03557         VARIANT *arg;
03558         HRESULT hres;
03559 
03560         arg = get_arg(dp,0);
03561         hres = VariantCopy(&regexp->last_index_var, arg);
03562         if(FAILED(hres))
03563             return hres;
03564 
03565         regexp->last_index = index_from_var(ctx, arg);
03566         break;
03567     }
03568     default:
03569         FIXME("unimplemented flags: %x\n", flags);
03570         return E_NOTIMPL;
03571     }
03572 
03573     return S_OK;
03574 }
03575 
03576 static HRESULT RegExp_toString(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03577         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03578 {
03579     FIXME("\n");
03580     return E_NOTIMPL;
03581 }
03582 
03583 static HRESULT create_match_array(script_ctx_t *ctx, BSTR input, const match_result_t *result,
03584         const match_result_t *parens, DWORD parens_cnt, jsexcept_t *ei, IDispatch **ret)
03585 {
03586     DispatchEx *array;
03587     VARIANT var;
03588     int i;
03589     HRESULT hres = S_OK;
03590 
03591     static const WCHAR indexW[] = {'i','n','d','e','x',0};
03592     static const WCHAR inputW[] = {'i','n','p','u','t',0};
03593     static const WCHAR zeroW[] = {'0',0};
03594 
03595     hres = create_array(ctx, parens_cnt+1, &array);
03596     if(FAILED(hres))
03597         return hres;
03598 
03599     for(i=0; i < parens_cnt; i++) {
03600         V_VT(&var) = VT_BSTR;
03601         V_BSTR(&var) = SysAllocStringLen(parens[i].str, parens[i].len);
03602         if(!V_BSTR(&var)) {
03603             hres = E_OUTOFMEMORY;
03604             break;
03605         }
03606 
03607         hres = jsdisp_propput_idx(array, i+1, &var, ei, NULL/*FIXME*/);
03608         SysFreeString(V_BSTR(&var));
03609         if(FAILED(hres))
03610             break;
03611     }
03612 
03613     while(SUCCEEDED(hres)) {
03614         V_VT(&var) = VT_I4;
03615         V_I4(&var) = result->str-input;
03616         hres = jsdisp_propput_name(array, indexW, &var, ei, NULL/*FIXME*/);
03617         if(FAILED(hres))
03618             break;
03619 
03620         V_VT(&var) = VT_BSTR;
03621         V_BSTR(&var) = input;
03622         hres = jsdisp_propput_name(array, inputW, &var, ei, NULL/*FIXME*/);
03623         if(FAILED(hres))
03624             break;
03625 
03626         V_BSTR(&var) = SysAllocStringLen(result->str, result->len);
03627         if(!V_BSTR(&var)) {
03628             hres = E_OUTOFMEMORY;
03629             break;
03630         }
03631         hres = jsdisp_propput_name(array, zeroW, &var, ei, NULL/*FIXME*/);
03632         SysFreeString(V_BSTR(&var));
03633         break;
03634     }
03635 
03636     if(FAILED(hres)) {
03637         jsdisp_release(array);
03638         return hres;
03639     }
03640 
03641     *ret = (IDispatch*)_IDispatchEx_(array);
03642     return S_OK;
03643 }
03644 
03645 static HRESULT run_exec(script_ctx_t *ctx, vdisp_t *jsthis, VARIANT *arg, jsexcept_t *ei, BSTR *input,
03646         match_result_t *match, match_result_t **parens, DWORD *parens_cnt, VARIANT_BOOL *ret)
03647 {
03648     RegExpInstance *regexp;
03649     DWORD parens_size = 0, last_index = 0, length;
03650     const WCHAR *cp;
03651     BSTR string;
03652     HRESULT hres;
03653 
03654     if(!is_vclass(jsthis, JSCLASS_REGEXP)) {
03655         FIXME("Not a RegExp\n");
03656         return E_NOTIMPL;
03657     }
03658 
03659     regexp = regexp_from_vdisp(jsthis);
03660 
03661     if(arg) {
03662         hres = to_string(ctx, arg, ei, &string);
03663         if(FAILED(hres))
03664             return hres;
03665         length = SysStringLen(string);
03666     }else {
03667         string = NULL;
03668         length = 0;
03669     }
03670 
03671     if(regexp->jsregexp->flags & JSREG_GLOB) {
03672         if(regexp->last_index < 0) {
03673             SysFreeString(string);
03674             set_last_index(regexp, 0);
03675             *ret = VARIANT_FALSE;
03676             if(input) {
03677                 *input = NULL;
03678             }
03679             return S_OK;
03680         }
03681 
03682         last_index = regexp->last_index;
03683     }
03684 
03685     cp = string + last_index;
03686     hres = regexp_match_next(ctx, &regexp->dispex, REM_RESET_INDEX, string, length, &cp, parens,
03687             parens ? &parens_size : NULL, parens_cnt, match);
03688     if(FAILED(hres)) {
03689         SysFreeString(string);
03690         return hres;
03691     }
03692 
03693     *ret = hres == S_OK ? VARIANT_TRUE : VARIANT_FALSE;
03694     if(input) {
03695         *input = string;
03696     }else {
03697         SysFreeString(string);
03698     }
03699     return S_OK;
03700 }
03701 
03702 static HRESULT RegExp_exec(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03703         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03704 {
03705     match_result_t *parens = NULL, match;
03706     DWORD parens_cnt = 0;
03707     VARIANT_BOOL b;
03708     BSTR string;
03709     HRESULT hres;
03710 
03711     TRACE("\n");
03712 
03713     hres = run_exec(ctx, jsthis, arg_cnt(dp) ? get_arg(dp,0) : NULL, ei, &string, &match, &parens, &parens_cnt, &b);
03714     if(FAILED(hres))
03715         return hres;
03716 
03717     if(retv) {
03718         if(b) {
03719             IDispatch *ret;
03720 
03721             hres = create_match_array(ctx, string, &match, parens, parens_cnt, ei, &ret);
03722             if(SUCCEEDED(hres)) {
03723                 V_VT(retv) = VT_DISPATCH;
03724                 V_DISPATCH(retv) = ret;
03725             }
03726         }else {
03727             V_VT(retv) = VT_NULL;
03728         }
03729     }
03730 
03731     heap_free(parens);
03732     SysFreeString(string);
03733     return hres;
03734 }
03735 
03736 static HRESULT RegExp_test(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03737         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03738 {
03739     match_result_t match;
03740     VARIANT undef_var;
03741     VARIANT_BOOL b;
03742     DWORD argc;
03743     HRESULT hres;
03744 
03745     TRACE("\n");
03746 
03747     argc = arg_cnt(dp);
03748     if(!argc) {
03749         V_VT(&undef_var) = VT_BSTR;
03750         V_BSTR(&undef_var) = SysAllocString(undefinedW);
03751         if(!V_BSTR(&undef_var))
03752             return E_OUTOFMEMORY;
03753     }
03754 
03755     hres = run_exec(ctx, jsthis, argc ? get_arg(dp,0) : &undef_var, ei, NULL, &match, NULL, NULL, &b);
03756     if(!argc)
03757         SysFreeString(V_BSTR(&undef_var));
03758     if(FAILED(hres))
03759         return hres;
03760 
03761     if(retv) {
03762         V_VT(retv) = VT_BOOL;
03763         V_BOOL(retv) = b;
03764     }
03765     return S_OK;
03766 }
03767 
03768 static HRESULT RegExp_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
03769         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
03770 {
03771     TRACE("\n");
03772 
03773     switch(flags) {
03774     case INVOKE_FUNC:
03775         return throw_type_error(ctx, ei, IDS_NOT_FUNC, NULL);
03776     default:
03777         FIXME("unimplemented flags %x\n", flags);
03778         return E_NOTIMPL;
03779     }
03780 
03781     return S_OK;
03782 }
03783 
03784 static void RegExp_destructor(DispatchEx *dispex)
03785 {
03786     RegExpInstance *This = (RegExpInstance*)dispex;
03787 
03788     if(This->jsregexp)
03789         js_DestroyRegExp(This->jsregexp);
03790     VariantClear(&This->last_index_var);
03791     SysFreeString(This->str);
03792     heap_free(This);
03793 }
03794 
03795 static const builtin_prop_t RegExp_props[] = {
03796     {execW,                  RegExp_exec,                  PROPF_METHOD|1},
03797     {globalW,                RegExp_global,                0},
03798     {ignoreCaseW,            RegExp_ignoreCase,            0},
03799     {lastIndexW,             RegExp_lastIndex,             0},
03800     {multilineW,             RegExp_multiline,             0},
03801     {sourceW,                RegExp_source,                0},
03802     {testW,                  RegExp_test,                  PROPF_METHOD|1},
03803     {toStringW,              RegExp_toString,              PROPF_METHOD}
03804 };
03805 
03806 static const builtin_info_t RegExp_info = {
03807     JSCLASS_REGEXP,
03808     {NULL, RegExp_value, 0},
03809     sizeof(RegExp_props)/sizeof(*RegExp_props),
03810     RegExp_props,
03811     RegExp_destructor,
03812     NULL
03813 };
03814 
03815 static HRESULT alloc_regexp(script_ctx_t *ctx, DispatchEx *object_prototype, RegExpInstance **ret)
03816 {
03817     RegExpInstance *regexp;
03818     HRESULT hres;
03819 
03820     regexp = heap_alloc_zero(sizeof(RegExpInstance));
03821     if(!regexp)
03822         return E_OUTOFMEMORY;
03823 
03824     if(object_prototype)
03825         hres = init_dispex(&regexp->dispex, ctx, &RegExp_info, object_prototype);
03826     else
03827         hres = init_dispex_from_constr(&regexp->dispex, ctx, &RegExp_info, ctx->regexp_constr);
03828 
03829     if(FAILED(hres)) {
03830         heap_free(regexp);
03831         return hres;
03832     }
03833 
03834     *ret = regexp;
03835     return S_OK;
03836 }
03837 
03838 HRESULT create_regexp(script_ctx_t *ctx, const WCHAR *exp, int len, DWORD flags, DispatchEx **ret)
03839 {
03840     RegExpInstance *regexp;
03841     HRESULT hres;
03842 
03843     TRACE("%s %x\n", debugstr_w(exp), flags);
03844 
03845     hres = alloc_regexp(ctx, NULL, &regexp);
03846     if(FAILED(hres))
03847         return hres;
03848 
03849     if(len == -1)
03850         regexp->str = SysAllocString(exp);
03851     else
03852         regexp->str = SysAllocStringLen(exp, len);
03853     if(!regexp->str) {
03854         jsdisp_release(&regexp->dispex);
03855         return E_OUTOFMEMORY;
03856     }
03857 
03858     regexp->jsregexp = js_NewRegExp(ctx, regexp->str, flags, FALSE);
03859     if(!regexp->jsregexp) {
03860         WARN("js_NewRegExp failed\n");
03861         jsdisp_release(&regexp->dispex);
03862         return E_FAIL;
03863     }
03864 
03865     V_VT(&regexp->last_index_var) = VT_I4;
03866     V_I4(&regexp->last_index_var) = 0;
03867 
03868     *ret = &regexp->dispex;
03869     return S_OK;
03870 }
03871 
03872 HRESULT create_regexp_var(script_ctx_t *ctx, VARIANT *src_arg, VARIANT *flags_arg, DispatchEx **ret)
03873 {
03874     const WCHAR *opt = emptyW, *src;
03875     DWORD flags;
03876     HRESULT hres;
03877 
03878     if(V_VT(src_arg) == VT_DISPATCH) {
03879         DispatchEx *obj;
03880 
03881         obj = iface_to_jsdisp((IUnknown*)V_DISPATCH(src_arg));
03882         if(obj) {
03883             if(is_class(obj, JSCLASS_REGEXP)) {
03884                 RegExpInstance *regexp = (RegExpInstance*)obj;
03885 
03886                 hres = create_regexp(ctx, regexp->str, -1, regexp->jsregexp->flags, ret);
03887                 jsdisp_release(obj);
03888                 return hres;
03889             }
03890 
03891             jsdisp_release(obj);
03892         }
03893     }
03894 
03895     if(V_VT(src_arg) != VT_BSTR) {
03896         FIXME("flags_arg = %s\n", debugstr_variant(flags_arg));
03897         return E_NOTIMPL;
03898     }
03899 
03900     src = V_BSTR(src_arg);
03901 
03902     if(flags_arg) {
03903         if(V_VT(flags_arg) != VT_BSTR) {
03904             FIXME("unimplemented for vt %d\n", V_VT(flags_arg));
03905             return E_NOTIMPL;
03906         }
03907 
03908         opt = V_BSTR(flags_arg);
03909     }
03910 
03911     hres = parse_regexp_flags(opt, strlenW(opt), &flags);
03912     if(FAILED(hres))
03913         return hres;
03914 
03915     return create_regexp(ctx, src, -1, flags, ret);
03916 }
03917 
03918 HRESULT regexp_string_match(script_ctx_t *ctx, DispatchEx *re, BSTR str,
03919         VARIANT *retv, jsexcept_t *ei)
03920 {
03921     RegExpInstance *regexp = (RegExpInstance*)re;
03922     match_result_t *match_result;
03923     DWORD match_cnt, i, length;
03924     DispatchEx *array;
03925     VARIANT var;
03926     HRESULT hres;
03927 
03928     length = SysStringLen(str);
03929 
03930     if(!(regexp->jsregexp->flags & JSREG_GLOB)) {
03931         match_result_t match, *parens = NULL;
03932         DWORD parens_cnt, parens_size = 0;
03933         const WCHAR *cp = str;
03934 
03935         hres = regexp_match_next(ctx, &regexp->dispex, 0, str, length, &cp, &parens, &parens_size, &parens_cnt, &match);
03936         if(FAILED(hres))
03937             return hres;
03938 
03939         if(retv) {
03940             if(hres == S_OK) {
03941                 IDispatch *ret;
03942 
03943                 hres = create_match_array(ctx, str, &match, parens, parens_cnt, ei, &ret);
03944                 if(SUCCEEDED(hres)) {
03945                     V_VT(retv) = VT_DISPATCH;
03946                     V_DISPATCH(retv) = ret;
03947                 }
03948             }else {
03949                 V_VT(retv) = VT_NULL;
03950             }
03951         }
03952 
03953         heap_free(parens);
03954         return S_OK;
03955     }
03956 
03957     hres = regexp_match(ctx, &regexp->dispex, str, length, FALSE, &match_result, &match_cnt);
03958     if(FAILED(hres))
03959         return hres;
03960 
03961     if(!match_cnt) {
03962         TRACE("no match\n");
03963 
03964         if(retv)
03965             V_VT(retv) = VT_NULL;
03966         return S_OK;
03967     }
03968 
03969     hres = create_array(ctx, match_cnt, &array);
03970     if(FAILED(hres))
03971         return hres;
03972 
03973     V_VT(&var) = VT_BSTR;
03974 
03975     for(i=0; i < match_cnt; i++) {
03976         V_BSTR(&var) = SysAllocStringLen(match_result[i].str, match_result[i].len);
03977         if(!V_BSTR(&var)) {
03978             hres = E_OUTOFMEMORY;
03979             break;
03980         }
03981 
03982         hres = jsdisp_propput_idx(array, i, &var, ei, NULL/*FIXME*/);
03983         SysFreeString(V_BSTR(&var));
03984         if(FAILED(hres))
03985             break;
03986     }
03987 
03988     heap_free(match_result);
03989 
03990     if(SUCCEEDED(hres) && retv) {
03991         V_VT(retv) = VT_DISPATCH;
03992         V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(array);
03993     }else {
03994         jsdisp_release(array);
03995     }
03996     return hres;
03997 }
03998 
03999 static HRESULT RegExpConstr_leftContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
04000          DISPPARAMS *dp, VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
04001 {
04002     TRACE("\n");
04003 
04004     switch(flags) {
04005     case DISPATCH_PROPERTYGET: {
04006         BSTR ret;
04007 
04008         ret = SysAllocStringLen(ctx->last_match, ctx->last_match_index);
04009         if(!ret)
04010             return E_OUTOFMEMORY;
04011 
04012         V_VT(retv) = VT_BSTR;
04013         V_BSTR(retv) = ret;
04014     }
04015     case DISPATCH_PROPERTYPUT:
04016         return S_OK;
04017     default:
04018         FIXME("unsupported flags\n");
04019         return E_NOTIMPL;
04020     }
04021 
04022     return S_OK;
04023 }
04024 
04025 static HRESULT RegExpConstr_rightContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags,
04026          DISPPARAMS *dp, VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
04027 {
04028     TRACE("\n");
04029 
04030     switch(flags) {
04031     case DISPATCH_PROPERTYGET: {
04032         BSTR ret;
04033 
04034         ret = SysAllocString(ctx->last_match+ctx->last_match_index+ctx->last_match_length);
04035         if(!ret)
04036             return E_OUTOFMEMORY;
04037 
04038         V_VT(retv) = VT_BSTR;
04039         V_BSTR(retv) = ret;
04040     }
04041     case DISPATCH_PROPERTYPUT:
04042         return S_OK;
04043     default:
04044         FIXME("unsupported flags\n");
04045         return E_NOTIMPL;
04046     }
04047 
04048     return S_OK;
04049 }
04050 
04051 static HRESULT RegExpConstr_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp,
04052         VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp)
04053 {
04054     TRACE("\n");
04055 
04056     switch(flags) {
04057     case DISPATCH_METHOD:
04058         if(arg_cnt(dp)) {
04059             VARIANT *arg = get_arg(dp,0);
04060             if(V_VT(arg) == VT_DISPATCH) {
04061                 DispatchEx *jsdisp = iface_to_jsdisp((IUnknown*)V_DISPATCH(arg));
04062                 if(jsdisp) {
04063                     if(is_class(jsdisp, JSCLASS_REGEXP)) {
04064                         if(arg_cnt(dp) > 1 && V_VT(get_arg(dp,1)) != VT_EMPTY) {
04065                             jsdisp_release(jsdisp);
04066                             return throw_regexp_error(ctx, ei, IDS_REGEXP_SYNTAX_ERROR, NULL);
04067                         }
04068 
04069                         if(retv) {
04070                             V_VT(retv) = VT_DISPATCH;
04071                             V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(jsdisp);
04072                         }else {
04073                             jsdisp_release(jsdisp);
04074                         }
04075                         return S_OK;
04076                     }
04077                     jsdisp_release(jsdisp);
04078                 }
04079             }
04080         }
04081         /* fall through */
04082     case DISPATCH_CONSTRUCT: {
04083         DispatchEx *ret;
04084         HRESULT hres;
04085 
04086         if(!arg_cnt(dp)) {
04087             FIXME("no args\n");
04088             return E_NOTIMPL;
04089         }
04090 
04091         hres = create_regexp_var(ctx, get_arg(dp,0), arg_cnt(dp) > 1 ? get_arg(dp,1) : NULL, &ret);
04092         if(FAILED(hres))
04093             return hres;
04094 
04095         if(retv) {
04096             V_VT(retv) = VT_DISPATCH;
04097             V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret);
04098         }else {
04099             jsdisp_release(ret);
04100         }
04101         return S_OK;
04102     }
04103     default:
04104         FIXME("unimplemented flags: %x\n", flags);
04105         return E_NOTIMPL;
04106     }
04107 
04108     return S_OK;
04109 }
04110 
04111 static const builtin_prop_t RegExpConstr_props[] = {
04112     {leftContextW,    RegExpConstr_leftContext,    0},
04113     {rightContextW,   RegExpConstr_rightContext,   0}
04114 };
04115 
04116 static const builtin_info_t RegExpConstr_info = {
04117     JSCLASS_FUNCTION,
04118     {NULL, Function_value, 0},
04119     sizeof(RegExpConstr_props)/sizeof(*RegExpConstr_props),
04120     RegExpConstr_props,
04121     NULL,
04122     NULL
04123 };
04124 
04125 HRESULT create_regexp_constr(script_ctx_t *ctx, DispatchEx *object_prototype, DispatchEx **ret)
04126 {
04127     RegExpInstance *regexp;
04128     HRESULT hres;
04129 
04130     static const WCHAR RegExpW[] = {'R','e','g','E','x','p',0};
04131 
04132     hres = alloc_regexp(ctx, object_prototype, &regexp);
04133     if(FAILED(hres))
04134         return hres;
04135 
04136     hres = create_builtin_function(ctx, RegExpConstr_value, RegExpW, &RegExpConstr_info,
04137             PROPF_CONSTR|2, &regexp->dispex, ret);
04138 
04139     jsdisp_release(&regexp->dispex);
04140     return hres;
04141 }
04142 
04143 HRESULT parse_regexp_flags(const WCHAR *str, DWORD str_len, DWORD *ret)
04144 {
04145     const WCHAR *p;
04146     DWORD flags = 0;
04147 
04148     for (p = str; p < str+str_len; p++) {
04149         switch (*p) {
04150         case 'g':
04151             flags |= JSREG_GLOB;
04152             break;
04153         case 'i':
04154             flags |= JSREG_FOLD;
04155             break;
04156         case 'm':
04157             flags |= JSREG_MULTILINE;
04158             break;
04159         case 'y':
04160             flags |= JSREG_STICKY;
04161             break;
04162         default:
04163             WARN("wrong flag %c\n", *p);
04164             return E_FAIL;
04165         }
04166     }
04167 
04168     *ret = flags;
04169     return S_OK;
04170 }

Generated on Mon May 28 2012 04:24:02 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.