Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygenregexp.c
Go to the documentation of this file.
00001 /* 00002 * Copyright 2008 Jacek Caban for CodeWeavers 00003 * 00004 * This library is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU Lesser General Public 00006 * License as published by the Free Software Foundation; either 00007 * version 2.1 of the License, or (at your option) any later version. 00008 * 00009 * This library is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 * Lesser General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU Lesser General Public 00015 * License along with this library; if not, write to the Free Software 00016 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00017 */ 00018 00019 /* 00020 * Code in this file is based on files: 00021 * js/src/jsregexp.h 00022 * js/src/jsregexp.c 00023 * from Mozilla project, released under LGPL 2.1 or later. 00024 * 00025 * The Original Code is Mozilla Communicator client code, released 00026 * March 31, 1998. 00027 * 00028 * The Initial Developer of the Original Code is 00029 * Netscape Communications Corporation. 00030 * Portions created by the Initial Developer are Copyright (C) 1998 00031 * the Initial Developer. All Rights Reserved. 00032 */ 00033 00034 #include <assert.h> 00035 #include <math.h> 00036 00037 #include "jscript.h" 00038 00039 #include "wine/debug.h" 00040 00041 WINE_DEFAULT_DEBUG_CHANNEL(jscript); 00042 00043 #define JSREG_FOLD 0x01 /* fold uppercase to lowercase */ 00044 #define JSREG_GLOB 0x02 /* global exec, creates array of matches */ 00045 #define JSREG_MULTILINE 0x04 /* treat ^ and $ as begin and end of line */ 00046 #define JSREG_STICKY 0x08 /* only match starting at lastIndex */ 00047 00048 typedef BYTE JSPackedBool; 00049 typedef BYTE jsbytecode; 00050 00051 /* 00052 * This struct holds a bitmap representation of a class from a regexp. 00053 * There's a list of these referenced by the classList field in the JSRegExp 00054 * struct below. The initial state has startIndex set to the offset in the 00055 * original regexp source of the beginning of the class contents. The first 00056 * use of the class converts the source representation into a bitmap. 00057 * 00058 */ 00059 typedef struct RECharSet { 00060 JSPackedBool converted; 00061 JSPackedBool sense; 00062 WORD length; 00063 union { 00064 BYTE *bits; 00065 struct { 00066 size_t startIndex; 00067 size_t length; 00068 } src; 00069 } u; 00070 } RECharSet; 00071 00072 typedef struct { 00073 WORD flags; /* flags, see jsapi.h's JSREG_* defines */ 00074 size_t parenCount; /* number of parenthesized submatches */ 00075 size_t classCount; /* count [...] bitmaps */ 00076 RECharSet *classList; /* list of [...] bitmaps */ 00077 BSTR source; /* locked source string, sans // */ 00078 jsbytecode program[1]; /* regular expression bytecode */ 00079 } JSRegExp; 00080 00081 typedef struct { 00082 DispatchEx dispex; 00083 00084 JSRegExp *jsregexp; 00085 BSTR str; 00086 INT last_index; 00087 VARIANT last_index_var; 00088 } RegExpInstance; 00089 00090 static const WCHAR sourceW[] = {'s','o','u','r','c','e',0}; 00091 static const WCHAR globalW[] = {'g','l','o','b','a','l',0}; 00092 static const WCHAR ignoreCaseW[] = {'i','g','n','o','r','e','C','a','s','e',0}; 00093 static const WCHAR multilineW[] = {'m','u','l','t','i','l','i','n','e',0}; 00094 static const WCHAR lastIndexW[] = {'l','a','s','t','I','n','d','e','x',0}; 00095 static const WCHAR toStringW[] = {'t','o','S','t','r','i','n','g',0}; 00096 static const WCHAR execW[] = {'e','x','e','c',0}; 00097 static const WCHAR testW[] = {'t','e','s','t',0}; 00098 00099 static const WCHAR leftContextW[] = 00100 {'l','e','f','t','C','o','n','t','e','x','t',0}; 00101 static const WCHAR rightContextW[] = 00102 {'r','i','g','h','t','C','o','n','t','e','x','t',0}; 00103 00104 static const WCHAR undefinedW[] = {'u','n','d','e','f','i','n','e','d',0}; 00105 static const WCHAR emptyW[] = {0}; 00106 00107 /* FIXME: Better error handling */ 00108 #define ReportRegExpError(a,b,c) 00109 #define ReportRegExpErrorHelper(a,b,c,d) 00110 #define JS_ReportErrorNumber(a,b,c,d) 00111 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f) 00112 #define js_ReportOutOfScriptQuota(a) 00113 #define JS_ReportOutOfMemory(a) 00114 #define JS_COUNT_OPERATION(a,b) 00115 00116 #define JSMSG_MIN_TOO_BIG 47 00117 #define JSMSG_MAX_TOO_BIG 48 00118 #define JSMSG_OUT_OF_ORDER 49 00119 #define JSMSG_OUT_OF_MEMORY 137 00120 00121 #define LINE_SEPARATOR 0x2028 00122 #define PARA_SEPARATOR 0x2029 00123 00124 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \ 00125 ((c >= 'a') && (c <= 'z')) ) 00126 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \ 00127 (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR)) 00128 00129 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_')) 00130 00131 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9) 00132 #define JS7_UNDEC(c) ((c) - '0') 00133 00134 typedef enum REOp { 00135 REOP_EMPTY, 00136 REOP_BOL, 00137 REOP_EOL, 00138 REOP_WBDRY, 00139 REOP_WNONBDRY, 00140 REOP_DOT, 00141 REOP_DIGIT, 00142 REOP_NONDIGIT, 00143 REOP_ALNUM, 00144 REOP_NONALNUM, 00145 REOP_SPACE, 00146 REOP_NONSPACE, 00147 REOP_BACKREF, 00148 REOP_FLAT, 00149 REOP_FLAT1, 00150 REOP_FLATi, 00151 REOP_FLAT1i, 00152 REOP_UCFLAT1, 00153 REOP_UCFLAT1i, 00154 REOP_UCFLAT, 00155 REOP_UCFLATi, 00156 REOP_CLASS, 00157 REOP_NCLASS, 00158 REOP_ALT, 00159 REOP_QUANT, 00160 REOP_STAR, 00161 REOP_PLUS, 00162 REOP_OPT, 00163 REOP_LPAREN, 00164 REOP_RPAREN, 00165 REOP_JUMP, 00166 REOP_DOTSTAR, 00167 REOP_LPARENNON, 00168 REOP_ASSERT, 00169 REOP_ASSERT_NOT, 00170 REOP_ASSERTTEST, 00171 REOP_ASSERTNOTTEST, 00172 REOP_MINIMALSTAR, 00173 REOP_MINIMALPLUS, 00174 REOP_MINIMALOPT, 00175 REOP_MINIMALQUANT, 00176 REOP_ENDCHILD, 00177 REOP_REPEAT, 00178 REOP_MINIMALREPEAT, 00179 REOP_ALTPREREQ, 00180 REOP_ALTPREREQ2, 00181 REOP_ENDALT, 00182 REOP_CONCAT, 00183 REOP_END, 00184 REOP_LIMIT /* META: no operator >= to this */ 00185 } REOp; 00186 00187 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS) 00188 00189 static const char *reop_names[] = { 00190 "empty", 00191 "bol", 00192 "eol", 00193 "wbdry", 00194 "wnonbdry", 00195 "dot", 00196 "digit", 00197 "nondigit", 00198 "alnum", 00199 "nonalnum", 00200 "space", 00201 "nonspace", 00202 "backref", 00203 "flat", 00204 "flat1", 00205 "flati", 00206 "flat1i", 00207 "ucflat1", 00208 "ucflat1i", 00209 "ucflat", 00210 "ucflati", 00211 "class", 00212 "nclass", 00213 "alt", 00214 "quant", 00215 "star", 00216 "plus", 00217 "opt", 00218 "lparen", 00219 "rparen", 00220 "jump", 00221 "dotstar", 00222 "lparennon", 00223 "assert", 00224 "assert_not", 00225 "asserttest", 00226 "assertnottest", 00227 "minimalstar", 00228 "minimalplus", 00229 "minimalopt", 00230 "minimalquant", 00231 "endchild", 00232 "repeat", 00233 "minimalrepeat", 00234 "altprereq", 00235 "alrprereq2", 00236 "endalt", 00237 "concat", 00238 "end", 00239 NULL 00240 }; 00241 00242 typedef struct RECapture { 00243 ptrdiff_t index; /* start of contents, -1 for empty */ 00244 size_t length; /* length of capture */ 00245 } RECapture; 00246 00247 typedef struct REMatchState { 00248 const WCHAR *cp; 00249 RECapture parens[1]; /* first of 're->parenCount' captures, 00250 allocated at end of this struct */ 00251 } REMatchState; 00252 00253 typedef struct REProgState { 00254 jsbytecode *continue_pc; /* current continuation data */ 00255 jsbytecode continue_op; 00256 ptrdiff_t index; /* progress in text */ 00257 size_t parenSoFar; /* highest indexed paren started */ 00258 union { 00259 struct { 00260 UINT min; /* current quantifier limits */ 00261 UINT max; 00262 } quantifier; 00263 struct { 00264 size_t top; /* backtrack stack state */ 00265 size_t sz; 00266 } assertion; 00267 } u; 00268 } REProgState; 00269 00270 typedef struct REBackTrackData { 00271 size_t sz; /* size of previous stack entry */ 00272 jsbytecode *backtrack_pc; /* where to backtrack to */ 00273 jsbytecode backtrack_op; 00274 const WCHAR *cp; /* index in text of match at backtrack */ 00275 size_t parenIndex; /* start index of saved paren contents */ 00276 size_t parenCount; /* # of saved paren contents */ 00277 size_t saveStateStackTop; /* number of parent states */ 00278 /* saved parent states follow */ 00279 /* saved paren contents follow */ 00280 } REBackTrackData; 00281 00282 #define INITIAL_STATESTACK 100 00283 #define INITIAL_BACKTRACK 8000 00284 00285 typedef struct REGlobalData { 00286 script_ctx_t *cx; 00287 JSRegExp *regexp; /* the RE in execution */ 00288 BOOL ok; /* runtime error (out_of_memory only?) */ 00289 size_t start; /* offset to start at */ 00290 ptrdiff_t skipped; /* chars skipped anchoring this r.e. */ 00291 const WCHAR *cpbegin; /* text base address */ 00292 const WCHAR *cpend; /* text limit address */ 00293 00294 REProgState *stateStack; /* stack of state of current parents */ 00295 size_t stateStackTop; 00296 size_t stateStackLimit; 00297 00298 REBackTrackData *backTrackStack;/* stack of matched-so-far positions */ 00299 REBackTrackData *backTrackSP; 00300 size_t backTrackStackSize; 00301 size_t cursz; /* size of current stack entry */ 00302 size_t backTrackCount; /* how many times we've backtracked */ 00303 size_t backTrackLimit; /* upper limit on backtrack states */ 00304 00305 jsheap_t *pool; /* It's faster to use one malloc'd pool 00306 than to malloc/free the three items 00307 that are allocated from this pool */ 00308 } REGlobalData; 00309 00310 typedef struct RENode RENode; 00311 struct RENode { 00312 REOp op; /* r.e. op bytecode */ 00313 RENode *next; /* next in concatenation order */ 00314 void *kid; /* first operand */ 00315 union { 00316 void *kid2; /* second operand */ 00317 INT num; /* could be a number */ 00318 size_t parenIndex; /* or a parenthesis index */ 00319 struct { /* or a quantifier range */ 00320 UINT min; 00321 UINT max; 00322 JSPackedBool greedy; 00323 } range; 00324 struct { /* or a character class */ 00325 size_t startIndex; 00326 size_t kidlen; /* length of string at kid, in jschars */ 00327 size_t index; /* index into class list */ 00328 WORD bmsize; /* bitmap size, based on max char code */ 00329 JSPackedBool sense; 00330 } ucclass; 00331 struct { /* or a literal sequence */ 00332 WCHAR chr; /* of one character */ 00333 size_t length; /* or many (via the kid) */ 00334 } flat; 00335 struct { 00336 RENode *kid2; /* second operand from ALT */ 00337 WCHAR ch1; /* match char for ALTPREREQ */ 00338 WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */ 00339 } altprereq; 00340 } u; 00341 }; 00342 00343 #define CLASS_CACHE_SIZE 4 00344 00345 typedef struct CompilerState { 00346 script_ctx_t *context; 00347 const WCHAR *cpbegin; 00348 const WCHAR *cpend; 00349 const WCHAR *cp; 00350 size_t parenCount; 00351 size_t classCount; /* number of [] encountered */ 00352 size_t treeDepth; /* maximum depth of parse tree */ 00353 size_t progLength; /* estimated bytecode length */ 00354 RENode *result; 00355 size_t classBitmapsMem; /* memory to hold all class bitmaps */ 00356 struct { 00357 const WCHAR *start; /* small cache of class strings */ 00358 size_t length; /* since they're often the same */ 00359 size_t index; 00360 } classCache[CLASS_CACHE_SIZE]; 00361 WORD flags; 00362 } CompilerState; 00363 00364 typedef struct EmitStateStackEntry { 00365 jsbytecode *altHead; /* start of REOP_ALT* opcode */ 00366 jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */ 00367 jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */ 00368 jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */ 00369 RENode *continueNode; /* original REOP_ALT* node being stacked */ 00370 jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */ 00371 JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to 00372 avoid 16-bit unsigned offset overflow */ 00373 } EmitStateStackEntry; 00374 00375 /* 00376 * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h, 00377 * the getters and setters take the pc of the offset, not of the opcode before 00378 * the offset. 00379 */ 00380 #define ARG_LEN 2 00381 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1])) 00382 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \ 00383 (pc)[1] = (jsbytecode) (arg)) 00384 00385 #define OFFSET_LEN ARG_LEN 00386 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1) 00387 #define GET_OFFSET(pc) GET_ARG(pc) 00388 00389 static BOOL ParseRegExp(CompilerState*); 00390 00391 /* 00392 * Maximum supported tree depth is maximum size of EmitStateStackEntry stack. 00393 * For sanity, we limit it to 2^24 bytes. 00394 */ 00395 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry)) 00396 00397 /* 00398 * The maximum memory that can be allocated for class bitmaps. 00399 * For sanity, we limit it to 2^24 bytes. 00400 */ 00401 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24) 00402 00403 /* 00404 * Functions to get size and write/read bytecode that represent small indexes 00405 * compactly. 00406 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set 00407 * indicates that the following byte brings more bits to the index. Otherwise 00408 * this is the last byte in the index bytecode representing highest index bits. 00409 */ 00410 static size_t 00411 GetCompactIndexWidth(size_t index) 00412 { 00413 size_t width; 00414 00415 for (width = 1; (index >>= 7) != 0; ++width) { } 00416 return width; 00417 } 00418 00419 static inline jsbytecode * 00420 WriteCompactIndex(jsbytecode *pc, size_t index) 00421 { 00422 size_t next; 00423 00424 while ((next = index >> 7) != 0) { 00425 *pc++ = (jsbytecode)(index | 0x80); 00426 index = next; 00427 } 00428 *pc++ = (jsbytecode)index; 00429 return pc; 00430 } 00431 00432 static inline jsbytecode * 00433 ReadCompactIndex(jsbytecode *pc, size_t *result) 00434 { 00435 size_t nextByte; 00436 00437 nextByte = *pc++; 00438 if ((nextByte & 0x80) == 0) { 00439 /* 00440 * Short-circuit the most common case when compact index <= 127. 00441 */ 00442 *result = nextByte; 00443 } else { 00444 size_t shift = 7; 00445 *result = 0x7F & nextByte; 00446 do { 00447 nextByte = *pc++; 00448 *result |= (nextByte & 0x7F) << shift; 00449 shift += 7; 00450 } while ((nextByte & 0x80) != 0); 00451 } 00452 return pc; 00453 } 00454 00455 /* Construct and initialize an RENode, returning NULL for out-of-memory */ 00456 static RENode * 00457 NewRENode(CompilerState *state, REOp op) 00458 { 00459 RENode *ren; 00460 00461 ren = jsheap_alloc(&state->context->tmp_heap, sizeof(*ren)); 00462 if (!ren) { 00463 /* js_ReportOutOfScriptQuota(cx); */ 00464 return NULL; 00465 } 00466 ren->op = op; 00467 ren->next = NULL; 00468 ren->kid = NULL; 00469 return ren; 00470 } 00471 00472 /* 00473 * Validates and converts hex ascii value. 00474 */ 00475 static BOOL 00476 isASCIIHexDigit(WCHAR c, UINT *digit) 00477 { 00478 UINT cv = c; 00479 00480 if (cv < '0') 00481 return FALSE; 00482 if (cv <= '9') { 00483 *digit = cv - '0'; 00484 return TRUE; 00485 } 00486 cv |= 0x20; 00487 if (cv >= 'a' && cv <= 'f') { 00488 *digit = cv - 'a' + 10; 00489 return TRUE; 00490 } 00491 return FALSE; 00492 } 00493 00494 typedef struct { 00495 REOp op; 00496 const WCHAR *errPos; 00497 size_t parenIndex; 00498 } REOpData; 00499 00500 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8)) 00501 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off)) 00502 00503 static BOOL 00504 SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target) 00505 { 00506 ptrdiff_t offset = target - jump; 00507 00508 /* Check that target really points forward. */ 00509 assert(offset >= 2); 00510 if ((size_t)offset > OFFSET_MAX) 00511 return FALSE; 00512 00513 jump[0] = JUMP_OFFSET_HI(offset); 00514 jump[1] = JUMP_OFFSET_LO(offset); 00515 return TRUE; 00516 } 00517 00518 /* 00519 * Generate bytecode for the tree rooted at t using an explicit stack instead 00520 * of recursion. 00521 */ 00522 static jsbytecode * 00523 EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth, 00524 jsbytecode *pc, RENode *t) 00525 { 00526 EmitStateStackEntry *emitStateSP, *emitStateStack; 00527 RECharSet *charSet; 00528 REOp op; 00529 00530 if (treeDepth == 0) { 00531 emitStateStack = NULL; 00532 } else { 00533 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth); 00534 if (!emitStateStack) 00535 return NULL; 00536 } 00537 emitStateSP = emitStateStack; 00538 op = t->op; 00539 assert(op < REOP_LIMIT); 00540 00541 for (;;) { 00542 *pc++ = op; 00543 switch (op) { 00544 case REOP_EMPTY: 00545 --pc; 00546 break; 00547 00548 case REOP_ALTPREREQ2: 00549 case REOP_ALTPREREQ: 00550 assert(emitStateSP); 00551 emitStateSP->altHead = pc - 1; 00552 emitStateSP->endTermFixup = pc; 00553 pc += OFFSET_LEN; 00554 SET_ARG(pc, t->u.altprereq.ch1); 00555 pc += ARG_LEN; 00556 SET_ARG(pc, t->u.altprereq.ch2); 00557 pc += ARG_LEN; 00558 00559 emitStateSP->nextAltFixup = pc; /* offset to next alternate */ 00560 pc += OFFSET_LEN; 00561 00562 emitStateSP->continueNode = t; 00563 emitStateSP->continueOp = REOP_JUMP; 00564 emitStateSP->jumpToJumpFlag = FALSE; 00565 ++emitStateSP; 00566 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth); 00567 t = t->kid; 00568 op = t->op; 00569 assert(op < REOP_LIMIT); 00570 continue; 00571 00572 case REOP_JUMP: 00573 emitStateSP->nextTermFixup = pc; /* offset to following term */ 00574 pc += OFFSET_LEN; 00575 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc)) 00576 goto jump_too_big; 00577 emitStateSP->continueOp = REOP_ENDALT; 00578 ++emitStateSP; 00579 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth); 00580 t = t->u.kid2; 00581 op = t->op; 00582 assert(op < REOP_LIMIT); 00583 continue; 00584 00585 case REOP_ENDALT: 00586 /* 00587 * If we already patched emitStateSP->nextTermFixup to jump to 00588 * a nearer jump, to avoid 16-bit immediate offset overflow, we 00589 * are done here. 00590 */ 00591 if (emitStateSP->jumpToJumpFlag) 00592 break; 00593 00594 /* 00595 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT. 00596 * REOP_ENDALT is executed only on successful match of the last 00597 * alternate in a group. 00598 */ 00599 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc)) 00600 goto jump_too_big; 00601 if (t->op != REOP_ALT) { 00602 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc)) 00603 goto jump_too_big; 00604 } 00605 00606 /* 00607 * If the program is bigger than the REOP_JUMP offset range, then 00608 * we must check for alternates before this one that are part of 00609 * the same group, and fix up their jump offsets to target jumps 00610 * close enough to fit in a 16-bit unsigned offset immediate. 00611 */ 00612 if ((size_t)(pc - re->program) > OFFSET_MAX && 00613 emitStateSP > emitStateStack) { 00614 EmitStateStackEntry *esp, *esp2; 00615 jsbytecode *alt, *jump; 00616 ptrdiff_t span, header; 00617 00618 esp2 = emitStateSP; 00619 alt = esp2->altHead; 00620 for (esp = esp2 - 1; esp >= emitStateStack; --esp) { 00621 if (esp->continueOp == REOP_ENDALT && 00622 !esp->jumpToJumpFlag && 00623 esp->nextTermFixup + OFFSET_LEN == alt && 00624 (size_t)(pc - ((esp->continueNode->op != REOP_ALT) 00625 ? esp->endTermFixup 00626 : esp->nextTermFixup)) > OFFSET_MAX) { 00627 alt = esp->altHead; 00628 jump = esp->nextTermFixup; 00629 00630 /* 00631 * The span must be 1 less than the distance from 00632 * jump offset to jump offset, so we actually jump 00633 * to a REOP_JUMP bytecode, not to its offset! 00634 */ 00635 for (;;) { 00636 assert(jump < esp2->nextTermFixup); 00637 span = esp2->nextTermFixup - jump - 1; 00638 if ((size_t)span <= OFFSET_MAX) 00639 break; 00640 do { 00641 if (--esp2 == esp) 00642 goto jump_too_big; 00643 } while (esp2->continueOp != REOP_ENDALT); 00644 } 00645 00646 jump[0] = JUMP_OFFSET_HI(span); 00647 jump[1] = JUMP_OFFSET_LO(span); 00648 00649 if (esp->continueNode->op != REOP_ALT) { 00650 /* 00651 * We must patch the offset at esp->endTermFixup 00652 * as well, for the REOP_ALTPREREQ{,2} opcodes. 00653 * If we're unlucky and endTermFixup is more than 00654 * OFFSET_MAX bytes from its target, we cheat by 00655 * jumping 6 bytes to the jump whose offset is at 00656 * esp->nextTermFixup, which has the same target. 00657 */ 00658 jump = esp->endTermFixup; 00659 header = esp->nextTermFixup - jump; 00660 span += header; 00661 if ((size_t)span > OFFSET_MAX) 00662 span = header; 00663 00664 jump[0] = JUMP_OFFSET_HI(span); 00665 jump[1] = JUMP_OFFSET_LO(span); 00666 } 00667 00668 esp->jumpToJumpFlag = TRUE; 00669 } 00670 } 00671 } 00672 break; 00673 00674 case REOP_ALT: 00675 assert(emitStateSP); 00676 emitStateSP->altHead = pc - 1; 00677 emitStateSP->nextAltFixup = pc; /* offset to next alternate */ 00678 pc += OFFSET_LEN; 00679 emitStateSP->continueNode = t; 00680 emitStateSP->continueOp = REOP_JUMP; 00681 emitStateSP->jumpToJumpFlag = FALSE; 00682 ++emitStateSP; 00683 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth); 00684 t = t->kid; 00685 op = t->op; 00686 assert(op < REOP_LIMIT); 00687 continue; 00688 00689 case REOP_FLAT: 00690 /* 00691 * Coalesce FLATs if possible and if it would not increase bytecode 00692 * beyond preallocated limit. The latter happens only when bytecode 00693 * size for coalesced string with offset p and length 2 exceeds 6 00694 * bytes preallocated for 2 single char nodes, i.e. when 00695 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or 00696 * GetCompactIndexWidth(p) > 4. 00697 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more 00698 * nodes strictly decreases bytecode size, the check has to be 00699 * done only for the first coalescing. 00700 */ 00701 if (t->kid && 00702 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4) 00703 { 00704 while (t->next && 00705 t->next->op == REOP_FLAT && 00706 (WCHAR*)t->kid + t->u.flat.length == 00707 t->next->kid) { 00708 t->u.flat.length += t->next->u.flat.length; 00709 t->next = t->next->next; 00710 } 00711 } 00712 if (t->kid && t->u.flat.length > 1) { 00713 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT; 00714 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin); 00715 pc = WriteCompactIndex(pc, t->u.flat.length); 00716 } else if (t->u.flat.chr < 256) { 00717 pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1; 00718 *pc++ = (jsbytecode) t->u.flat.chr; 00719 } else { 00720 pc[-1] = (state->flags & JSREG_FOLD) 00721 ? REOP_UCFLAT1i 00722 : REOP_UCFLAT1; 00723 SET_ARG(pc, t->u.flat.chr); 00724 pc += ARG_LEN; 00725 } 00726 break; 00727 00728 case REOP_LPAREN: 00729 assert(emitStateSP); 00730 pc = WriteCompactIndex(pc, t->u.parenIndex); 00731 emitStateSP->continueNode = t; 00732 emitStateSP->continueOp = REOP_RPAREN; 00733 ++emitStateSP; 00734 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth); 00735 t = t->kid; 00736 op = t->op; 00737 continue; 00738 00739 case REOP_RPAREN: 00740 pc = WriteCompactIndex(pc, t->u.parenIndex); 00741 break; 00742 00743 case REOP_BACKREF: 00744 pc = WriteCompactIndex(pc, t->u.parenIndex); 00745 break; 00746 00747 case REOP_ASSERT: 00748 assert(emitStateSP); 00749 emitStateSP->nextTermFixup = pc; 00750 pc += OFFSET_LEN; 00751 emitStateSP->continueNode = t; 00752 emitStateSP->continueOp = REOP_ASSERTTEST; 00753 ++emitStateSP; 00754 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth); 00755 t = t->kid; 00756 op = t->op; 00757 continue; 00758 00759 case REOP_ASSERTTEST: 00760 case REOP_ASSERTNOTTEST: 00761 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc)) 00762 goto jump_too_big; 00763 break; 00764 00765 case REOP_ASSERT_NOT: 00766 assert(emitStateSP); 00767 emitStateSP->nextTermFixup = pc; 00768 pc += OFFSET_LEN; 00769 emitStateSP->continueNode = t; 00770 emitStateSP->continueOp = REOP_ASSERTNOTTEST; 00771 ++emitStateSP; 00772 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth); 00773 t = t->kid; 00774 op = t->op; 00775 continue; 00776 00777 case REOP_QUANT: 00778 assert(emitStateSP); 00779 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) { 00780 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR; 00781 } else if (t->u.range.min == 0 && t->u.range.max == 1) { 00782 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT; 00783 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) { 00784 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS; 00785 } else { 00786 if (!t->u.range.greedy) 00787 pc[-1] = REOP_MINIMALQUANT; 00788 pc = WriteCompactIndex(pc, t->u.range.min); 00789 /* 00790 * Write max + 1 to avoid using size_t(max) + 1 bytes 00791 * for (UINT)-1 sentinel. 00792 */ 00793 pc = WriteCompactIndex(pc, t->u.range.max + 1); 00794 } 00795 emitStateSP->nextTermFixup = pc; 00796 pc += OFFSET_LEN; 00797 emitStateSP->continueNode = t; 00798 emitStateSP->continueOp = REOP_ENDCHILD; 00799 ++emitStateSP; 00800 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth); 00801 t = t->kid; 00802 op = t->op; 00803 continue; 00804 00805 case REOP_ENDCHILD: 00806 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc)) 00807 goto jump_too_big; 00808 break; 00809 00810 case REOP_CLASS: 00811 if (!t->u.ucclass.sense) 00812 pc[-1] = REOP_NCLASS; 00813 pc = WriteCompactIndex(pc, t->u.ucclass.index); 00814 charSet = &re->classList[t->u.ucclass.index]; 00815 charSet->converted = FALSE; 00816 charSet->length = t->u.ucclass.bmsize; 00817 charSet->u.src.startIndex = t->u.ucclass.startIndex; 00818 charSet->u.src.length = t->u.ucclass.kidlen; 00819 charSet->sense = t->u.ucclass.sense; 00820 break; 00821 00822 default: 00823 break; 00824 } 00825 00826 t = t->next; 00827 if (t) { 00828 op = t->op; 00829 } else { 00830 if (emitStateSP == emitStateStack) 00831 break; 00832 --emitStateSP; 00833 t = emitStateSP->continueNode; 00834 op = (REOp) emitStateSP->continueOp; 00835 } 00836 } 00837 00838 cleanup: 00839 heap_free(emitStateStack); 00840 return pc; 00841 00842 jump_too_big: 00843 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); 00844 pc = NULL; 00845 goto cleanup; 00846 } 00847 00848 /* 00849 * Process the op against the two top operands, reducing them to a single 00850 * operand in the penultimate slot. Update progLength and treeDepth. 00851 */ 00852 static BOOL 00853 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, 00854 INT operandSP) 00855 { 00856 RENode *result; 00857 00858 switch (opData->op) { 00859 case REOP_ALT: 00860 result = NewRENode(state, REOP_ALT); 00861 if (!result) 00862 return FALSE; 00863 result->kid = operandStack[operandSP - 2]; 00864 result->u.kid2 = operandStack[operandSP - 1]; 00865 operandStack[operandSP - 2] = result; 00866 00867 if (state->treeDepth == TREE_DEPTH_MAX) { 00868 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); 00869 return FALSE; 00870 } 00871 ++state->treeDepth; 00872 00873 /* 00874 * Look at both alternates to see if there's a FLAT or a CLASS at 00875 * the start of each. If so, use a prerequisite match. 00876 */ 00877 if (((RENode *) result->kid)->op == REOP_FLAT && 00878 ((RENode *) result->u.kid2)->op == REOP_FLAT && 00879 (state->flags & JSREG_FOLD) == 0) { 00880 result->op = REOP_ALTPREREQ; 00881 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr; 00882 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr; 00883 /* ALTPREREQ, <end>, uch1, uch2, <next>, ..., 00884 JUMP, <end> ... ENDALT */ 00885 state->progLength += 13; 00886 } 00887 else 00888 if (((RENode *) result->kid)->op == REOP_CLASS && 00889 ((RENode *) result->kid)->u.ucclass.index < 256 && 00890 ((RENode *) result->u.kid2)->op == REOP_FLAT && 00891 (state->flags & JSREG_FOLD) == 0) { 00892 result->op = REOP_ALTPREREQ2; 00893 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr; 00894 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index; 00895 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ..., 00896 JUMP, <end> ... ENDALT */ 00897 state->progLength += 13; 00898 } 00899 else 00900 if (((RENode *) result->kid)->op == REOP_FLAT && 00901 ((RENode *) result->u.kid2)->op == REOP_CLASS && 00902 ((RENode *) result->u.kid2)->u.ucclass.index < 256 && 00903 (state->flags & JSREG_FOLD) == 0) { 00904 result->op = REOP_ALTPREREQ2; 00905 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr; 00906 result->u.altprereq.ch2 = 00907 ((RENode *) result->u.kid2)->u.ucclass.index; 00908 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ..., 00909 JUMP, <end> ... ENDALT */ 00910 state->progLength += 13; 00911 } 00912 else { 00913 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */ 00914 state->progLength += 7; 00915 } 00916 break; 00917 00918 case REOP_CONCAT: 00919 result = operandStack[operandSP - 2]; 00920 while (result->next) 00921 result = result->next; 00922 result->next = operandStack[operandSP - 1]; 00923 break; 00924 00925 case REOP_ASSERT: 00926 case REOP_ASSERT_NOT: 00927 case REOP_LPARENNON: 00928 case REOP_LPAREN: 00929 /* These should have been processed by a close paren. */ 00930 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN, 00931 opData->errPos); 00932 return FALSE; 00933 00934 default:; 00935 } 00936 return TRUE; 00937 } 00938 00939 /* 00940 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag 00941 * its being on the stack, and to propagate errors to its callers. 00942 */ 00943 #define JSREG_FIND_PAREN_COUNT 0x8000 00944 #define JSREG_FIND_PAREN_ERROR 0x4000 00945 00946 /* 00947 * Magic return value from FindParenCount and GetDecimalValue, to indicate 00948 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if 00949 * its findMax parameter is non-null. 00950 */ 00951 #define OVERFLOW_VALUE ((UINT)-1) 00952 00953 static UINT 00954 FindParenCount(CompilerState *state) 00955 { 00956 CompilerState temp; 00957 int i; 00958 00959 if (state->flags & JSREG_FIND_PAREN_COUNT) 00960 return OVERFLOW_VALUE; 00961 00962 /* 00963 * Copy state into temp, flag it so we never report an invalid backref, 00964 * and reset its members to parse the entire regexp. This is obviously 00965 * suboptimal, but GetDecimalValue calls us only if a backref appears to 00966 * refer to a forward parenthetical, which is rare. 00967 */ 00968 temp = *state; 00969 temp.flags |= JSREG_FIND_PAREN_COUNT; 00970 temp.cp = temp.cpbegin; 00971 temp.parenCount = 0; 00972 temp.classCount = 0; 00973 temp.progLength = 0; 00974 temp.treeDepth = 0; 00975 temp.classBitmapsMem = 0; 00976 for (i = 0; i < CLASS_CACHE_SIZE; i++) 00977 temp.classCache[i].start = NULL; 00978 00979 if (!ParseRegExp(&temp)) { 00980 state->flags |= JSREG_FIND_PAREN_ERROR; 00981 return OVERFLOW_VALUE; 00982 } 00983 return temp.parenCount; 00984 } 00985 00986 /* 00987 * Extract and return a decimal value at state->cp. The initial character c 00988 * has already been read. Return OVERFLOW_VALUE if the result exceeds max. 00989 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in 00990 * state->flags to discover whether an error occurred under findMax. 00991 */ 00992 static UINT 00993 GetDecimalValue(WCHAR c, UINT max, UINT (*findMax)(CompilerState *state), 00994 CompilerState *state) 00995 { 00996 UINT value = JS7_UNDEC(c); 00997 BOOL overflow = (value > max && (!findMax || value > findMax(state))); 00998 00999 /* The following restriction allows simpler overflow checks. */ 01000 assert(max <= ((UINT)-1 - 9) / 10); 01001 while (state->cp < state->cpend) { 01002 c = *state->cp; 01003 if (!JS7_ISDEC(c)) 01004 break; 01005 value = 10 * value + JS7_UNDEC(c); 01006 if (!overflow && value > max && (!findMax || value > findMax(state))) 01007 overflow = TRUE; 01008 ++state->cp; 01009 } 01010 return overflow ? OVERFLOW_VALUE : value; 01011 } 01012 01013 /* 01014 * Calculate the total size of the bitmap required for a class expression. 01015 */ 01016 static BOOL 01017 CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src, 01018 const WCHAR *end) 01019 { 01020 UINT max = 0; 01021 BOOL inRange = FALSE; 01022 WCHAR c, rangeStart = 0; 01023 UINT n, digit, nDigits, i; 01024 01025 target->u.ucclass.bmsize = 0; 01026 target->u.ucclass.sense = TRUE; 01027 01028 if (src == end) 01029 return TRUE; 01030 01031 if (*src == '^') { 01032 ++src; 01033 target->u.ucclass.sense = FALSE; 01034 } 01035 01036 while (src != end) { 01037 BOOL canStartRange = TRUE; 01038 UINT localMax = 0; 01039 01040 switch (*src) { 01041 case '\\': 01042 ++src; 01043 c = *src++; 01044 switch (c) { 01045 case 'b': 01046 localMax = 0x8; 01047 break; 01048 case 'f': 01049 localMax = 0xC; 01050 break; 01051 case 'n': 01052 localMax = 0xA; 01053 break; 01054 case 'r': 01055 localMax = 0xD; 01056 break; 01057 case 't': 01058 localMax = 0x9; 01059 break; 01060 case 'v': 01061 localMax = 0xB; 01062 break; 01063 case 'c': 01064 if (src < end && RE_IS_LETTER(*src)) { 01065 localMax = (UINT) (*src++) & 0x1F; 01066 } else { 01067 --src; 01068 localMax = '\\'; 01069 } 01070 break; 01071 case 'x': 01072 nDigits = 2; 01073 goto lexHex; 01074 case 'u': 01075 nDigits = 4; 01076 lexHex: 01077 n = 0; 01078 for (i = 0; (i < nDigits) && (src < end); i++) { 01079 c = *src++; 01080 if (!isASCIIHexDigit(c, &digit)) { 01081 /* 01082 * Back off to accepting the original 01083 *'\' as a literal. 01084 */ 01085 src -= i + 1; 01086 n = '\\'; 01087 break; 01088 } 01089 n = (n << 4) | digit; 01090 } 01091 localMax = n; 01092 break; 01093 case 'd': 01094 canStartRange = FALSE; 01095 if (inRange) { 01096 JS_ReportErrorNumber(state->context, 01097 js_GetErrorMessage, NULL, 01098 JSMSG_BAD_CLASS_RANGE); 01099 return FALSE; 01100 } 01101 localMax = '9'; 01102 break; 01103 case 'D': 01104 case 's': 01105 case 'S': 01106 case 'w': 01107 case 'W': 01108 canStartRange = FALSE; 01109 if (inRange) { 01110 JS_ReportErrorNumber(state->context, 01111 js_GetErrorMessage, NULL, 01112 JSMSG_BAD_CLASS_RANGE); 01113 return FALSE; 01114 } 01115 max = 65535; 01116 01117 /* 01118 * If this is the start of a range, ensure that it's less than 01119 * the end. 01120 */ 01121 localMax = 0; 01122 break; 01123 case '0': 01124 case '1': 01125 case '2': 01126 case '3': 01127 case '4': 01128 case '5': 01129 case '6': 01130 case '7': 01131 /* 01132 * This is a non-ECMA extension - decimal escapes (in this 01133 * case, octal!) are supposed to be an error inside class 01134 * ranges, but supported here for backwards compatibility. 01135 * 01136 */ 01137 n = JS7_UNDEC(c); 01138 c = *src; 01139 if ('0' <= c && c <= '7') { 01140 src++; 01141 n = 8 * n + JS7_UNDEC(c); 01142 c = *src; 01143 if ('0' <= c && c <= '7') { 01144 src++; 01145 i = 8 * n + JS7_UNDEC(c); 01146 if (i <= 0377) 01147 n = i; 01148 else 01149 src--; 01150 } 01151 } 01152 localMax = n; 01153 break; 01154 01155 default: 01156 localMax = c; 01157 break; 01158 } 01159 break; 01160 default: 01161 localMax = *src++; 01162 break; 01163 } 01164 01165 if (inRange) { 01166 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */ 01167 if (rangeStart > localMax) { 01168 JS_ReportErrorNumber(state->context, 01169 js_GetErrorMessage, NULL, 01170 JSMSG_BAD_CLASS_RANGE); 01171 return FALSE; 01172 } 01173 inRange = FALSE; 01174 } else { 01175 if (canStartRange && src < end - 1) { 01176 if (*src == '-') { 01177 ++src; 01178 inRange = TRUE; 01179 rangeStart = (WCHAR)localMax; 01180 continue; 01181 } 01182 } 01183 if (state->flags & JSREG_FOLD) 01184 rangeStart = localMax; /* one run of the uc/dc loop below */ 01185 } 01186 01187 if (state->flags & JSREG_FOLD) { 01188 WCHAR maxch = localMax; 01189 01190 for (i = rangeStart; i <= localMax; i++) { 01191 WCHAR uch, dch; 01192 01193 uch = toupperW(i); 01194 dch = tolowerW(i); 01195 if(maxch < uch) 01196 maxch = uch; 01197 if(maxch < dch) 01198 maxch = dch; 01199 } 01200 localMax = maxch; 01201 } 01202 01203 if (localMax > max) 01204 max = localMax; 01205 } 01206 target->u.ucclass.bmsize = max; 01207 return TRUE; 01208 } 01209 01210 static INT 01211 ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues) 01212 { 01213 UINT min, max; 01214 WCHAR c; 01215 const WCHAR *errp = state->cp++; 01216 01217 c = *state->cp; 01218 if (JS7_ISDEC(c)) { 01219 ++state->cp; 01220 min = GetDecimalValue(c, 0xFFFF, NULL, state); 01221 c = *state->cp; 01222 01223 if (!ignoreValues && min == OVERFLOW_VALUE) 01224 return JSMSG_MIN_TOO_BIG; 01225 01226 if (c == ',') { 01227 c = *++state->cp; 01228 if (JS7_ISDEC(c)) { 01229 ++state->cp; 01230 max = GetDecimalValue(c, 0xFFFF, NULL, state); 01231 c = *state->cp; 01232 if (!ignoreValues && max == OVERFLOW_VALUE) 01233 return JSMSG_MAX_TOO_BIG; 01234 if (!ignoreValues && min > max) 01235 return JSMSG_OUT_OF_ORDER; 01236 } else { 01237 max = (UINT)-1; 01238 } 01239 } else { 01240 max = min; 01241 } 01242 if (c == '}') { 01243 state->result = NewRENode(state, REOP_QUANT); 01244 if (!state->result) 01245 return JSMSG_OUT_OF_MEMORY; 01246 state->result->u.range.min = min; 01247 state->result->u.range.max = max; 01248 /* 01249 * QUANT, <min>, <max>, <next> ... <ENDCHILD> 01250 * where <max> is written as compact(max+1) to make 01251 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1. 01252 */ 01253 state->progLength += (1 + GetCompactIndexWidth(min) 01254 + GetCompactIndexWidth(max + 1) 01255 +3); 01256 return 0; 01257 } 01258 } 01259 01260 state->cp = errp; 01261 return -1; 01262 } 01263 01264 static BOOL 01265 ParseQuantifier(CompilerState *state) 01266 { 01267 RENode *term; 01268 term = state->result; 01269 if (state->cp < state->cpend) { 01270 switch (*state->cp) { 01271 case '+': 01272 state->result = NewRENode(state, REOP_QUANT); 01273 if (!state->result) 01274 return FALSE; 01275 state->result->u.range.min = 1; 01276 state->result->u.range.max = (UINT)-1; 01277 /* <PLUS>, <next> ... <ENDCHILD> */ 01278 state->progLength += 4; 01279 goto quantifier; 01280 case '*': 01281 state->result = NewRENode(state, REOP_QUANT); 01282 if (!state->result) 01283 return FALSE; 01284 state->result->u.range.min = 0; 01285 state->result->u.range.max = (UINT)-1; 01286 /* <STAR>, <next> ... <ENDCHILD> */ 01287 state->progLength += 4; 01288 goto quantifier; 01289 case '?': 01290 state->result = NewRENode(state, REOP_QUANT); 01291 if (!state->result) 01292 return FALSE; 01293 state->result->u.range.min = 0; 01294 state->result->u.range.max = 1; 01295 /* <OPT>, <next> ... <ENDCHILD> */ 01296 state->progLength += 4; 01297 goto quantifier; 01298 case '{': /* balance '}' */ 01299 { 01300 INT err; 01301 01302 err = ParseMinMaxQuantifier(state, FALSE); 01303 if (err == 0) 01304 goto quantifier; 01305 if (err == -1) 01306 return TRUE; 01307 01308 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp); 01309 return FALSE; 01310 } 01311 default:; 01312 } 01313 } 01314 return TRUE; 01315 01316 quantifier: 01317 if (state->treeDepth == TREE_DEPTH_MAX) { 01318 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); 01319 return FALSE; 01320 } 01321 01322 ++state->treeDepth; 01323 ++state->cp; 01324 state->result->kid = term; 01325 if (state->cp < state->cpend && *state->cp == '?') { 01326 ++state->cp; 01327 state->result->u.range.greedy = FALSE; 01328 } else { 01329 state->result->u.range.greedy = TRUE; 01330 } 01331 return TRUE; 01332 } 01333 01334 /* 01335 * item: assertion An item is either an assertion or 01336 * quantatom a quantified atom. 01337 * 01338 * assertion: '^' Assertions match beginning of string 01339 * (or line if the class static property 01340 * RegExp.multiline is true). 01341 * '$' End of string (or line if the class 01342 * static property RegExp.multiline is 01343 * true). 01344 * '\b' Word boundary (between \w and \W). 01345 * '\B' Word non-boundary. 01346 * 01347 * quantatom: atom An unquantified atom. 01348 * quantatom '{' n ',' m '}' 01349 * Atom must occur between n and m times. 01350 * quantatom '{' n ',' '}' Atom must occur at least n times. 01351 * quantatom '{' n '}' Atom must occur exactly n times. 01352 * quantatom '*' Zero or more times (same as {0,}). 01353 * quantatom '+' One or more times (same as {1,}). 01354 * quantatom '?' Zero or one time (same as {0,1}). 01355 * 01356 * any of which can be optionally followed by '?' for ungreedy 01357 * 01358 * atom: '(' regexp ')' A parenthesized regexp (what matched 01359 * can be addressed using a backreference, 01360 * see '\' n below). 01361 * '.' Matches any char except '\n'. 01362 * '[' classlist ']' A character class. 01363 * '[' '^' classlist ']' A negated character class. 01364 * '\f' Form Feed. 01365 * '\n' Newline (Line Feed). 01366 * '\r' Carriage Return. 01367 * '\t' Horizontal Tab. 01368 * '\v' Vertical Tab. 01369 * '\d' A digit (same as [0-9]). 01370 * '\D' A non-digit. 01371 * '\w' A word character, [0-9a-z_A-Z]. 01372 * '\W' A non-word character. 01373 * '\s' A whitespace character, [ \b\f\n\r\t\v]. 01374 * '\S' A non-whitespace character. 01375 * '\' n A backreference to the nth (n decimal 01376 * and positive) parenthesized expression. 01377 * '\' octal An octal escape sequence (octal must be 01378 * two or three digits long, unless it is 01379 * 0 for the null character). 01380 * '\x' hex A hex escape (hex must be two digits). 01381 * '\u' unicode A unicode escape (must be four digits). 01382 * '\c' ctrl A control character, ctrl is a letter. 01383 * '\' literalatomchar Any character except one of the above 01384 * that follow '\' in an atom. 01385 * otheratomchar Any character not first among the other 01386 * atom right-hand sides. 01387 */ 01388 static BOOL 01389 ParseTerm(CompilerState *state) 01390 { 01391 WCHAR c = *state->cp++; 01392 UINT nDigits; 01393 UINT num, tmp, n, i; 01394 const WCHAR *termStart; 01395 01396 switch (c) { 01397 /* assertions and atoms */ 01398 case '^': 01399 state->result = NewRENode(state, REOP_BOL); 01400 if (!state->result) 01401 return FALSE; 01402 state->progLength++; 01403 return TRUE; 01404 case '$': 01405 state->result = NewRENode(state, REOP_EOL); 01406 if (!state->result) 01407 return FALSE; 01408 state->progLength++; 01409 return TRUE; 01410 case '\\': 01411 if (state->cp >= state->cpend) { 01412 /* a trailing '\' is an error */ 01413 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH); 01414 return FALSE; 01415 } 01416 c = *state->cp++; 01417 switch (c) { 01418 /* assertion escapes */ 01419 case 'b' : 01420 state->result = NewRENode(state, REOP_WBDRY); 01421 if (!state->result) 01422 return FALSE; 01423 state->progLength++; 01424 return TRUE; 01425 case 'B': 01426 state->result = NewRENode(state, REOP_WNONBDRY); 01427 if (!state->result) 01428 return FALSE; 01429 state->progLength++; 01430 return TRUE; 01431 /* Decimal escape */ 01432 case '0': 01433 /* Give a strict warning. See also the note below. */ 01434 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n"); 01435 doOctal: 01436 num = 0; 01437 while (state->cp < state->cpend) { 01438 c = *state->cp; 01439 if (c < '0' || '7' < c) 01440 break; 01441 state->cp++; 01442 tmp = 8 * num + (UINT)JS7_UNDEC(c); 01443 if (tmp > 0377) 01444 break; 01445 num = tmp; 01446 } 01447 c = (WCHAR)num; 01448 doFlat: 01449 state->result = NewRENode(state, REOP_FLAT); 01450 if (!state->result) 01451 return FALSE; 01452 state->result->u.flat.chr = c; 01453 state->result->u.flat.length = 1; 01454 state->progLength += 3; 01455 break; 01456 case '1': 01457 case '2': 01458 case '3': 01459 case '4': 01460 case '5': 01461 case '6': 01462 case '7': 01463 case '8': 01464 case '9': 01465 termStart = state->cp - 1; 01466 num = GetDecimalValue(c, state->parenCount, FindParenCount, state); 01467 if (state->flags & JSREG_FIND_PAREN_ERROR) 01468 return FALSE; 01469 if (num == OVERFLOW_VALUE) { 01470 /* Give a strict mode warning. */ 01471 WARN("back-reference exceeds number of capturing parentheses\n"); 01472 01473 /* 01474 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax 01475 * error here. However, for compatibility with IE, we treat the 01476 * whole backref as flat if the first character in it is not a 01477 * valid octal character, and as an octal escape otherwise. 01478 */ 01479 state->cp = termStart; 01480 if (c >= '8') { 01481 /* Treat this as flat. termStart - 1 is the \. */ 01482 c = '\\'; 01483 goto asFlat; 01484 } 01485 01486 /* Treat this as an octal escape. */ 01487 goto doOctal; 01488 } 01489 assert(1 <= num && num <= 0x10000); 01490 state->result = NewRENode(state, REOP_BACKREF); 01491 if (!state->result) 01492 return FALSE; 01493 state->result->u.parenIndex = num - 1; 01494 state->progLength 01495 += 1 + GetCompactIndexWidth(state->result->u.parenIndex); 01496 break; 01497 /* Control escape */ 01498 case 'f': 01499 c = 0xC; 01500 goto doFlat; 01501 case 'n': 01502 c = 0xA; 01503 goto doFlat; 01504 case 'r': 01505 c = 0xD; 01506 goto doFlat; 01507 case 't': 01508 c = 0x9; 01509 goto doFlat; 01510 case 'v': 01511 c = 0xB; 01512 goto doFlat; 01513 /* Control letter */ 01514 case 'c': 01515 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) { 01516 c = (WCHAR) (*state->cp++ & 0x1F); 01517 } else { 01518 /* back off to accepting the original '\' as a literal */ 01519 --state->cp; 01520 c = '\\'; 01521 } 01522 goto doFlat; 01523 /* HexEscapeSequence */ 01524 case 'x': 01525 nDigits = 2; 01526 goto lexHex; 01527 /* UnicodeEscapeSequence */ 01528 case 'u': 01529 nDigits = 4; 01530 lexHex: 01531 n = 0; 01532 for (i = 0; i < nDigits && state->cp < state->cpend; i++) { 01533 UINT digit; 01534 c = *state->cp++; 01535 if (!isASCIIHexDigit(c, &digit)) { 01536 /* 01537 * Back off to accepting the original 'u' or 'x' as a 01538 * literal. 01539 */ 01540 state->cp -= i + 2; 01541 n = *state->cp++; 01542 break; 01543 } 01544 n = (n << 4) | digit; 01545 } 01546 c = (WCHAR) n; 01547 goto doFlat; 01548 /* Character class escapes */ 01549 case 'd': 01550 state->result = NewRENode(state, REOP_DIGIT); 01551 doSimple: 01552 if (!state->result) 01553 return FALSE; 01554 state->progLength++; 01555 break; 01556 case 'D': 01557 state->result = NewRENode(state, REOP_NONDIGIT); 01558 goto doSimple; 01559 case 's': 01560 state->result = NewRENode(state, REOP_SPACE); 01561 goto doSimple; 01562 case 'S': 01563 state->result = NewRENode(state, REOP_NONSPACE); 01564 goto doSimple; 01565 case 'w': 01566 state->result = NewRENode(state, REOP_ALNUM); 01567 goto doSimple; 01568 case 'W': 01569 state->result = NewRENode(state, REOP_NONALNUM); 01570 goto doSimple; 01571 /* IdentityEscape */ 01572 default: 01573 state->result = NewRENode(state, REOP_FLAT); 01574 if (!state->result) 01575 return FALSE; 01576 state->result->u.flat.chr = c; 01577 state->result->u.flat.length = 1; 01578 state->result->kid = (void *) (state->cp - 1); 01579 state->progLength += 3; 01580 break; 01581 } 01582 break; 01583 case '[': 01584 state->result = NewRENode(state, REOP_CLASS); 01585 if (!state->result) 01586 return FALSE; 01587 termStart = state->cp; 01588 state->result->u.ucclass.startIndex = termStart - state->cpbegin; 01589 for (;;) { 01590 if (state->cp == state->cpend) { 01591 ReportRegExpErrorHelper(state, JSREPORT_ERROR, 01592 JSMSG_UNTERM_CLASS, termStart); 01593 01594 return FALSE; 01595 } 01596 if (*state->cp == '\\') { 01597 state->cp++; 01598 if (state->cp != state->cpend) 01599 state->cp++; 01600 continue; 01601 } 01602 if (*state->cp == ']') { 01603 state->result->u.ucclass.kidlen = state->cp - termStart; 01604 break; 01605 } 01606 state->cp++; 01607 } 01608 for (i = 0; i < CLASS_CACHE_SIZE; i++) { 01609 if (!state->classCache[i].start) { 01610 state->classCache[i].start = termStart; 01611 state->classCache[i].length = state->result->u.ucclass.kidlen; 01612 state->classCache[i].index = state->classCount; 01613 break; 01614 } 01615 if (state->classCache[i].length == 01616 state->result->u.ucclass.kidlen) { 01617 for (n = 0; ; n++) { 01618 if (n == state->classCache[i].length) { 01619 state->result->u.ucclass.index 01620 = state->classCache[i].index; 01621 goto claim; 01622 } 01623 if (state->classCache[i].start[n] != termStart[n]) 01624 break; 01625 } 01626 } 01627 } 01628 state->result->u.ucclass.index = state->classCount++; 01629 01630 claim: 01631 /* 01632 * Call CalculateBitmapSize now as we want any errors it finds 01633 * to be reported during the parse phase, not at execution. 01634 */ 01635 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++)) 01636 return FALSE; 01637 /* 01638 * Update classBitmapsMem with number of bytes to hold bmsize bits, 01639 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8 01640 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize. 01641 */ 01642 n = (state->result->u.ucclass.bmsize >> 3) + 1; 01643 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) { 01644 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); 01645 return FALSE; 01646 } 01647 state->classBitmapsMem += n; 01648 /* CLASS, <index> */ 01649 state->progLength 01650 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index); 01651 break; 01652 01653 case '.': 01654 state->result = NewRENode(state, REOP_DOT); 01655 goto doSimple; 01656 01657 case '{': 01658 { 01659 const WCHAR *errp = state->cp--; 01660 INT err; 01661 01662 err = ParseMinMaxQuantifier(state, TRUE); 01663 state->cp = errp; 01664 01665 if (err < 0) 01666 goto asFlat; 01667 01668 /* FALL THROUGH */ 01669 } 01670 case '*': 01671 case '+': 01672 case '?': 01673 ReportRegExpErrorHelper(state, JSREPORT_ERROR, 01674 JSMSG_BAD_QUANTIFIER, state->cp - 1); 01675 return FALSE; 01676 default: 01677 asFlat: 01678 state->result = NewRENode(state, REOP_FLAT); 01679 if (!state->result) 01680 return FALSE; 01681 state->result->u.flat.chr = c; 01682 state->result->u.flat.length = 1; 01683 state->result->kid = (void *) (state->cp - 1); 01684 state->progLength += 3; 01685 break; 01686 } 01687 return ParseQuantifier(state); 01688 } 01689 01690 /* 01691 * Top-down regular expression grammar, based closely on Perl4. 01692 * 01693 * regexp: altern A regular expression is one or more 01694 * altern '|' regexp alternatives separated by vertical bar. 01695 */ 01696 #define INITIAL_STACK_SIZE 128 01697 01698 static BOOL 01699 ParseRegExp(CompilerState *state) 01700 { 01701 size_t parenIndex; 01702 RENode *operand; 01703 REOpData *operatorStack; 01704 RENode **operandStack; 01705 REOp op; 01706 INT i; 01707 BOOL result = FALSE; 01708 01709 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE; 01710 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE; 01711 01712 /* Watch out for empty regexp */ 01713 if (state->cp == state->cpend) { 01714 state->result = NewRENode(state, REOP_EMPTY); 01715 return (state->result != NULL); 01716 } 01717 01718 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize); 01719 if (!operatorStack) 01720 return FALSE; 01721 01722 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize); 01723 if (!operandStack) 01724 goto out; 01725 01726 for (;;) { 01727 parenIndex = state->parenCount; 01728 if (state->cp == state->cpend) { 01729 /* 01730 * If we are at the end of the regexp and we're short one or more 01731 * operands, the regexp must have the form /x|/ or some such, with 01732 * left parentheses making us short more than one operand. 01733 */ 01734 if (operatorSP >= operandSP) { 01735 operand = NewRENode(state, REOP_EMPTY); 01736 if (!operand) 01737 goto out; 01738 goto pushOperand; 01739 } 01740 } else { 01741 switch (*state->cp) { 01742 case '(': 01743 ++state->cp; 01744 if (state->cp + 1 < state->cpend && 01745 *state->cp == '?' && 01746 (state->cp[1] == '=' || 01747 state->cp[1] == '!' || 01748 state->cp[1] == ':')) { 01749 switch (state->cp[1]) { 01750 case '=': 01751 op = REOP_ASSERT; 01752 /* ASSERT, <next>, ... ASSERTTEST */ 01753 state->progLength += 4; 01754 break; 01755 case '!': 01756 op = REOP_ASSERT_NOT; 01757 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */ 01758 state->progLength += 4; 01759 break; 01760 default: 01761 op = REOP_LPARENNON; 01762 break; 01763 } 01764 state->cp += 2; 01765 } else { 01766 op = REOP_LPAREN; 01767 /* LPAREN, <index>, ... RPAREN, <index> */ 01768 state->progLength 01769 += 2 * (1 + GetCompactIndexWidth(parenIndex)); 01770 state->parenCount++; 01771 if (state->parenCount == 65535) { 01772 ReportRegExpError(state, JSREPORT_ERROR, 01773 JSMSG_TOO_MANY_PARENS); 01774 goto out; 01775 } 01776 } 01777 goto pushOperator; 01778 01779 case ')': 01780 /* 01781 * If there's no stacked open parenthesis, throw syntax error. 01782 */ 01783 for (i = operatorSP - 1; ; i--) { 01784 if (i < 0) { 01785 ReportRegExpError(state, JSREPORT_ERROR, 01786 JSMSG_UNMATCHED_RIGHT_PAREN); 01787 goto out; 01788 } 01789 if (operatorStack[i].op == REOP_ASSERT || 01790 operatorStack[i].op == REOP_ASSERT_NOT || 01791 operatorStack[i].op == REOP_LPARENNON || 01792 operatorStack[i].op == REOP_LPAREN) { 01793 break; 01794 } 01795 } 01796 /* FALL THROUGH */ 01797 01798 case '|': 01799 /* Expected an operand before these, so make an empty one */ 01800 operand = NewRENode(state, REOP_EMPTY); 01801 if (!operand) 01802 goto out; 01803 goto pushOperand; 01804 01805 default: 01806 if (!ParseTerm(state)) 01807 goto out; 01808 operand = state->result; 01809 pushOperand: 01810 if (operandSP == operandStackSize) { 01811 RENode **tmp; 01812 operandStackSize += operandStackSize; 01813 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize); 01814 if (!tmp) 01815 goto out; 01816 operandStack = tmp; 01817 } 01818 operandStack[operandSP++] = operand; 01819 break; 01820 } 01821 } 01822 01823 /* At the end; process remaining operators. */ 01824 restartOperator: 01825 if (state->cp == state->cpend) { 01826 while (operatorSP) { 01827 --operatorSP; 01828 if (!ProcessOp(state, &operatorStack[operatorSP], 01829 operandStack, operandSP)) 01830 goto out; 01831 --operandSP; 01832 } 01833 assert(operandSP == 1); 01834 state->result = operandStack[0]; 01835 result = TRUE; 01836 goto out; 01837 } 01838 01839 switch (*state->cp) { 01840 case '|': 01841 /* Process any stacked 'concat' operators */ 01842 ++state->cp; 01843 while (operatorSP && 01844 operatorStack[operatorSP - 1].op == REOP_CONCAT) { 01845 --operatorSP; 01846 if (!ProcessOp(state, &operatorStack[operatorSP], 01847 operandStack, operandSP)) { 01848 goto out; 01849 } 01850 --operandSP; 01851 } 01852 op = REOP_ALT; 01853 goto pushOperator; 01854 01855 case ')': 01856 /* 01857 * If there's no stacked open parenthesis, throw syntax error. 01858 */ 01859 for (i = operatorSP - 1; ; i--) { 01860 if (i < 0) { 01861 ReportRegExpError(state, JSREPORT_ERROR, 01862 JSMSG_UNMATCHED_RIGHT_PAREN); 01863 goto out; 01864 } 01865 if (operatorStack[i].op == REOP_ASSERT || 01866 operatorStack[i].op == REOP_ASSERT_NOT || 01867 operatorStack[i].op == REOP_LPARENNON || 01868 operatorStack[i].op == REOP_LPAREN) { 01869 break; 01870 } 01871 } 01872 ++state->cp; 01873 01874 /* Process everything on the stack until the open parenthesis. */ 01875 for (;;) { 01876 assert(operatorSP); 01877 --operatorSP; 01878 switch (operatorStack[operatorSP].op) { 01879 case REOP_ASSERT: 01880 case REOP_ASSERT_NOT: 01881 case REOP_LPAREN: 01882 operand = NewRENode(state, operatorStack[operatorSP].op); 01883 if (!operand) 01884 goto out; 01885 operand->u.parenIndex = 01886 operatorStack[operatorSP].parenIndex; 01887 assert(operandSP); 01888 operand->kid = operandStack[operandSP - 1]; 01889 operandStack[operandSP - 1] = operand; 01890 if (state->treeDepth == TREE_DEPTH_MAX) { 01891 ReportRegExpError(state, JSREPORT_ERROR, 01892 JSMSG_REGEXP_TOO_COMPLEX); 01893 goto out; 01894 } 01895 ++state->treeDepth; 01896 /* FALL THROUGH */ 01897 01898 case REOP_LPARENNON: 01899 state->result = operandStack[operandSP - 1]; 01900 if (!ParseQuantifier(state)) 01901 goto out; 01902 operandStack[operandSP - 1] = state->result; 01903 goto restartOperator; 01904 default: 01905 if (!ProcessOp(state, &operatorStack[operatorSP], 01906 operandStack, operandSP)) 01907 goto out; 01908 --operandSP; 01909 break; 01910 } 01911 } 01912 break; 01913 01914 case '{': 01915 { 01916 const WCHAR *errp = state->cp; 01917 01918 if (ParseMinMaxQuantifier(state, TRUE) < 0) { 01919 /* 01920 * This didn't even scan correctly as a quantifier, so we should 01921 * treat it as flat. 01922 */ 01923 op = REOP_CONCAT; 01924 goto pushOperator; 01925 } 01926 01927 state->cp = errp; 01928 /* FALL THROUGH */ 01929 } 01930 01931 case '+': 01932 case '*': 01933 case '?': 01934 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER, 01935 state->cp); 01936 result = FALSE; 01937 goto out; 01938 01939 default: 01940 /* Anything else is the start of the next term. */ 01941 op = REOP_CONCAT; 01942 pushOperator: 01943 if (operatorSP == operatorStackSize) { 01944 REOpData *tmp; 01945 operatorStackSize += operatorStackSize; 01946 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize); 01947 if (!tmp) 01948 goto out; 01949 operatorStack = tmp; 01950 } 01951 operatorStack[operatorSP].op = op; 01952 operatorStack[operatorSP].errPos = state->cp; 01953 operatorStack[operatorSP++].parenIndex = parenIndex; 01954 break; 01955 } 01956 } 01957 out: 01958 heap_free(operatorStack); 01959 heap_free(operandStack); 01960 return result; 01961 } 01962 01963 /* 01964 * Save the current state of the match - the position in the input 01965 * text as well as the position in the bytecode. The state of any 01966 * parent expressions is also saved (preceding state). 01967 * Contents of parenCount parentheses from parenIndex are also saved. 01968 */ 01969 static REBackTrackData * 01970 PushBackTrackState(REGlobalData *gData, REOp op, 01971 jsbytecode *target, REMatchState *x, const WCHAR *cp, 01972 size_t parenIndex, size_t parenCount) 01973 { 01974 size_t i; 01975 REBackTrackData *result = 01976 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz); 01977 01978 size_t sz = sizeof(REBackTrackData) + 01979 gData->stateStackTop * sizeof(REProgState) + 01980 parenCount * sizeof(RECapture); 01981 01982 ptrdiff_t btsize = gData->backTrackStackSize; 01983 ptrdiff_t btincr = ((char *)result + sz) - 01984 ((char *)gData->backTrackStack + btsize); 01985 01986 TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount); 01987 01988 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount)); 01989 if (btincr > 0) { 01990 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack; 01991 01992 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION); 01993 btincr = ((btincr+btsize-1)/btsize)*btsize; 01994 gData->backTrackStack = jsheap_grow(gData->pool, gData->backTrackStack, btsize, btincr); 01995 if (!gData->backTrackStack) { 01996 js_ReportOutOfScriptQuota(gData->cx); 01997 gData->ok = FALSE; 01998 return NULL; 01999 } 02000 gData->backTrackStackSize = btsize + btincr; 02001 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset); 02002 } 02003 gData->backTrackSP = result; 02004 result->sz = gData->cursz; 02005 gData->cursz = sz; 02006 02007 result->backtrack_op = op; 02008 result->backtrack_pc = target; 02009 result->cp = cp; 02010 result->parenCount = parenCount; 02011 result->parenIndex = parenIndex; 02012 02013 result->saveStateStackTop = gData->stateStackTop; 02014 assert(gData->stateStackTop); 02015 memcpy(result + 1, gData->stateStack, 02016 sizeof(REProgState) * result->saveStateStackTop); 02017 02018 if (parenCount != 0) { 02019 memcpy((char *)(result + 1) + 02020 sizeof(REProgState) * result->saveStateStackTop, 02021 &x->parens[parenIndex], 02022 sizeof(RECapture) * parenCount); 02023 for (i = 0; i != parenCount; i++) 02024 x->parens[parenIndex + i].index = -1; 02025 } 02026 02027 return result; 02028 } 02029 02030 static inline REMatchState * 02031 FlatNIMatcher(REGlobalData *gData, REMatchState *x, WCHAR *matchChars, 02032 size_t length) 02033 { 02034 size_t i; 02035 assert(gData->cpend >= x->cp); 02036 if (length > (size_t)(gData->cpend - x->cp)) 02037 return NULL; 02038 for (i = 0; i != length; i++) { 02039 if (toupperW(matchChars[i]) != toupperW(x->cp[i])) 02040 return NULL; 02041 } 02042 x->cp += length; 02043 return x; 02044 } 02045 02046 /* 02047 * 1. Evaluate DecimalEscape to obtain an EscapeValue E. 02048 * 2. If E is not a character then go to step 6. 02049 * 3. Let ch be E's character. 02050 * 4. Let A be a one-element RECharSet containing the character ch. 02051 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result. 02052 * 6. E must be an integer. Let n be that integer. 02053 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception. 02054 * 8. Return an internal Matcher closure that takes two arguments, a State x 02055 * and a Continuation c, and performs the following: 02056 * 1. Let cap be x's captures internal array. 02057 * 2. Let s be cap[n]. 02058 * 3. If s is undefined, then call c(x) and return its result. 02059 * 4. Let e be x's endIndex. 02060 * 5. Let len be s's length. 02061 * 6. Let f be e+len. 02062 * 7. If f>InputLength, return failure. 02063 * 8. If there exists an integer i between 0 (inclusive) and len (exclusive) 02064 * such that Canonicalize(s[i]) is not the same character as 02065 * Canonicalize(Input [e+i]), then return failure. 02066 * 9. Let y be the State (f, cap). 02067 * 10. Call c(y) and return its result. 02068 */ 02069 static REMatchState * 02070 BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex) 02071 { 02072 size_t len, i; 02073 const WCHAR *parenContent; 02074 RECapture *cap = &x->parens[parenIndex]; 02075 02076 if (cap->index == -1) 02077 return x; 02078 02079 len = cap->length; 02080 if (x->cp + len > gData->cpend) 02081 return NULL; 02082 02083 parenContent = &gData->cpbegin[cap->index]; 02084 if (gData->regexp->flags & JSREG_FOLD) { 02085 for (i = 0; i < len; i++) { 02086 if (toupperW(parenContent[i]) != toupperW(x->cp[i])) 02087 return NULL; 02088 } 02089 } else { 02090 for (i = 0; i < len; i++) { 02091 if (parenContent[i] != x->cp[i]) 02092 return NULL; 02093 } 02094 } 02095 x->cp += len; 02096 return x; 02097 } 02098 02099 /* Add a single character to the RECharSet */ 02100 static void 02101 AddCharacterToCharSet(RECharSet *cs, WCHAR c) 02102 { 02103 UINT byteIndex = (UINT)(c >> 3); 02104 assert(c <= cs->length); 02105 cs->u.bits[byteIndex] |= 1 << (c & 0x7); 02106 } 02107 02108 02109 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */ 02110 static void 02111 AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2) 02112 { 02113 UINT i; 02114 02115 UINT byteIndex1 = c1 >> 3; 02116 UINT byteIndex2 = c2 >> 3; 02117 02118 assert(c2 <= cs->length && c1 <= c2); 02119 02120 c1 &= 0x7; 02121 c2 &= 0x7; 02122 02123 if (byteIndex1 == byteIndex2) { 02124 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1; 02125 } else { 02126 cs->u.bits[byteIndex1] |= 0xFF << c1; 02127 for (i = byteIndex1 + 1; i < byteIndex2; i++) 02128 cs->u.bits[i] = 0xFF; 02129 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2); 02130 } 02131 } 02132 02133 /* Compile the source of the class into a RECharSet */ 02134 static BOOL 02135 ProcessCharSet(REGlobalData *gData, RECharSet *charSet) 02136 { 02137 const WCHAR *src, *end; 02138 BOOL inRange = FALSE; 02139 WCHAR rangeStart = 0; 02140 UINT byteLength, n; 02141 WCHAR c, thisCh; 02142 INT nDigits, i; 02143 02144 assert(!charSet->converted); 02145 /* 02146 * Assert that startIndex and length points to chars inside [] inside 02147 * source string. 02148 */ 02149 assert(1 <= charSet->u.src.startIndex); 02150 assert(charSet->u.src.startIndex 02151 < SysStringLen(gData->regexp->source)); 02152 assert(charSet->u.src.length <= SysStringLen(gData->regexp->source) 02153 - 1 - charSet->u.src.startIndex); 02154 02155 charSet->converted = TRUE; 02156 src = gData->regexp->source + charSet->u.src.startIndex; 02157 02158 end = src + charSet->u.src.length; 02159 02160 assert(src[-1] == '[' && end[0] == ']'); 02161 02162 byteLength = (charSet->length >> 3) + 1; 02163 charSet->u.bits = heap_alloc(byteLength); 02164 if (!charSet->u.bits) { 02165 JS_ReportOutOfMemory(gData->cx); 02166 gData->ok = FALSE; 02167 return FALSE; 02168 } 02169 memset(charSet->u.bits, 0, byteLength); 02170 02171 if (src == end) 02172 return TRUE; 02173 02174 if (*src == '^') { 02175 assert(charSet->sense == FALSE); 02176 ++src; 02177 } else { 02178 assert(charSet->sense == TRUE); 02179 } 02180 02181 while (src != end) { 02182 switch (*src) { 02183 case '\\': 02184 ++src; 02185 c = *src++; 02186 switch (c) { 02187 case 'b': 02188 thisCh = 0x8; 02189 break; 02190 case 'f': 02191 thisCh = 0xC; 02192 break; 02193 case 'n': 02194 thisCh = 0xA; 02195 break; 02196 case 'r': 02197 thisCh = 0xD; 02198 break; 02199 case 't': 02200 thisCh = 0x9; 02201 break; 02202 case 'v': 02203 thisCh = 0xB; 02204 break; 02205 case 'c': 02206 if (src < end && JS_ISWORD(*src)) { 02207 thisCh = (WCHAR)(*src++ & 0x1F); 02208 } else { 02209 --src; 02210 thisCh = '\\'; 02211 } 02212 break; 02213 case 'x': 02214 nDigits = 2; 02215 goto lexHex; 02216 case 'u': 02217 nDigits = 4; 02218 lexHex: 02219 n = 0; 02220 for (i = 0; (i < nDigits) && (src < end); i++) { 02221 UINT digit; 02222 c = *src++; 02223 if (!isASCIIHexDigit(c, &digit)) { 02224 /* 02225 * Back off to accepting the original '\' 02226 * as a literal 02227 */ 02228 src -= i + 1; 02229 n = '\\'; 02230 break; 02231 } 02232 n = (n << 4) | digit; 02233 } 02234 thisCh = (WCHAR)n; 02235 break; 02236 case '0': 02237 case '1': 02238 case '2': 02239 case '3': 02240 case '4': 02241 case '5': 02242 case '6': 02243 case '7': 02244 /* 02245 * This is a non-ECMA extension - decimal escapes (in this 02246 * case, octal!) are supposed to be an error inside class 02247 * ranges, but supported here for backwards compatibility. 02248 */ 02249 n = JS7_UNDEC(c); 02250 c = *src; 02251 if ('0' <= c && c <= '7') { 02252 src++; 02253 n = 8 * n + JS7_UNDEC(c); 02254 c = *src; 02255 if ('0' <= c && c <= '7') { 02256 src++; 02257 i = 8 * n + JS7_UNDEC(c); 02258 if (i <= 0377) 02259 n = i; 02260 else 02261 src--; 02262 } 02263 } 02264 thisCh = (WCHAR)n; 02265 break; 02266 02267 case 'd': 02268 AddCharacterRangeToCharSet(charSet, '0', '9'); 02269 continue; /* don't need range processing */ 02270 case 'D': 02271 AddCharacterRangeToCharSet(charSet, 0, '0' - 1); 02272 AddCharacterRangeToCharSet(charSet, 02273 (WCHAR)('9' + 1), 02274 (WCHAR)charSet->length); 02275 continue; 02276 case 's': 02277 for (i = (INT)charSet->length; i >= 0; i--) 02278 if (isspaceW(i)) 02279 AddCharacterToCharSet(charSet, (WCHAR)i); 02280 continue; 02281 case 'S': 02282 for (i = (INT)charSet->length; i >= 0; i--) 02283 if (!isspaceW(i)) 02284 AddCharacterToCharSet(charSet, (WCHAR)i); 02285 continue; 02286 case 'w': 02287 for (i = (INT)charSet->length; i >= 0; i--) 02288 if (JS_ISWORD(i)) 02289 AddCharacterToCharSet(charSet, (WCHAR)i); 02290 continue; 02291 case 'W': 02292 for (i = (INT)charSet->length; i >= 0; i--) 02293 if (!JS_ISWORD(i)) 02294 AddCharacterToCharSet(charSet, (WCHAR)i); 02295 continue; 02296 default: 02297 thisCh = c; 02298 break; 02299 02300 } 02301 break; 02302 02303 default: 02304 thisCh = *src++; 02305 break; 02306 02307 } 02308 if (inRange) { 02309 if (gData->regexp->flags & JSREG_FOLD) { 02310 int i; 02311 02312 assert(rangeStart <= thisCh); 02313 for (i = rangeStart; i <= thisCh; i++) { 02314 WCHAR uch, dch; 02315 02316 AddCharacterToCharSet(charSet, i); 02317 uch = toupperW(i); 02318 dch = tolowerW(i); 02319 if (i != uch) 02320 AddCharacterToCharSet(charSet, uch); 02321 if (i != dch) 02322 AddCharacterToCharSet(charSet, dch); 02323 } 02324 } else { 02325 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh); 02326 } 02327 inRange = FALSE; 02328 } else { 02329 if (gData->regexp->flags & JSREG_FOLD) { 02330 AddCharacterToCharSet(charSet, toupperW(thisCh)); 02331 AddCharacterToCharSet(charSet, tolowerW(thisCh)); 02332 } else { 02333 AddCharacterToCharSet(charSet, thisCh); 02334 } 02335 if (src < end - 1) { 02336 if (*src == '-') { 02337 ++src; 02338 inRange = TRUE; 02339 rangeStart = thisCh; 02340 } 02341 } 02342 } 02343 } 02344 return TRUE; 02345 } 02346 02347 static BOOL 02348 ReallocStateStack(REGlobalData *gData) 02349 { 02350 size_t limit = gData->stateStackLimit; 02351 size_t sz = sizeof(REProgState) * limit; 02352 02353 gData->stateStack = jsheap_grow(gData->pool, gData->stateStack, sz, sz); 02354 if (!gData->stateStack) { 02355 js_ReportOutOfScriptQuota(gData->cx); 02356 gData->ok = FALSE; 02357 return FALSE; 02358 } 02359 gData->stateStackLimit = limit + limit; 02360 return TRUE; 02361 } 02362 02363 #define PUSH_STATE_STACK(data) \ 02364 do { \ 02365 ++(data)->stateStackTop; \ 02366 if ((data)->stateStackTop == (data)->stateStackLimit && \ 02367 !ReallocStateStack((data))) { \ 02368 return NULL; \ 02369 } \ 02370 }while(0) 02371 02372 /* 02373 * Apply the current op against the given input to see if it's going to match 02374 * or fail. Return false if we don't get a match, true if we do. If updatecp is 02375 * true, then update the current state's cp. Always update startpc to the next 02376 * op. 02377 */ 02378 static inline REMatchState * 02379 SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op, 02380 jsbytecode **startpc, BOOL updatecp) 02381 { 02382 REMatchState *result = NULL; 02383 WCHAR matchCh; 02384 size_t parenIndex; 02385 size_t offset, length, index; 02386 jsbytecode *pc = *startpc; /* pc has already been incremented past op */ 02387 WCHAR *source; 02388 const WCHAR *startcp = x->cp; 02389 WCHAR ch; 02390 RECharSet *charSet; 02391 02392 const char *opname = reop_names[op]; 02393 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program), 02394 (int)gData->stateStackTop * 2, "", opname); 02395 02396 switch (op) { 02397 case REOP_EMPTY: 02398 result = x; 02399 break; 02400 case REOP_BOL: 02401 if (x->cp != gData->cpbegin) { 02402 if ( 02403 !(gData->regexp->flags & JSREG_MULTILINE)) { 02404 break; 02405 } 02406 if (!RE_IS_LINE_TERM(x->cp[-1])) 02407 break; 02408 } 02409 result = x; 02410 break; 02411 case REOP_EOL: 02412 if (x->cp != gData->cpend) { 02413 if ( 02414 !(gData->regexp->flags & JSREG_MULTILINE)) { 02415 break; 02416 } 02417 if (!RE_IS_LINE_TERM(*x->cp)) 02418 break; 02419 } 02420 result = x; 02421 break; 02422 case REOP_WBDRY: 02423 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^ 02424 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) { 02425 result = x; 02426 } 02427 break; 02428 case REOP_WNONBDRY: 02429 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^ 02430 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) { 02431 result = x; 02432 } 02433 break; 02434 case REOP_DOT: 02435 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) { 02436 result = x; 02437 result->cp++; 02438 } 02439 break; 02440 case REOP_DIGIT: 02441 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) { 02442 result = x; 02443 result->cp++; 02444 } 02445 break; 02446 case REOP_NONDIGIT: 02447 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) { 02448 result = x; 02449 result->cp++; 02450 } 02451 break; 02452 case REOP_ALNUM: 02453 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) { 02454 result = x; 02455 result->cp++; 02456 } 02457 break; 02458 case REOP_NONALNUM: 02459 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) { 02460 result = x; 02461 result->cp++; 02462 } 02463 break; 02464 case REOP_SPACE: 02465 if (x->cp != gData->cpend && isspaceW(*x->cp)) { 02466 result = x; 02467 result->cp++; 02468 } 02469 break; 02470 case REOP_NONSPACE: 02471 if (x->cp != gData->cpend && !isspaceW(*x->cp)) { 02472 result = x; 02473 result->cp++; 02474 } 02475 break; 02476 case REOP_BACKREF: 02477 pc = ReadCompactIndex(pc, &parenIndex); 02478 assert(parenIndex < gData->regexp->parenCount); 02479 result = BackrefMatcher(gData, x, parenIndex); 02480 break; 02481 case REOP_FLAT: 02482 pc = ReadCompactIndex(pc, &offset); 02483 assert(offset < SysStringLen(gData->regexp->source)); 02484 pc = ReadCompactIndex(pc, &length); 02485 assert(1 <= length); 02486 assert(length <= SysStringLen(gData->regexp->source) - offset); 02487 if (length <= (size_t)(gData->cpend - x->cp)) { 02488 source = gData->regexp->source + offset; 02489 TRACE("%s\n", debugstr_wn(source, length)); 02490 for (index = 0; index != length; index++) { 02491 if (source[index] != x->cp[index]) 02492 return NULL; 02493 } 02494 x->cp += length; 02495 result = x; 02496 } 02497 break; 02498 case REOP_FLAT1: 02499 matchCh = *pc++; 02500 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp); 02501 if (x->cp != gData->cpend && *x->cp == matchCh) { 02502 result = x; 02503 result->cp++; 02504 } 02505 break; 02506 case REOP_FLATi: 02507 pc = ReadCompactIndex(pc, &offset); 02508 assert(offset < SysStringLen(gData->regexp->source)); 02509 pc = ReadCompactIndex(pc, &length); 02510 assert(1 <= length); 02511 assert(length <= SysStringLen(gData->regexp->source) - offset); 02512 source = gData->regexp->source; 02513 result = FlatNIMatcher(gData, x, source + offset, length); 02514 break; 02515 case REOP_FLAT1i: 02516 matchCh = *pc++; 02517 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) { 02518 result = x; 02519 result->cp++; 02520 } 02521 break; 02522 case REOP_UCFLAT1: 02523 matchCh = GET_ARG(pc); 02524 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp); 02525 pc += ARG_LEN; 02526 if (x->cp != gData->cpend && *x->cp == matchCh) { 02527 result = x; 02528 result->cp++; 02529 } 02530 break; 02531 case REOP_UCFLAT1i: 02532 matchCh = GET_ARG(pc); 02533 pc += ARG_LEN; 02534 if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) { 02535 result = x; 02536 result->cp++; 02537 } 02538 break; 02539 case REOP_CLASS: 02540 pc = ReadCompactIndex(pc, &index); 02541 assert(index < gData->regexp->classCount); 02542 if (x->cp != gData->cpend) { 02543 charSet = &gData->regexp->classList[index]; 02544 assert(charSet->converted); 02545 ch = *x->cp; 02546 index = ch >> 3; 02547 if (charSet->length != 0 && 02548 ch <= charSet->length && 02549 (charSet->u.bits[index] & (1 << (ch & 0x7)))) { 02550 result = x; 02551 result->cp++; 02552 } 02553 } 02554 break; 02555 case REOP_NCLASS: 02556 pc = ReadCompactIndex(pc, &index); 02557 assert(index < gData->regexp->classCount); 02558 if (x->cp != gData->cpend) { 02559 charSet = &gData->regexp->classList[index]; 02560 assert(charSet->converted); 02561 ch = *x->cp; 02562 index = ch >> 3; 02563 if (charSet->length == 0 || 02564 ch > charSet->length || 02565 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) { 02566 result = x; 02567 result->cp++; 02568 } 02569 } 02570 break; 02571 02572 default: 02573 assert(FALSE); 02574 } 02575 if (result) { 02576 if (!updatecp) 02577 x->cp = startcp; 02578 *startpc = pc; 02579 TRACE(" *\n"); 02580 return result; 02581 } 02582 x->cp = startcp; 02583 return NULL; 02584 } 02585 02586 static inline REMatchState * 02587 ExecuteREBytecode(REGlobalData *gData, REMatchState *x) 02588 { 02589 REMatchState *result = NULL; 02590 REBackTrackData *backTrackData; 02591 jsbytecode *nextpc, *testpc; 02592 REOp nextop; 02593 RECapture *cap; 02594 REProgState *curState; 02595 const WCHAR *startcp; 02596 size_t parenIndex, k; 02597 size_t parenSoFar = 0; 02598 02599 WCHAR matchCh1, matchCh2; 02600 RECharSet *charSet; 02601 02602 BOOL anchor; 02603 jsbytecode *pc = gData->regexp->program; 02604 REOp op = (REOp) *pc++; 02605 02606 /* 02607 * If the first node is a simple match, step the index into the string 02608 * until that match is made, or fail if it can't be found at all. 02609 */ 02610 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) { 02611 anchor = FALSE; 02612 while (x->cp <= gData->cpend) { 02613 nextpc = pc; /* reset back to start each time */ 02614 result = SimpleMatch(gData, x, op, &nextpc, TRUE); 02615 if (result) { 02616 anchor = TRUE; 02617 x = result; 02618 pc = nextpc; /* accept skip to next opcode */ 02619 op = (REOp) *pc++; 02620 assert(op < REOP_LIMIT); 02621 break; 02622 } 02623 gData->skipped++; 02624 x->cp++; 02625 } 02626 if (!anchor) 02627 goto bad; 02628 } 02629 02630 for (;;) { 02631 const char *opname = reop_names[op]; 02632 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program), 02633 (int)gData->stateStackTop * 2, "", opname); 02634 02635 if (REOP_IS_SIMPLE(op)) { 02636 result = SimpleMatch(gData, x, op, &pc, TRUE); 02637 } else { 02638 curState = &gData->stateStack[gData->stateStackTop]; 02639 switch (op) { 02640 case REOP_END: 02641 goto good; 02642 case REOP_ALTPREREQ2: 02643 nextpc = pc + GET_OFFSET(pc); /* start of next op */ 02644 pc += ARG_LEN; 02645 matchCh2 = GET_ARG(pc); 02646 pc += ARG_LEN; 02647 k = GET_ARG(pc); 02648 pc += ARG_LEN; 02649 02650 if (x->cp != gData->cpend) { 02651 if (*x->cp == matchCh2) 02652 goto doAlt; 02653 02654 charSet = &gData->regexp->classList[k]; 02655 if (!charSet->converted && !ProcessCharSet(gData, charSet)) 02656 goto bad; 02657 matchCh1 = *x->cp; 02658 k = matchCh1 >> 3; 02659 if ((charSet->length == 0 || 02660 matchCh1 > charSet->length || 02661 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^ 02662 charSet->sense) { 02663 goto doAlt; 02664 } 02665 } 02666 result = NULL; 02667 break; 02668 02669 case REOP_ALTPREREQ: 02670 nextpc = pc + GET_OFFSET(pc); /* start of next op */ 02671 pc += ARG_LEN; 02672 matchCh1 = GET_ARG(pc); 02673 pc += ARG_LEN; 02674 matchCh2 = GET_ARG(pc); 02675 pc += ARG_LEN; 02676 if (x->cp == gData->cpend || 02677 (*x->cp != matchCh1 && *x->cp != matchCh2)) { 02678 result = NULL; 02679 break; 02680 } 02681 /* else false thru... */ 02682 02683 case REOP_ALT: 02684 doAlt: 02685 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */ 02686 pc += ARG_LEN; /* start of this alternate */ 02687 curState->parenSoFar = parenSoFar; 02688 PUSH_STATE_STACK(gData); 02689 op = (REOp) *pc++; 02690 startcp = x->cp; 02691 if (REOP_IS_SIMPLE(op)) { 02692 if (!SimpleMatch(gData, x, op, &pc, TRUE)) { 02693 op = (REOp) *nextpc++; 02694 pc = nextpc; 02695 continue; 02696 } 02697 result = x; 02698 op = (REOp) *pc++; 02699 } 02700 nextop = (REOp) *nextpc++; 02701 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0)) 02702 goto bad; 02703 continue; 02704 02705 /* 02706 * Occurs at (successful) end of REOP_ALT, 02707 */ 02708 case REOP_JUMP: 02709 /* 02710 * If we have not gotten a result here, it is because of an 02711 * empty match. Do the same thing REOP_EMPTY would do. 02712 */ 02713 if (!result) 02714 result = x; 02715 02716 --gData->stateStackTop; 02717 pc += GET_OFFSET(pc); 02718 op = (REOp) *pc++; 02719 continue; 02720 02721 /* 02722 * Occurs at last (successful) end of REOP_ALT, 02723 */ 02724 case REOP_ENDALT: 02725 /* 02726 * If we have not gotten a result here, it is because of an 02727 * empty match. Do the same thing REOP_EMPTY would do. 02728 */ 02729 if (!result) 02730 result = x; 02731 02732 --gData->stateStackTop; 02733 op = (REOp) *pc++; 02734 continue; 02735 02736 case REOP_LPAREN: 02737 pc = ReadCompactIndex(pc, &parenIndex); 02738 TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex); 02739 assert(parenIndex < gData->regexp->parenCount); 02740 if (parenIndex + 1 > parenSoFar) 02741 parenSoFar = parenIndex + 1; 02742 x->parens[parenIndex].index = x->cp - gData->cpbegin; 02743 x->parens[parenIndex].length = 0; 02744 op = (REOp) *pc++; 02745 continue; 02746 02747 case REOP_RPAREN: 02748 { 02749 ptrdiff_t delta; 02750 02751 pc = ReadCompactIndex(pc, &parenIndex); 02752 assert(parenIndex < gData->regexp->parenCount); 02753 cap = &x->parens[parenIndex]; 02754 delta = x->cp - (gData->cpbegin + cap->index); 02755 cap->length = (delta < 0) ? 0 : (size_t) delta; 02756 op = (REOp) *pc++; 02757 02758 if (!result) 02759 result = x; 02760 continue; 02761 } 02762 case REOP_ASSERT: 02763 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */ 02764 pc += ARG_LEN; /* start of ASSERT child */ 02765 op = (REOp) *pc++; 02766 testpc = pc; 02767 if (REOP_IS_SIMPLE(op) && 02768 !SimpleMatch(gData, x, op, &testpc, FALSE)) { 02769 result = NULL; 02770 break; 02771 } 02772 curState->u.assertion.top = 02773 (char *)gData->backTrackSP - (char *)gData->backTrackStack; 02774 curState->u.assertion.sz = gData->cursz; 02775 curState->index = x->cp - gData->cpbegin; 02776 curState->parenSoFar = parenSoFar; 02777 PUSH_STATE_STACK(gData); 02778 if (!PushBackTrackState(gData, REOP_ASSERTTEST, 02779 nextpc, x, x->cp, 0, 0)) { 02780 goto bad; 02781 } 02782 continue; 02783 02784 case REOP_ASSERT_NOT: 02785 nextpc = pc + GET_OFFSET(pc); 02786 pc += ARG_LEN; 02787 op = (REOp) *pc++; 02788 testpc = pc; 02789 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ && 02790 SimpleMatch(gData, x, op, &testpc, FALSE) && 02791 *testpc == REOP_ASSERTNOTTEST) { 02792 result = NULL; 02793 break; 02794 } 02795 curState->u.assertion.top 02796 = (char *)gData->backTrackSP - 02797 (char *)gData->backTrackStack; 02798 curState->u.assertion.sz = gData->cursz; 02799 curState->index = x->cp - gData->cpbegin; 02800 curState->parenSoFar = parenSoFar; 02801 PUSH_STATE_STACK(gData); 02802 if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST, 02803 nextpc, x, x->cp, 0, 0)) { 02804 goto bad; 02805 } 02806 continue; 02807 02808 case REOP_ASSERTTEST: 02809 --gData->stateStackTop; 02810 --curState; 02811 x->cp = gData->cpbegin + curState->index; 02812 gData->backTrackSP = 02813 (REBackTrackData *) ((char *)gData->backTrackStack + 02814 curState->u.assertion.top); 02815 gData->cursz = curState->u.assertion.sz; 02816 if (result) 02817 result = x; 02818 break; 02819 02820 case REOP_ASSERTNOTTEST: 02821 --gData->stateStackTop; 02822 --curState; 02823 x->cp = gData->cpbegin + curState->index; 02824 gData->backTrackSP = 02825 (REBackTrackData *) ((char *)gData->backTrackStack + 02826 curState->u.assertion.top); 02827 gData->cursz = curState->u.assertion.sz; 02828 result = (!result) ? x : NULL; 02829 break; 02830 case REOP_STAR: 02831 curState->u.quantifier.min = 0; 02832 curState->u.quantifier.max = (UINT)-1; 02833 goto quantcommon; 02834 case REOP_PLUS: 02835 curState->u.quantifier.min = 1; 02836 curState->u.quantifier.max = (UINT)-1; 02837 goto quantcommon; 02838 case REOP_OPT: 02839 curState->u.quantifier.min = 0; 02840 curState->u.quantifier.max = 1; 02841 goto quantcommon; 02842 case REOP_QUANT: 02843 pc = ReadCompactIndex(pc, &k); 02844 curState->u.quantifier.min = k; 02845 pc = ReadCompactIndex(pc, &k); 02846 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */ 02847 curState->u.quantifier.max = k - 1; 02848 assert(curState->u.quantifier.min <= curState->u.quantifier.max); 02849 quantcommon: 02850 if (curState->u.quantifier.max == 0) { 02851 pc = pc + GET_OFFSET(pc); 02852 op = (REOp) *pc++; 02853 result = x; 02854 continue; 02855 } 02856 /* Step over <next> */ 02857 nextpc = pc + ARG_LEN; 02858 op = (REOp) *nextpc++; 02859 startcp = x->cp; 02860 if (REOP_IS_SIMPLE(op)) { 02861 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) { 02862 if (curState->u.quantifier.min == 0) 02863 result = x; 02864 else 02865 result = NULL; 02866 pc = pc + GET_OFFSET(pc); 02867 break; 02868 } 02869 op = (REOp) *nextpc++; 02870 result = x; 02871 } 02872 curState->index = startcp - gData->cpbegin; 02873 curState->continue_op = REOP_REPEAT; 02874 curState->continue_pc = pc; 02875 curState->parenSoFar = parenSoFar; 02876 PUSH_STATE_STACK(gData); 02877 if (curState->u.quantifier.min == 0 && 02878 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp, 02879 0, 0)) { 02880 goto bad; 02881 } 02882 pc = nextpc; 02883 continue; 02884 02885 case REOP_ENDCHILD: /* marks the end of a quantifier child */ 02886 pc = curState[-1].continue_pc; 02887 op = (REOp) curState[-1].continue_op; 02888 02889 if (!result) 02890 result = x; 02891 continue; 02892 02893 case REOP_REPEAT: 02894 --curState; 02895 do { 02896 --gData->stateStackTop; 02897 if (!result) { 02898 /* Failed, see if we have enough children. */ 02899 if (curState->u.quantifier.min == 0) 02900 goto repeatDone; 02901 goto break_switch; 02902 } 02903 if (curState->u.quantifier.min == 0 && 02904 x->cp == gData->cpbegin + curState->index) { 02905 /* matched an empty string, that'll get us nowhere */ 02906 result = NULL; 02907 goto break_switch; 02908 } 02909 if (curState->u.quantifier.min != 0) 02910 curState->u.quantifier.min--; 02911 if (curState->u.quantifier.max != (UINT) -1) 02912 curState->u.quantifier.max--; 02913 if (curState->u.quantifier.max == 0) 02914 goto repeatDone; 02915 nextpc = pc + ARG_LEN; 02916 nextop = (REOp) *nextpc; 02917 startcp = x->cp; 02918 if (REOP_IS_SIMPLE(nextop)) { 02919 nextpc++; 02920 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) { 02921 if (curState->u.quantifier.min == 0) 02922 goto repeatDone; 02923 result = NULL; 02924 goto break_switch; 02925 } 02926 result = x; 02927 } 02928 curState->index = startcp - gData->cpbegin; 02929 PUSH_STATE_STACK(gData); 02930 if (curState->u.quantifier.min == 0 && 02931 !PushBackTrackState(gData, REOP_REPEAT, 02932 pc, x, startcp, 02933 curState->parenSoFar, 02934 parenSoFar - 02935 curState->parenSoFar)) { 02936 goto bad; 02937 } 02938 } while (*nextpc == REOP_ENDCHILD); 02939 pc = nextpc; 02940 op = (REOp) *pc++; 02941 parenSoFar = curState->parenSoFar; 02942 continue; 02943 02944 repeatDone: 02945 result = x; 02946 pc += GET_OFFSET(pc); 02947 goto break_switch; 02948 02949 case REOP_MINIMALSTAR: 02950 curState->u.quantifier.min = 0; 02951 curState->u.quantifier.max = (UINT)-1; 02952 goto minimalquantcommon; 02953 case REOP_MINIMALPLUS: 02954 curState->u.quantifier.min = 1; 02955 curState->u.quantifier.max = (UINT)-1; 02956 goto minimalquantcommon; 02957 case REOP_MINIMALOPT: 02958 curState->u.quantifier.min = 0; 02959 curState->u.quantifier.max = 1; 02960 goto minimalquantcommon; 02961 case REOP_MINIMALQUANT: 02962 pc = ReadCompactIndex(pc, &k); 02963 curState->u.quantifier.min = k; 02964 pc = ReadCompactIndex(pc, &k); 02965 /* See REOP_QUANT comments about k - 1. */ 02966 curState->u.quantifier.max = k - 1; 02967 assert(curState->u.quantifier.min 02968 <= curState->u.quantifier.max); 02969 minimalquantcommon: 02970 curState->index = x->cp - gData->cpbegin; 02971 curState->parenSoFar = parenSoFar; 02972 PUSH_STATE_STACK(gData); 02973 if (curState->u.quantifier.min != 0) { 02974 curState->continue_op = REOP_MINIMALREPEAT; 02975 curState->continue_pc = pc; 02976 /* step over <next> */ 02977 pc += OFFSET_LEN; 02978 op = (REOp) *pc++; 02979 } else { 02980 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT, 02981 pc, x, x->cp, 0, 0)) { 02982 goto bad; 02983 } 02984 --gData->stateStackTop; 02985 pc = pc + GET_OFFSET(pc); 02986 op = (REOp) *pc++; 02987 } 02988 continue; 02989 02990 case REOP_MINIMALREPEAT: 02991 --gData->stateStackTop; 02992 --curState; 02993 02994 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max); 02995 #define PREPARE_REPEAT() \ 02996 do { \ 02997 curState->index = x->cp - gData->cpbegin; \ 02998 curState->continue_op = REOP_MINIMALREPEAT; \ 02999 curState->continue_pc = pc; \ 03000 pc += ARG_LEN; \ 03001 for (k = curState->parenSoFar; k < parenSoFar; k++) \ 03002 x->parens[k].index = -1; \ 03003 PUSH_STATE_STACK(gData); \ 03004 op = (REOp) *pc++; \ 03005 assert(op < REOP_LIMIT); \ 03006 }while(0) 03007 03008 if (!result) { 03009 TRACE(" -\n"); 03010 /* 03011 * Non-greedy failure - try to consume another child. 03012 */ 03013 if (curState->u.quantifier.max == (UINT) -1 || 03014 curState->u.quantifier.max > 0) { 03015 PREPARE_REPEAT(); 03016 continue; 03017 } 03018 /* Don't need to adjust pc since we're going to pop. */ 03019 break; 03020 } 03021 if (curState->u.quantifier.min == 0 && 03022 x->cp == gData->cpbegin + curState->index) { 03023 /* Matched an empty string, that'll get us nowhere. */ 03024 result = NULL; 03025 break; 03026 } 03027 if (curState->u.quantifier.min != 0) 03028 curState->u.quantifier.min--; 03029 if (curState->u.quantifier.max != (UINT) -1) 03030 curState->u.quantifier.max--; 03031 if (curState->u.quantifier.min != 0) { 03032 PREPARE_REPEAT(); 03033 continue; 03034 } 03035 curState->index = x->cp - gData->cpbegin; 03036 curState->parenSoFar = parenSoFar; 03037 PUSH_STATE_STACK(gData); 03038 if (!PushBackTrackState(gData, REOP_MINIMALREPEAT, 03039 pc, x, x->cp, 03040 curState->parenSoFar, 03041 parenSoFar - curState->parenSoFar)) { 03042 goto bad; 03043 } 03044 --gData->stateStackTop; 03045 pc = pc + GET_OFFSET(pc); 03046 op = (REOp) *pc++; 03047 assert(op < REOP_LIMIT); 03048 continue; 03049 default: 03050 assert(FALSE); 03051 result = NULL; 03052 } 03053 break_switch:; 03054 } 03055 03056 /* 03057 * If the match failed and there's a backtrack option, take it. 03058 * Otherwise this is a complete and utter failure. 03059 */ 03060 if (!result) { 03061 if (gData->cursz == 0) 03062 return NULL; 03063 03064 /* Potentially detect explosive regex here. */ 03065 gData->backTrackCount++; 03066 if (gData->backTrackLimit && 03067 gData->backTrackCount >= gData->backTrackLimit) { 03068 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL, 03069 JSMSG_REGEXP_TOO_COMPLEX); 03070 gData->ok = FALSE; 03071 return NULL; 03072 } 03073 03074 backTrackData = gData->backTrackSP; 03075 gData->cursz = backTrackData->sz; 03076 gData->backTrackSP = 03077 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz); 03078 x->cp = backTrackData->cp; 03079 pc = backTrackData->backtrack_pc; 03080 op = (REOp) backTrackData->backtrack_op; 03081 assert(op < REOP_LIMIT); 03082 gData->stateStackTop = backTrackData->saveStateStackTop; 03083 assert(gData->stateStackTop); 03084 03085 memcpy(gData->stateStack, backTrackData + 1, 03086 sizeof(REProgState) * backTrackData->saveStateStackTop); 03087 curState = &gData->stateStack[gData->stateStackTop - 1]; 03088 03089 if (backTrackData->parenCount) { 03090 memcpy(&x->parens[backTrackData->parenIndex], 03091 (char *)(backTrackData + 1) + 03092 sizeof(REProgState) * backTrackData->saveStateStackTop, 03093 sizeof(RECapture) * backTrackData->parenCount); 03094 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount; 03095 } else { 03096 for (k = curState->parenSoFar; k < parenSoFar; k++) 03097 x->parens[k].index = -1; 03098 parenSoFar = curState->parenSoFar; 03099 } 03100 03101 TRACE("\tBT_Pop: %ld,%ld\n", 03102 (ULONG_PTR)backTrackData->parenIndex, 03103 (ULONG_PTR)backTrackData->parenCount); 03104 continue; 03105 } 03106 x = result; 03107 03108 /* 03109 * Continue with the expression. 03110 */ 03111 op = (REOp)*pc++; 03112 assert(op < REOP_LIMIT); 03113 } 03114 03115 bad: 03116 TRACE("\n"); 03117 return NULL; 03118 03119 good: 03120 TRACE("\n"); 03121 return x; 03122 } 03123 03124 static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x) 03125 { 03126 REMatchState *result; 03127 const WCHAR *cp = x->cp; 03128 const WCHAR *cp2; 03129 UINT j; 03130 03131 /* 03132 * Have to include the position beyond the last character 03133 * in order to detect end-of-input/line condition. 03134 */ 03135 for (cp2 = cp; cp2 <= gData->cpend; cp2++) { 03136 gData->skipped = cp2 - cp; 03137 x->cp = cp2; 03138 for (j = 0; j < gData->regexp->parenCount; j++) 03139 x->parens[j].index = -1; 03140 result = ExecuteREBytecode(gData, x); 03141 if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY)) 03142 return result; 03143 gData->backTrackSP = gData->backTrackStack; 03144 gData->cursz = 0; 03145 gData->stateStackTop = 0; 03146 cp2 = cp + gData->skipped; 03147 } 03148 return NULL; 03149 } 03150 03151 #define MIN_BACKTRACK_LIMIT 400000 03152 03153 static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length) 03154 { 03155 REMatchState *result; 03156 UINT i; 03157 03158 gData->backTrackStackSize = INITIAL_BACKTRACK; 03159 gData->backTrackStack = jsheap_alloc(gData->pool, INITIAL_BACKTRACK); 03160 if (!gData->backTrackStack) 03161 goto bad; 03162 03163 gData->backTrackSP = gData->backTrackStack; 03164 gData->cursz = 0; 03165 gData->backTrackCount = 0; 03166 gData->backTrackLimit = 0; 03167 03168 gData->stateStackLimit = INITIAL_STATESTACK; 03169 gData->stateStack = jsheap_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK); 03170 if (!gData->stateStack) 03171 goto bad; 03172 03173 gData->stateStackTop = 0; 03174 gData->cx = cx; 03175 gData->regexp = re; 03176 gData->ok = TRUE; 03177 03178 result = jsheap_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture)); 03179 if (!result) 03180 goto bad; 03181 03182 for (i = 0; i < re->classCount; i++) { 03183 if (!re->classList[i].converted && 03184 !ProcessCharSet(gData, &re->classList[i])) { 03185 return NULL; 03186 } 03187 } 03188 03189 return result; 03190 03191 bad: 03192 js_ReportOutOfScriptQuota(cx); 03193 gData->ok = FALSE; 03194 return NULL; 03195 } 03196 03197 static void 03198 js_DestroyRegExp(JSRegExp *re) 03199 { 03200 if (re->classList) { 03201 UINT i; 03202 for (i = 0; i < re->classCount; i++) { 03203 if (re->classList[i].converted) 03204 heap_free(re->classList[i].u.bits); 03205 re->classList[i].u.bits = NULL; 03206 } 03207 heap_free(re->classList); 03208 } 03209 heap_free(re); 03210 } 03211 03212 static JSRegExp * 03213 js_NewRegExp(script_ctx_t *cx, BSTR str, UINT flags, BOOL flat) 03214 { 03215 JSRegExp *re; 03216 jsheap_t *mark; 03217 CompilerState state; 03218 size_t resize; 03219 jsbytecode *endPC; 03220 UINT i; 03221 size_t len; 03222 03223 re = NULL; 03224 mark = jsheap_mark(&cx->tmp_heap); 03225 len = SysStringLen(str); 03226 03227 state.context = cx; 03228 state.cp = str; 03229 if (!state.cp) 03230 goto out; 03231 state.cpbegin = state.cp; 03232 state.cpend = state.cp + len; 03233 state.flags = flags; 03234 state.parenCount = 0; 03235 state.classCount = 0; 03236 state.progLength = 0; 03237 state.treeDepth = 0; 03238 state.classBitmapsMem = 0; 03239 for (i = 0; i < CLASS_CACHE_SIZE; i++) 03240 state.classCache[i].start = NULL; 03241 03242 if (len != 0 && flat) { 03243 state.result = NewRENode(&state, REOP_FLAT); 03244 if (!state.result) 03245 goto out; 03246 state.result->u.flat.chr = *state.cpbegin; 03247 state.result->u.flat.length = len; 03248 state.result->kid = (void *) state.cpbegin; 03249 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */ 03250 state.progLength += 1 + GetCompactIndexWidth(0) 03251 + GetCompactIndexWidth(len); 03252 } else { 03253 if (!ParseRegExp(&state)) 03254 goto out; 03255 } 03256 resize = offsetof(JSRegExp, program) + state.progLength + 1; 03257 re = heap_alloc(resize); 03258 if (!re) 03259 goto out; 03260 03261 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT); 03262 re->classCount = state.classCount; 03263 if (re->classCount) { 03264 re->classList = heap_alloc(re->classCount * sizeof(RECharSet)); 03265 if (!re->classList) { 03266 js_DestroyRegExp(re); 03267 re = NULL; 03268 goto out; 03269 } 03270 for (i = 0; i < re->classCount; i++) 03271 re->classList[i].converted = FALSE; 03272 } else { 03273 re->classList = NULL; 03274 } 03275 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result); 03276 if (!endPC) { 03277 js_DestroyRegExp(re); 03278 re = NULL; 03279 goto out; 03280 } 03281 *endPC++ = REOP_END; 03282 /* 03283 * Check whether size was overestimated and shrink using realloc. 03284 * This is safe since no pointers to newly parsed regexp or its parts 03285 * besides re exist here. 03286 */ 03287 if ((size_t)(endPC - re->program) != state.progLength + 1) { 03288 JSRegExp *tmp; 03289 assert((size_t)(endPC - re->program) < state.progLength + 1); 03290 resize = offsetof(JSRegExp, program) + (endPC - re->program); 03291 tmp = heap_realloc(re, resize); 03292 if (tmp) 03293 re = tmp; 03294 } 03295 03296 re->flags = flags; 03297 re->parenCount = state.parenCount; 03298 re->source = str; 03299 03300 out: 03301 jsheap_clear(mark); 03302 return re; 03303 } 03304 03305 static inline RegExpInstance *regexp_from_vdisp(vdisp_t *vdisp) 03306 { 03307 return (RegExpInstance*)vdisp->u.jsdisp; 03308 } 03309 03310 static void set_last_index(RegExpInstance *This, DWORD last_index) 03311 { 03312 This->last_index = last_index; 03313 VariantClear(&This->last_index_var); 03314 num_set_val(&This->last_index_var, last_index); 03315 } 03316 03317 static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, DWORD rem_flags, 03318 const WCHAR *str, DWORD len, const WCHAR **cp, match_result_t **parens, DWORD *parens_size, 03319 DWORD *parens_cnt, match_result_t *ret) 03320 { 03321 REMatchState *x, *result; 03322 REGlobalData gData; 03323 DWORD matchlen; 03324 03325 gData.cpbegin = *cp; 03326 gData.cpend = str + len; 03327 gData.start = *cp-str; 03328 gData.skipped = 0; 03329 gData.pool = &ctx->tmp_heap; 03330 03331 x = InitMatch(NULL, &gData, regexp->jsregexp, gData.cpend - gData.cpbegin); 03332 if(!x) { 03333 WARN("InitMatch failed\n"); 03334 return E_FAIL; 03335 } 03336 03337 x->cp = *cp; 03338 result = MatchRegExp(&gData, x); 03339 if(!gData.ok) { 03340 WARN("MatchRegExp failed\n"); 03341 return E_FAIL; 03342 } 03343 03344 if(!result) { 03345 if(rem_flags & REM_RESET_INDEX) 03346 set_last_index(regexp, 0); 03347 return S_FALSE; 03348 } 03349 03350 if(parens) { 03351 if(regexp->jsregexp->parenCount > *parens_size) { 03352 match_result_t *new_parens; 03353 03354 if(*parens) 03355 new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount); 03356 else 03357 new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount); 03358 if(!new_parens) 03359 return E_OUTOFMEMORY; 03360 03361 *parens = new_parens; 03362 } 03363 } 03364 03365 /* FIXME: We often already have a copy of input string that we could use to store last match */ 03366 if(!(rem_flags & REM_NO_CTX_UPDATE) && 03367 (!ctx->last_match || len != SysStringLen(ctx->last_match) || strncmpW(ctx->last_match, str, len))) { 03368 BSTR last_match; 03369 03370 last_match = SysAllocStringLen(str, len); 03371 if(!last_match) 03372 return E_OUTOFMEMORY; 03373 SysFreeString(ctx->last_match); 03374 ctx->last_match = last_match; 03375 } 03376 03377 if(parens) { 03378 DWORD i; 03379 03380 *parens_cnt = regexp->jsregexp->parenCount; 03381 03382 for(i=0; i < regexp->jsregexp->parenCount; i++) { 03383 if(result->parens[i].index == -1) { 03384 (*parens)[i].str = NULL; 03385 (*parens)[i].len = 0; 03386 }else { 03387 (*parens)[i].str = *cp + result->parens[i].index; 03388 (*parens)[i].len = result->parens[i].length; 03389 } 03390 } 03391 } 03392 03393 matchlen = (result->cp-*cp) - gData.skipped; 03394 *cp = result->cp; 03395 ret->str = result->cp-matchlen; 03396 ret->len = matchlen; 03397 set_last_index(regexp, result->cp-str); 03398 03399 if(!(rem_flags & REM_NO_CTX_UPDATE)) { 03400 ctx->last_match_index = ret->str-str; 03401 ctx->last_match_length = matchlen; 03402 } 03403 03404 return S_OK; 03405 } 03406 03407 HRESULT regexp_match_next(script_ctx_t *ctx, DispatchEx *dispex, DWORD rem_flags, const WCHAR *str, 03408 DWORD len, const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, 03409 match_result_t *ret) 03410 { 03411 RegExpInstance *regexp = (RegExpInstance*)dispex; 03412 jsheap_t *mark; 03413 HRESULT hres; 03414 03415 if((rem_flags & REM_CHECK_GLOBAL) && !(regexp->jsregexp->flags & JSREG_GLOB)) 03416 return S_FALSE; 03417 03418 mark = jsheap_mark(&ctx->tmp_heap); 03419 03420 hres = do_regexp_match_next(ctx, regexp, rem_flags, str, len, cp, parens, parens_size, parens_cnt, ret); 03421 03422 jsheap_clear(mark); 03423 return hres; 03424 } 03425 03426 HRESULT regexp_match(script_ctx_t *ctx, DispatchEx *dispex, const WCHAR *str, DWORD len, BOOL gflag, 03427 match_result_t **match_result, DWORD *result_cnt) 03428 { 03429 RegExpInstance *This = (RegExpInstance*)dispex; 03430 match_result_t *ret = NULL, cres; 03431 const WCHAR *cp = str; 03432 DWORD i=0, ret_size = 0; 03433 jsheap_t *mark; 03434 HRESULT hres; 03435 03436 mark = jsheap_mark(&ctx->tmp_heap); 03437 03438 while(1) { 03439 hres = do_regexp_match_next(ctx, This, 0, str, len, &cp, NULL, NULL, NULL, &cres); 03440 if(hres == S_FALSE) { 03441 hres = S_OK; 03442 break; 03443 } 03444 03445 if(FAILED(hres)) 03446 break; 03447 03448 if(ret_size == i) { 03449 if(ret) 03450 ret = heap_realloc(ret, (ret_size <<= 1) * sizeof(match_result_t)); 03451 else 03452 ret = heap_alloc((ret_size=4) * sizeof(match_result_t)); 03453 if(!ret) { 03454 hres = E_OUTOFMEMORY; 03455 break; 03456 } 03457 } 03458 03459 ret[i++] = cres; 03460 03461 if(!gflag && !(This->jsregexp->flags & JSREG_GLOB)) { 03462 hres = S_OK; 03463 break; 03464 } 03465 } 03466 03467 jsheap_clear(mark); 03468 if(FAILED(hres)) { 03469 heap_free(ret); 03470 return hres; 03471 } 03472 03473 *match_result = ret; 03474 *result_cnt = i; 03475 return S_OK; 03476 } 03477 03478 static HRESULT RegExp_source(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03479 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03480 { 03481 TRACE("\n"); 03482 03483 switch(flags) { 03484 case DISPATCH_PROPERTYGET: { 03485 RegExpInstance *This = regexp_from_vdisp(jsthis); 03486 03487 V_VT(retv) = VT_BSTR; 03488 V_BSTR(retv) = SysAllocString(This->str); 03489 if(!V_BSTR(retv)) 03490 return E_OUTOFMEMORY; 03491 break; 03492 } 03493 default: 03494 FIXME("Unimplemnted flags %x\n", flags); 03495 return E_NOTIMPL; 03496 } 03497 03498 return S_OK; 03499 } 03500 03501 static HRESULT RegExp_global(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03502 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03503 { 03504 FIXME("\n"); 03505 return E_NOTIMPL; 03506 } 03507 03508 static HRESULT RegExp_ignoreCase(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03509 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03510 { 03511 FIXME("\n"); 03512 return E_NOTIMPL; 03513 } 03514 03515 static HRESULT RegExp_multiline(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03516 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03517 { 03518 FIXME("\n"); 03519 return E_NOTIMPL; 03520 } 03521 03522 static INT index_from_var(script_ctx_t *ctx, VARIANT *v) 03523 { 03524 jsexcept_t ei; 03525 VARIANT num; 03526 HRESULT hres; 03527 03528 memset(&ei, 0, sizeof(ei)); 03529 hres = to_number(ctx, v, &ei, &num); 03530 if(FAILED(hres)) { /* FIXME: Move ignoring exceptions to to_promitive */ 03531 VariantClear(&ei.var); 03532 return 0; 03533 } 03534 03535 if(V_VT(&num) == VT_R8) { 03536 DOUBLE d = floor(V_R8(&num)); 03537 return (DOUBLE)(INT)d == d ? d : 0; 03538 } 03539 03540 return V_I4(&num); 03541 } 03542 03543 static HRESULT RegExp_lastIndex(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03544 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03545 { 03546 TRACE("\n"); 03547 03548 switch(flags) { 03549 case DISPATCH_PROPERTYGET: { 03550 RegExpInstance *regexp = regexp_from_vdisp(jsthis); 03551 03552 V_VT(retv) = VT_EMPTY; 03553 return VariantCopy(retv, ®exp->last_index_var); 03554 } 03555 case DISPATCH_PROPERTYPUT: { 03556 RegExpInstance *regexp = regexp_from_vdisp(jsthis); 03557 VARIANT *arg; 03558 HRESULT hres; 03559 03560 arg = get_arg(dp,0); 03561 hres = VariantCopy(®exp->last_index_var, arg); 03562 if(FAILED(hres)) 03563 return hres; 03564 03565 regexp->last_index = index_from_var(ctx, arg); 03566 break; 03567 } 03568 default: 03569 FIXME("unimplemented flags: %x\n", flags); 03570 return E_NOTIMPL; 03571 } 03572 03573 return S_OK; 03574 } 03575 03576 static HRESULT RegExp_toString(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03577 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03578 { 03579 FIXME("\n"); 03580 return E_NOTIMPL; 03581 } 03582 03583 static HRESULT create_match_array(script_ctx_t *ctx, BSTR input, const match_result_t *result, 03584 const match_result_t *parens, DWORD parens_cnt, jsexcept_t *ei, IDispatch **ret) 03585 { 03586 DispatchEx *array; 03587 VARIANT var; 03588 int i; 03589 HRESULT hres = S_OK; 03590 03591 static const WCHAR indexW[] = {'i','n','d','e','x',0}; 03592 static const WCHAR inputW[] = {'i','n','p','u','t',0}; 03593 static const WCHAR zeroW[] = {'0',0}; 03594 03595 hres = create_array(ctx, parens_cnt+1, &array); 03596 if(FAILED(hres)) 03597 return hres; 03598 03599 for(i=0; i < parens_cnt; i++) { 03600 V_VT(&var) = VT_BSTR; 03601 V_BSTR(&var) = SysAllocStringLen(parens[i].str, parens[i].len); 03602 if(!V_BSTR(&var)) { 03603 hres = E_OUTOFMEMORY; 03604 break; 03605 } 03606 03607 hres = jsdisp_propput_idx(array, i+1, &var, ei, NULL/*FIXME*/); 03608 SysFreeString(V_BSTR(&var)); 03609 if(FAILED(hres)) 03610 break; 03611 } 03612 03613 while(SUCCEEDED(hres)) { 03614 V_VT(&var) = VT_I4; 03615 V_I4(&var) = result->str-input; 03616 hres = jsdisp_propput_name(array, indexW, &var, ei, NULL/*FIXME*/); 03617 if(FAILED(hres)) 03618 break; 03619 03620 V_VT(&var) = VT_BSTR; 03621 V_BSTR(&var) = input; 03622 hres = jsdisp_propput_name(array, inputW, &var, ei, NULL/*FIXME*/); 03623 if(FAILED(hres)) 03624 break; 03625 03626 V_BSTR(&var) = SysAllocStringLen(result->str, result->len); 03627 if(!V_BSTR(&var)) { 03628 hres = E_OUTOFMEMORY; 03629 break; 03630 } 03631 hres = jsdisp_propput_name(array, zeroW, &var, ei, NULL/*FIXME*/); 03632 SysFreeString(V_BSTR(&var)); 03633 break; 03634 } 03635 03636 if(FAILED(hres)) { 03637 jsdisp_release(array); 03638 return hres; 03639 } 03640 03641 *ret = (IDispatch*)_IDispatchEx_(array); 03642 return S_OK; 03643 } 03644 03645 static HRESULT run_exec(script_ctx_t *ctx, vdisp_t *jsthis, VARIANT *arg, jsexcept_t *ei, BSTR *input, 03646 match_result_t *match, match_result_t **parens, DWORD *parens_cnt, VARIANT_BOOL *ret) 03647 { 03648 RegExpInstance *regexp; 03649 DWORD parens_size = 0, last_index = 0, length; 03650 const WCHAR *cp; 03651 BSTR string; 03652 HRESULT hres; 03653 03654 if(!is_vclass(jsthis, JSCLASS_REGEXP)) { 03655 FIXME("Not a RegExp\n"); 03656 return E_NOTIMPL; 03657 } 03658 03659 regexp = regexp_from_vdisp(jsthis); 03660 03661 if(arg) { 03662 hres = to_string(ctx, arg, ei, &string); 03663 if(FAILED(hres)) 03664 return hres; 03665 length = SysStringLen(string); 03666 }else { 03667 string = NULL; 03668 length = 0; 03669 } 03670 03671 if(regexp->jsregexp->flags & JSREG_GLOB) { 03672 if(regexp->last_index < 0) { 03673 SysFreeString(string); 03674 set_last_index(regexp, 0); 03675 *ret = VARIANT_FALSE; 03676 if(input) { 03677 *input = NULL; 03678 } 03679 return S_OK; 03680 } 03681 03682 last_index = regexp->last_index; 03683 } 03684 03685 cp = string + last_index; 03686 hres = regexp_match_next(ctx, ®exp->dispex, REM_RESET_INDEX, string, length, &cp, parens, 03687 parens ? &parens_size : NULL, parens_cnt, match); 03688 if(FAILED(hres)) { 03689 SysFreeString(string); 03690 return hres; 03691 } 03692 03693 *ret = hres == S_OK ? VARIANT_TRUE : VARIANT_FALSE; 03694 if(input) { 03695 *input = string; 03696 }else { 03697 SysFreeString(string); 03698 } 03699 return S_OK; 03700 } 03701 03702 static HRESULT RegExp_exec(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03703 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03704 { 03705 match_result_t *parens = NULL, match; 03706 DWORD parens_cnt = 0; 03707 VARIANT_BOOL b; 03708 BSTR string; 03709 HRESULT hres; 03710 03711 TRACE("\n"); 03712 03713 hres = run_exec(ctx, jsthis, arg_cnt(dp) ? get_arg(dp,0) : NULL, ei, &string, &match, &parens, &parens_cnt, &b); 03714 if(FAILED(hres)) 03715 return hres; 03716 03717 if(retv) { 03718 if(b) { 03719 IDispatch *ret; 03720 03721 hres = create_match_array(ctx, string, &match, parens, parens_cnt, ei, &ret); 03722 if(SUCCEEDED(hres)) { 03723 V_VT(retv) = VT_DISPATCH; 03724 V_DISPATCH(retv) = ret; 03725 } 03726 }else { 03727 V_VT(retv) = VT_NULL; 03728 } 03729 } 03730 03731 heap_free(parens); 03732 SysFreeString(string); 03733 return hres; 03734 } 03735 03736 static HRESULT RegExp_test(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03737 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03738 { 03739 match_result_t match; 03740 VARIANT undef_var; 03741 VARIANT_BOOL b; 03742 DWORD argc; 03743 HRESULT hres; 03744 03745 TRACE("\n"); 03746 03747 argc = arg_cnt(dp); 03748 if(!argc) { 03749 V_VT(&undef_var) = VT_BSTR; 03750 V_BSTR(&undef_var) = SysAllocString(undefinedW); 03751 if(!V_BSTR(&undef_var)) 03752 return E_OUTOFMEMORY; 03753 } 03754 03755 hres = run_exec(ctx, jsthis, argc ? get_arg(dp,0) : &undef_var, ei, NULL, &match, NULL, NULL, &b); 03756 if(!argc) 03757 SysFreeString(V_BSTR(&undef_var)); 03758 if(FAILED(hres)) 03759 return hres; 03760 03761 if(retv) { 03762 V_VT(retv) = VT_BOOL; 03763 V_BOOL(retv) = b; 03764 } 03765 return S_OK; 03766 } 03767 03768 static HRESULT RegExp_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 03769 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 03770 { 03771 TRACE("\n"); 03772 03773 switch(flags) { 03774 case INVOKE_FUNC: 03775 return throw_type_error(ctx, ei, IDS_NOT_FUNC, NULL); 03776 default: 03777 FIXME("unimplemented flags %x\n", flags); 03778 return E_NOTIMPL; 03779 } 03780 03781 return S_OK; 03782 } 03783 03784 static void RegExp_destructor(DispatchEx *dispex) 03785 { 03786 RegExpInstance *This = (RegExpInstance*)dispex; 03787 03788 if(This->jsregexp) 03789 js_DestroyRegExp(This->jsregexp); 03790 VariantClear(&This->last_index_var); 03791 SysFreeString(This->str); 03792 heap_free(This); 03793 } 03794 03795 static const builtin_prop_t RegExp_props[] = { 03796 {execW, RegExp_exec, PROPF_METHOD|1}, 03797 {globalW, RegExp_global, 0}, 03798 {ignoreCaseW, RegExp_ignoreCase, 0}, 03799 {lastIndexW, RegExp_lastIndex, 0}, 03800 {multilineW, RegExp_multiline, 0}, 03801 {sourceW, RegExp_source, 0}, 03802 {testW, RegExp_test, PROPF_METHOD|1}, 03803 {toStringW, RegExp_toString, PROPF_METHOD} 03804 }; 03805 03806 static const builtin_info_t RegExp_info = { 03807 JSCLASS_REGEXP, 03808 {NULL, RegExp_value, 0}, 03809 sizeof(RegExp_props)/sizeof(*RegExp_props), 03810 RegExp_props, 03811 RegExp_destructor, 03812 NULL 03813 }; 03814 03815 static HRESULT alloc_regexp(script_ctx_t *ctx, DispatchEx *object_prototype, RegExpInstance **ret) 03816 { 03817 RegExpInstance *regexp; 03818 HRESULT hres; 03819 03820 regexp = heap_alloc_zero(sizeof(RegExpInstance)); 03821 if(!regexp) 03822 return E_OUTOFMEMORY; 03823 03824 if(object_prototype) 03825 hres = init_dispex(®exp->dispex, ctx, &RegExp_info, object_prototype); 03826 else 03827 hres = init_dispex_from_constr(®exp->dispex, ctx, &RegExp_info, ctx->regexp_constr); 03828 03829 if(FAILED(hres)) { 03830 heap_free(regexp); 03831 return hres; 03832 } 03833 03834 *ret = regexp; 03835 return S_OK; 03836 } 03837 03838 HRESULT create_regexp(script_ctx_t *ctx, const WCHAR *exp, int len, DWORD flags, DispatchEx **ret) 03839 { 03840 RegExpInstance *regexp; 03841 HRESULT hres; 03842 03843 TRACE("%s %x\n", debugstr_w(exp), flags); 03844 03845 hres = alloc_regexp(ctx, NULL, ®exp); 03846 if(FAILED(hres)) 03847 return hres; 03848 03849 if(len == -1) 03850 regexp->str = SysAllocString(exp); 03851 else 03852 regexp->str = SysAllocStringLen(exp, len); 03853 if(!regexp->str) { 03854 jsdisp_release(®exp->dispex); 03855 return E_OUTOFMEMORY; 03856 } 03857 03858 regexp->jsregexp = js_NewRegExp(ctx, regexp->str, flags, FALSE); 03859 if(!regexp->jsregexp) { 03860 WARN("js_NewRegExp failed\n"); 03861 jsdisp_release(®exp->dispex); 03862 return E_FAIL; 03863 } 03864 03865 V_VT(®exp->last_index_var) = VT_I4; 03866 V_I4(®exp->last_index_var) = 0; 03867 03868 *ret = ®exp->dispex; 03869 return S_OK; 03870 } 03871 03872 HRESULT create_regexp_var(script_ctx_t *ctx, VARIANT *src_arg, VARIANT *flags_arg, DispatchEx **ret) 03873 { 03874 const WCHAR *opt = emptyW, *src; 03875 DWORD flags; 03876 HRESULT hres; 03877 03878 if(V_VT(src_arg) == VT_DISPATCH) { 03879 DispatchEx *obj; 03880 03881 obj = iface_to_jsdisp((IUnknown*)V_DISPATCH(src_arg)); 03882 if(obj) { 03883 if(is_class(obj, JSCLASS_REGEXP)) { 03884 RegExpInstance *regexp = (RegExpInstance*)obj; 03885 03886 hres = create_regexp(ctx, regexp->str, -1, regexp->jsregexp->flags, ret); 03887 jsdisp_release(obj); 03888 return hres; 03889 } 03890 03891 jsdisp_release(obj); 03892 } 03893 } 03894 03895 if(V_VT(src_arg) != VT_BSTR) { 03896 FIXME("flags_arg = %s\n", debugstr_variant(flags_arg)); 03897 return E_NOTIMPL; 03898 } 03899 03900 src = V_BSTR(src_arg); 03901 03902 if(flags_arg) { 03903 if(V_VT(flags_arg) != VT_BSTR) { 03904 FIXME("unimplemented for vt %d\n", V_VT(flags_arg)); 03905 return E_NOTIMPL; 03906 } 03907 03908 opt = V_BSTR(flags_arg); 03909 } 03910 03911 hres = parse_regexp_flags(opt, strlenW(opt), &flags); 03912 if(FAILED(hres)) 03913 return hres; 03914 03915 return create_regexp(ctx, src, -1, flags, ret); 03916 } 03917 03918 HRESULT regexp_string_match(script_ctx_t *ctx, DispatchEx *re, BSTR str, 03919 VARIANT *retv, jsexcept_t *ei) 03920 { 03921 RegExpInstance *regexp = (RegExpInstance*)re; 03922 match_result_t *match_result; 03923 DWORD match_cnt, i, length; 03924 DispatchEx *array; 03925 VARIANT var; 03926 HRESULT hres; 03927 03928 length = SysStringLen(str); 03929 03930 if(!(regexp->jsregexp->flags & JSREG_GLOB)) { 03931 match_result_t match, *parens = NULL; 03932 DWORD parens_cnt, parens_size = 0; 03933 const WCHAR *cp = str; 03934 03935 hres = regexp_match_next(ctx, ®exp->dispex, 0, str, length, &cp, &parens, &parens_size, &parens_cnt, &match); 03936 if(FAILED(hres)) 03937 return hres; 03938 03939 if(retv) { 03940 if(hres == S_OK) { 03941 IDispatch *ret; 03942 03943 hres = create_match_array(ctx, str, &match, parens, parens_cnt, ei, &ret); 03944 if(SUCCEEDED(hres)) { 03945 V_VT(retv) = VT_DISPATCH; 03946 V_DISPATCH(retv) = ret; 03947 } 03948 }else { 03949 V_VT(retv) = VT_NULL; 03950 } 03951 } 03952 03953 heap_free(parens); 03954 return S_OK; 03955 } 03956 03957 hres = regexp_match(ctx, ®exp->dispex, str, length, FALSE, &match_result, &match_cnt); 03958 if(FAILED(hres)) 03959 return hres; 03960 03961 if(!match_cnt) { 03962 TRACE("no match\n"); 03963 03964 if(retv) 03965 V_VT(retv) = VT_NULL; 03966 return S_OK; 03967 } 03968 03969 hres = create_array(ctx, match_cnt, &array); 03970 if(FAILED(hres)) 03971 return hres; 03972 03973 V_VT(&var) = VT_BSTR; 03974 03975 for(i=0; i < match_cnt; i++) { 03976 V_BSTR(&var) = SysAllocStringLen(match_result[i].str, match_result[i].len); 03977 if(!V_BSTR(&var)) { 03978 hres = E_OUTOFMEMORY; 03979 break; 03980 } 03981 03982 hres = jsdisp_propput_idx(array, i, &var, ei, NULL/*FIXME*/); 03983 SysFreeString(V_BSTR(&var)); 03984 if(FAILED(hres)) 03985 break; 03986 } 03987 03988 heap_free(match_result); 03989 03990 if(SUCCEEDED(hres) && retv) { 03991 V_VT(retv) = VT_DISPATCH; 03992 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(array); 03993 }else { 03994 jsdisp_release(array); 03995 } 03996 return hres; 03997 } 03998 03999 static HRESULT RegExpConstr_leftContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, 04000 DISPPARAMS *dp, VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 04001 { 04002 TRACE("\n"); 04003 04004 switch(flags) { 04005 case DISPATCH_PROPERTYGET: { 04006 BSTR ret; 04007 04008 ret = SysAllocStringLen(ctx->last_match, ctx->last_match_index); 04009 if(!ret) 04010 return E_OUTOFMEMORY; 04011 04012 V_VT(retv) = VT_BSTR; 04013 V_BSTR(retv) = ret; 04014 } 04015 case DISPATCH_PROPERTYPUT: 04016 return S_OK; 04017 default: 04018 FIXME("unsupported flags\n"); 04019 return E_NOTIMPL; 04020 } 04021 04022 return S_OK; 04023 } 04024 04025 static HRESULT RegExpConstr_rightContext(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, 04026 DISPPARAMS *dp, VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 04027 { 04028 TRACE("\n"); 04029 04030 switch(flags) { 04031 case DISPATCH_PROPERTYGET: { 04032 BSTR ret; 04033 04034 ret = SysAllocString(ctx->last_match+ctx->last_match_index+ctx->last_match_length); 04035 if(!ret) 04036 return E_OUTOFMEMORY; 04037 04038 V_VT(retv) = VT_BSTR; 04039 V_BSTR(retv) = ret; 04040 } 04041 case DISPATCH_PROPERTYPUT: 04042 return S_OK; 04043 default: 04044 FIXME("unsupported flags\n"); 04045 return E_NOTIMPL; 04046 } 04047 04048 return S_OK; 04049 } 04050 04051 static HRESULT RegExpConstr_value(script_ctx_t *ctx, vdisp_t *jsthis, WORD flags, DISPPARAMS *dp, 04052 VARIANT *retv, jsexcept_t *ei, IServiceProvider *sp) 04053 { 04054 TRACE("\n"); 04055 04056 switch(flags) { 04057 case DISPATCH_METHOD: 04058 if(arg_cnt(dp)) { 04059 VARIANT *arg = get_arg(dp,0); 04060 if(V_VT(arg) == VT_DISPATCH) { 04061 DispatchEx *jsdisp = iface_to_jsdisp((IUnknown*)V_DISPATCH(arg)); 04062 if(jsdisp) { 04063 if(is_class(jsdisp, JSCLASS_REGEXP)) { 04064 if(arg_cnt(dp) > 1 && V_VT(get_arg(dp,1)) != VT_EMPTY) { 04065 jsdisp_release(jsdisp); 04066 return throw_regexp_error(ctx, ei, IDS_REGEXP_SYNTAX_ERROR, NULL); 04067 } 04068 04069 if(retv) { 04070 V_VT(retv) = VT_DISPATCH; 04071 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(jsdisp); 04072 }else { 04073 jsdisp_release(jsdisp); 04074 } 04075 return S_OK; 04076 } 04077 jsdisp_release(jsdisp); 04078 } 04079 } 04080 } 04081 /* fall through */ 04082 case DISPATCH_CONSTRUCT: { 04083 DispatchEx *ret; 04084 HRESULT hres; 04085 04086 if(!arg_cnt(dp)) { 04087 FIXME("no args\n"); 04088 return E_NOTIMPL; 04089 } 04090 04091 hres = create_regexp_var(ctx, get_arg(dp,0), arg_cnt(dp) > 1 ? get_arg(dp,1) : NULL, &ret); 04092 if(FAILED(hres)) 04093 return hres; 04094 04095 if(retv) { 04096 V_VT(retv) = VT_DISPATCH; 04097 V_DISPATCH(retv) = (IDispatch*)_IDispatchEx_(ret); 04098 }else { 04099 jsdisp_release(ret); 04100 } 04101 return S_OK; 04102 } 04103 default: 04104 FIXME("unimplemented flags: %x\n", flags); 04105 return E_NOTIMPL; 04106 } 04107 04108 return S_OK; 04109 } 04110 04111 static const builtin_prop_t RegExpConstr_props[] = { 04112 {leftContextW, RegExpConstr_leftContext, 0}, 04113 {rightContextW, RegExpConstr_rightContext, 0} 04114 }; 04115 04116 static const builtin_info_t RegExpConstr_info = { 04117 JSCLASS_FUNCTION, 04118 {NULL, Function_value, 0}, 04119 sizeof(RegExpConstr_props)/sizeof(*RegExpConstr_props), 04120 RegExpConstr_props, 04121 NULL, 04122 NULL 04123 }; 04124 04125 HRESULT create_regexp_constr(script_ctx_t *ctx, DispatchEx *object_prototype, DispatchEx **ret) 04126 { 04127 RegExpInstance *regexp; 04128 HRESULT hres; 04129 04130 static const WCHAR RegExpW[] = {'R','e','g','E','x','p',0}; 04131 04132 hres = alloc_regexp(ctx, object_prototype, ®exp); 04133 if(FAILED(hres)) 04134 return hres; 04135 04136 hres = create_builtin_function(ctx, RegExpConstr_value, RegExpW, &RegExpConstr_info, 04137 PROPF_CONSTR|2, ®exp->dispex, ret); 04138 04139 jsdisp_release(®exp->dispex); 04140 return hres; 04141 } 04142 04143 HRESULT parse_regexp_flags(const WCHAR *str, DWORD str_len, DWORD *ret) 04144 { 04145 const WCHAR *p; 04146 DWORD flags = 0; 04147 04148 for (p = str; p < str+str_len; p++) { 04149 switch (*p) { 04150 case 'g': 04151 flags |= JSREG_GLOB; 04152 break; 04153 case 'i': 04154 flags |= JSREG_FOLD; 04155 break; 04156 case 'm': 04157 flags |= JSREG_MULTILINE; 04158 break; 04159 case 'y': 04160 flags |= JSREG_STICKY; 04161 break; 04162 default: 04163 WARN("wrong flag %c\n", *p); 04164 return E_FAIL; 04165 } 04166 } 04167 04168 *ret = flags; 04169 return S_OK; 04170 } Generated on Mon May 28 2012 04:24:02 for ReactOS by
1.7.6.1
|