ReactOS  0.4.13-dev-52-g0efcfec
regexp.c
Go to the documentation of this file.
1 /*
2  * Copyright 2008 Jacek Caban for CodeWeavers
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17  */
18 
19 /*
20  * Code in this file is based on files:
21  * js/src/jsregexp.h
22  * js/src/jsregexp.c
23  * from Mozilla project, released under LGPL 2.1 or later.
24  *
25  * The Original Code is Mozilla Communicator client code, released
26  * March 31, 1998.
27  *
28  * The Initial Developer of the Original Code is
29  * Netscape Communications Corporation.
30  * Portions created by the Initial Developer are Copyright (C) 1998
31  * the Initial Developer. All Rights Reserved.
32  */
33 
34 #include <assert.h>
35 
36 #include "jscript.h"
37 #include "regexp.h"
38 
39 #include "wine/debug.h"
40 
42 
43 /* FIXME: Better error handling */
44 #define ReportRegExpError(a,b,c)
45 #define ReportRegExpErrorHelper(a,b,c,d)
46 #define JS_ReportErrorNumber(a,b,c,d)
47 #define JS_ReportErrorFlagsAndNumber(a,b,c,d,e,f)
48 #define js_ReportOutOfScriptQuota(a)
49 #define JS_ReportOutOfMemory(a)
50 #define JS_COUNT_OPERATION(a,b)
51 
52 
54 
55 /*
56  * This struct holds a bitmap representation of a class from a regexp.
57  * There's a list of these referenced by the classList field in the regexp_t
58  * struct below. The initial state has startIndex set to the offset in the
59  * original regexp source of the beginning of the class contents. The first
60  * use of the class converts the source representation into a bitmap.
61  *
62  */
63 typedef struct RECharSet {
67  union {
69  struct {
70  size_t startIndex;
71  size_t length;
72  } src;
73  } u;
74 } RECharSet;
75 
76 #define JSMSG_MIN_TOO_BIG 47
77 #define JSMSG_MAX_TOO_BIG 48
78 #define JSMSG_OUT_OF_ORDER 49
79 #define JSMSG_OUT_OF_MEMORY 137
80 
81 #define LINE_SEPARATOR 0x2028
82 #define PARA_SEPARATOR 0x2029
83 
84 #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
85  ((c >= 'a') && (c <= 'z')) )
86 #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
87  (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
88 
89 #define JS_ISWORD(c) ((c) < 128 && (isalnum(c) || (c) == '_'))
90 
91 #define JS7_ISDEC(c) ((((unsigned)(c)) - '0') <= 9)
92 #define JS7_UNDEC(c) ((c) - '0')
93 
94 typedef enum REOp {
144  REOP_LIMIT /* META: no operator >= to this */
145 } REOp;
146 
147 #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
148 
149 static const char *reop_names[] = {
150  "empty",
151  "bol",
152  "eol",
153  "wbdry",
154  "wnonbdry",
155  "dot",
156  "digit",
157  "nondigit",
158  "alnum",
159  "nonalnum",
160  "space",
161  "nonspace",
162  "backref",
163  "flat",
164  "flat1",
165  "flati",
166  "flat1i",
167  "ucflat1",
168  "ucflat1i",
169  "ucflat",
170  "ucflati",
171  "class",
172  "nclass",
173  "alt",
174  "quant",
175  "star",
176  "plus",
177  "opt",
178  "lparen",
179  "rparen",
180  "jump",
181  "dotstar",
182  "lparennon",
183  "assert",
184  "assert_not",
185  "asserttest",
186  "assertnottest",
187  "minimalstar",
188  "minimalplus",
189  "minimalopt",
190  "minimalquant",
191  "endchild",
192  "repeat",
193  "minimalrepeat",
194  "altprereq",
195  "alrprereq2",
196  "endalt",
197  "concat",
198  "end",
199  NULL
200 };
201 
202 typedef struct REProgState {
203  jsbytecode *continue_pc; /* current continuation data */
205  ptrdiff_t index; /* progress in text */
206  size_t parenSoFar; /* highest indexed paren started */
207  union {
208  struct {
209  UINT min; /* current quantifier limits */
211  } quantifier;
212  struct {
213  size_t top; /* backtrack stack state */
214  size_t sz;
215  } assertion;
216  } u;
217 } REProgState;
218 
219 typedef struct REBackTrackData {
220  size_t sz; /* size of previous stack entry */
221  jsbytecode *backtrack_pc; /* where to backtrack to */
223  const WCHAR *cp; /* index in text of match at backtrack */
224  size_t parenIndex; /* start index of saved paren contents */
225  size_t parenCount; /* # of saved paren contents */
226  size_t saveStateStackTop; /* number of parent states */
227  /* saved parent states follow */
228  /* saved paren contents follow */
230 
231 #define INITIAL_STATESTACK 100
232 #define INITIAL_BACKTRACK 8000
233 
234 typedef struct REGlobalData {
235  void *cx;
236  regexp_t *regexp; /* the RE in execution */
237  BOOL ok; /* runtime error (out_of_memory only?) */
238  size_t start; /* offset to start at */
239  ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
240  const WCHAR *cpbegin; /* text base address */
241  const WCHAR *cpend; /* text limit address */
242 
243  REProgState *stateStack; /* stack of state of current parents */
246 
247  REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
250  size_t cursz; /* size of current stack entry */
251  size_t backTrackCount; /* how many times we've backtracked */
252  size_t backTrackLimit; /* upper limit on backtrack states */
253 
254  heap_pool_t *pool; /* It's faster to use one malloc'd pool
255  than to malloc/free the three items
256  that are allocated from this pool */
257 } REGlobalData;
258 
259 typedef struct RENode RENode;
260 struct RENode {
261  REOp op; /* r.e. op bytecode */
262  RENode *next; /* next in concatenation order */
263  void *kid; /* first operand */
264  union {
265  void *kid2; /* second operand */
266  INT num; /* could be a number */
267  size_t parenIndex; /* or a parenthesis index */
268  struct { /* or a quantifier range */
272  } range;
273  struct { /* or a character class */
274  size_t startIndex;
275  size_t kidlen; /* length of string at kid, in jschars */
276  size_t index; /* index into class list */
277  WORD bmsize; /* bitmap size, based on max char code */
279  } ucclass;
280  struct { /* or a literal sequence */
281  WCHAR chr; /* of one character */
282  size_t length; /* or many (via the kid) */
283  } flat;
284  struct {
285  RENode *kid2; /* second operand from ALT */
286  WCHAR ch1; /* match char for ALTPREREQ */
287  WCHAR ch2; /* ditto, or class index for ALTPREREQ2 */
288  } altprereq;
289  } u;
290 };
291 
292 #define CLASS_CACHE_SIZE 4
293 
294 typedef struct CompilerState {
295  void *context;
296  const WCHAR *cpbegin;
297  const WCHAR *cpend;
298  const WCHAR *cp;
299  size_t parenCount;
300  size_t classCount; /* number of [] encountered */
301  size_t treeDepth; /* maximum depth of parse tree */
302  size_t progLength; /* estimated bytecode length */
304  size_t classBitmapsMem; /* memory to hold all class bitmaps */
305  struct {
306  const WCHAR *start; /* small cache of class strings */
307  size_t length; /* since they're often the same */
308  size_t index;
311 
312  heap_pool_t *pool; /* It's faster to use one malloc'd pool
313  than to malloc/free */
314 } CompilerState;
315 
316 typedef struct EmitStateStackEntry {
317  jsbytecode *altHead; /* start of REOP_ALT* opcode */
318  jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
319  jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
320  jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
321  RENode *continueNode; /* original REOP_ALT* node being stacked */
322  jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
323  JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
324  avoid 16-bit unsigned offset overflow */
326 
327 /*
328  * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
329  * the getters and setters take the pc of the offset, not of the opcode before
330  * the offset.
331  */
332 #define ARG_LEN 2
333 #define GET_ARG(pc) ((WORD)(((pc)[0] << 8) | (pc)[1]))
334 #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
335  (pc)[1] = (jsbytecode) (arg))
336 
337 #define OFFSET_LEN ARG_LEN
338 #define OFFSET_MAX ((1 << (ARG_LEN * 8)) - 1)
339 #define GET_OFFSET(pc) GET_ARG(pc)
340 
342 
343 /*
344  * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
345  * For sanity, we limit it to 2^24 bytes.
346  */
347 #define TREE_DEPTH_MAX ((1 << 24) / sizeof(EmitStateStackEntry))
348 
349 /*
350  * The maximum memory that can be allocated for class bitmaps.
351  * For sanity, we limit it to 2^24 bytes.
352  */
353 #define CLASS_BITMAPS_MEM_LIMIT (1 << 24)
354 
355 /*
356  * Functions to get size and write/read bytecode that represent small indexes
357  * compactly.
358  * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
359  * indicates that the following byte brings more bits to the index. Otherwise
360  * this is the last byte in the index bytecode representing highest index bits.
361  */
362 static size_t
364 {
365  size_t width;
366 
367  for (width = 1; (index >>= 7) != 0; ++width) { }
368  return width;
369 }
370 
371 static inline jsbytecode *
373 {
374  size_t next;
375 
376  while ((next = index >> 7) != 0) {
377  *pc++ = (jsbytecode)(index | 0x80);
378  index = next;
379  }
380  *pc++ = (jsbytecode)index;
381  return pc;
382 }
383 
384 static inline jsbytecode *
386 {
387  size_t nextByte;
388 
389  nextByte = *pc++;
390  if ((nextByte & 0x80) == 0) {
391  /*
392  * Short-circuit the most common case when compact index <= 127.
393  */
394  *result = nextByte;
395  } else {
396  size_t shift = 7;
397  *result = 0x7F & nextByte;
398  do {
399  nextByte = *pc++;
400  *result |= (nextByte & 0x7F) << shift;
401  shift += 7;
402  } while ((nextByte & 0x80) != 0);
403  }
404  return pc;
405 }
406 
407 /* Construct and initialize an RENode, returning NULL for out-of-memory */
408 static RENode *
410 {
411  RENode *ren;
412 
413  ren = heap_pool_alloc(state->pool, sizeof(*ren));
414  if (!ren) {
415  /* js_ReportOutOfScriptQuota(cx); */
416  return NULL;
417  }
418  ren->op = op;
419  ren->next = NULL;
420  ren->kid = NULL;
421  return ren;
422 }
423 
424 /*
425  * Validates and converts hex ascii value.
426  */
427 static BOOL
429 {
430  UINT cv = c;
431 
432  if (cv < '0')
433  return FALSE;
434  if (cv <= '9') {
435  *digit = cv - '0';
436  return TRUE;
437  }
438  cv |= 0x20;
439  if (cv >= 'a' && cv <= 'f') {
440  *digit = cv - 'a' + 10;
441  return TRUE;
442  }
443  return FALSE;
444 }
445 
446 typedef struct {
448  const WCHAR *errPos;
449  size_t parenIndex;
450 } REOpData;
451 
452 #define JUMP_OFFSET_HI(off) ((jsbytecode)((off) >> 8))
453 #define JUMP_OFFSET_LO(off) ((jsbytecode)(off))
454 
455 static BOOL
457 {
458  ptrdiff_t offset = target - jump;
459 
460  /* Check that target really points forward. */
461  assert(offset >= 2);
462  if ((size_t)offset > OFFSET_MAX)
463  return FALSE;
464 
465  jump[0] = JUMP_OFFSET_HI(offset);
466  jump[1] = JUMP_OFFSET_LO(offset);
467  return TRUE;
468 }
469 
470 /*
471  * Generate bytecode for the tree rooted at t using an explicit stack instead
472  * of recursion.
473  */
474 static jsbytecode *
475 EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth,
476  jsbytecode *pc, RENode *t)
477 {
478  EmitStateStackEntry *emitStateSP, *emitStateStack;
479  RECharSet *charSet;
480  REOp op;
481 
482  if (treeDepth == 0) {
483  emitStateStack = NULL;
484  } else {
485  emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
486  if (!emitStateStack)
487  return NULL;
488  }
489  emitStateSP = emitStateStack;
490  op = t->op;
491  assert(op < REOP_LIMIT);
492 
493  for (;;) {
494  *pc++ = op;
495  switch (op) {
496  case REOP_EMPTY:
497  --pc;
498  break;
499 
500  case REOP_ALTPREREQ2:
501  case REOP_ALTPREREQ:
502  assert(emitStateSP);
503  emitStateSP->altHead = pc - 1;
504  emitStateSP->endTermFixup = pc;
505  pc += OFFSET_LEN;
506  SET_ARG(pc, t->u.altprereq.ch1);
507  pc += ARG_LEN;
508  SET_ARG(pc, t->u.altprereq.ch2);
509  pc += ARG_LEN;
510 
511  emitStateSP->nextAltFixup = pc; /* offset to next alternate */
512  pc += OFFSET_LEN;
513 
514  emitStateSP->continueNode = t;
515  emitStateSP->continueOp = REOP_JUMP;
516  emitStateSP->jumpToJumpFlag = FALSE;
517  ++emitStateSP;
518  assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
519  t = t->kid;
520  op = t->op;
521  assert(op < REOP_LIMIT);
522  continue;
523 
524  case REOP_JUMP:
525  emitStateSP->nextTermFixup = pc; /* offset to following term */
526  pc += OFFSET_LEN;
527  if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
528  goto jump_too_big;
529  emitStateSP->continueOp = REOP_ENDALT;
530  ++emitStateSP;
531  assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
532  t = t->u.kid2;
533  op = t->op;
534  assert(op < REOP_LIMIT);
535  continue;
536 
537  case REOP_ENDALT:
538  /*
539  * If we already patched emitStateSP->nextTermFixup to jump to
540  * a nearer jump, to avoid 16-bit immediate offset overflow, we
541  * are done here.
542  */
543  if (emitStateSP->jumpToJumpFlag)
544  break;
545 
546  /*
547  * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
548  * REOP_ENDALT is executed only on successful match of the last
549  * alternate in a group.
550  */
551  if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
552  goto jump_too_big;
553  if (t->op != REOP_ALT) {
554  if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
555  goto jump_too_big;
556  }
557 
558  /*
559  * If the program is bigger than the REOP_JUMP offset range, then
560  * we must check for alternates before this one that are part of
561  * the same group, and fix up their jump offsets to target jumps
562  * close enough to fit in a 16-bit unsigned offset immediate.
563  */
564  if ((size_t)(pc - re->program) > OFFSET_MAX &&
565  emitStateSP > emitStateStack) {
566  EmitStateStackEntry *esp, *esp2;
567  jsbytecode *alt, *jump;
569 
570  esp2 = emitStateSP;
571  alt = esp2->altHead;
572  for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
573  if (esp->continueOp == REOP_ENDALT &&
574  !esp->jumpToJumpFlag &&
575  esp->nextTermFixup + OFFSET_LEN == alt &&
576  (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
577  ? esp->endTermFixup
578  : esp->nextTermFixup)) > OFFSET_MAX) {
579  alt = esp->altHead;
580  jump = esp->nextTermFixup;
581 
582  /*
583  * The span must be 1 less than the distance from
584  * jump offset to jump offset, so we actually jump
585  * to a REOP_JUMP bytecode, not to its offset!
586  */
587  for (;;) {
588  assert(jump < esp2->nextTermFixup);
589  span = esp2->nextTermFixup - jump - 1;
590  if ((size_t)span <= OFFSET_MAX)
591  break;
592  do {
593  if (--esp2 == esp)
594  goto jump_too_big;
595  } while (esp2->continueOp != REOP_ENDALT);
596  }
597 
598  jump[0] = JUMP_OFFSET_HI(span);
599  jump[1] = JUMP_OFFSET_LO(span);
600 
601  if (esp->continueNode->op != REOP_ALT) {
602  /*
603  * We must patch the offset at esp->endTermFixup
604  * as well, for the REOP_ALTPREREQ{,2} opcodes.
605  * If we're unlucky and endTermFixup is more than
606  * OFFSET_MAX bytes from its target, we cheat by
607  * jumping 6 bytes to the jump whose offset is at
608  * esp->nextTermFixup, which has the same target.
609  */
610  jump = esp->endTermFixup;
611  header = esp->nextTermFixup - jump;
612  span += header;
613  if ((size_t)span > OFFSET_MAX)
614  span = header;
615 
616  jump[0] = JUMP_OFFSET_HI(span);
617  jump[1] = JUMP_OFFSET_LO(span);
618  }
619 
620  esp->jumpToJumpFlag = TRUE;
621  }
622  }
623  }
624  break;
625 
626  case REOP_ALT:
627  assert(emitStateSP);
628  emitStateSP->altHead = pc - 1;
629  emitStateSP->nextAltFixup = pc; /* offset to next alternate */
630  pc += OFFSET_LEN;
631  emitStateSP->continueNode = t;
632  emitStateSP->continueOp = REOP_JUMP;
633  emitStateSP->jumpToJumpFlag = FALSE;
634  ++emitStateSP;
635  assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
636  t = t->kid;
637  op = t->op;
638  assert(op < REOP_LIMIT);
639  continue;
640 
641  case REOP_FLAT:
642  /*
643  * Coalesce FLATs if possible and if it would not increase bytecode
644  * beyond preallocated limit. The latter happens only when bytecode
645  * size for coalesced string with offset p and length 2 exceeds 6
646  * bytes preallocated for 2 single char nodes, i.e. when
647  * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
648  * GetCompactIndexWidth(p) > 4.
649  * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
650  * nodes strictly decreases bytecode size, the check has to be
651  * done only for the first coalescing.
652  */
653  if (t->kid &&
654  GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
655  {
656  while (t->next &&
657  t->next->op == REOP_FLAT &&
658  (WCHAR*)t->kid + t->u.flat.length ==
659  t->next->kid) {
660  t->u.flat.length += t->next->u.flat.length;
661  t->next = t->next->next;
662  }
663  }
664  if (t->kid && t->u.flat.length > 1) {
665  pc[-1] = (state->flags & REG_FOLD) ? REOP_FLATi : REOP_FLAT;
666  pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
667  pc = WriteCompactIndex(pc, t->u.flat.length);
668  } else if (t->u.flat.chr < 256) {
669  pc[-1] = (state->flags & REG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
670  *pc++ = (jsbytecode) t->u.flat.chr;
671  } else {
672  pc[-1] = (state->flags & REG_FOLD)
673  ? REOP_UCFLAT1i
674  : REOP_UCFLAT1;
675  SET_ARG(pc, t->u.flat.chr);
676  pc += ARG_LEN;
677  }
678  break;
679 
680  case REOP_LPAREN:
681  assert(emitStateSP);
682  pc = WriteCompactIndex(pc, t->u.parenIndex);
683  emitStateSP->continueNode = t;
684  emitStateSP->continueOp = REOP_RPAREN;
685  ++emitStateSP;
686  assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
687  t = t->kid;
688  op = t->op;
689  continue;
690 
691  case REOP_RPAREN:
692  pc = WriteCompactIndex(pc, t->u.parenIndex);
693  break;
694 
695  case REOP_BACKREF:
696  pc = WriteCompactIndex(pc, t->u.parenIndex);
697  break;
698 
699  case REOP_ASSERT:
700  assert(emitStateSP);
701  emitStateSP->nextTermFixup = pc;
702  pc += OFFSET_LEN;
703  emitStateSP->continueNode = t;
704  emitStateSP->continueOp = REOP_ASSERTTEST;
705  ++emitStateSP;
706  assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
707  t = t->kid;
708  op = t->op;
709  continue;
710 
711  case REOP_ASSERTTEST:
712  case REOP_ASSERTNOTTEST:
713  if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
714  goto jump_too_big;
715  break;
716 
717  case REOP_ASSERT_NOT:
718  assert(emitStateSP);
719  emitStateSP->nextTermFixup = pc;
720  pc += OFFSET_LEN;
721  emitStateSP->continueNode = t;
722  emitStateSP->continueOp = REOP_ASSERTNOTTEST;
723  ++emitStateSP;
724  assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
725  t = t->kid;
726  op = t->op;
727  continue;
728 
729  case REOP_QUANT:
730  assert(emitStateSP);
731  if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
732  pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
733  } else if (t->u.range.min == 0 && t->u.range.max == 1) {
734  pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
735  } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
736  pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
737  } else {
738  if (!t->u.range.greedy)
739  pc[-1] = REOP_MINIMALQUANT;
740  pc = WriteCompactIndex(pc, t->u.range.min);
741  /*
742  * Write max + 1 to avoid using size_t(max) + 1 bytes
743  * for (UINT)-1 sentinel.
744  */
745  pc = WriteCompactIndex(pc, t->u.range.max + 1);
746  }
747  emitStateSP->nextTermFixup = pc;
748  pc += OFFSET_LEN;
749  emitStateSP->continueNode = t;
750  emitStateSP->continueOp = REOP_ENDCHILD;
751  ++emitStateSP;
752  assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
753  t = t->kid;
754  op = t->op;
755  continue;
756 
757  case REOP_ENDCHILD:
758  if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
759  goto jump_too_big;
760  break;
761 
762  case REOP_CLASS:
763  if (!t->u.ucclass.sense)
764  pc[-1] = REOP_NCLASS;
765  pc = WriteCompactIndex(pc, t->u.ucclass.index);
766  charSet = &re->classList[t->u.ucclass.index];
767  charSet->converted = FALSE;
768  charSet->length = t->u.ucclass.bmsize;
769  charSet->u.src.startIndex = t->u.ucclass.startIndex;
770  charSet->u.src.length = t->u.ucclass.kidlen;
771  charSet->sense = t->u.ucclass.sense;
772  break;
773 
774  default:
775  break;
776  }
777 
778  t = t->next;
779  if (t) {
780  op = t->op;
781  } else {
782  if (emitStateSP == emitStateStack)
783  break;
784  --emitStateSP;
785  t = emitStateSP->continueNode;
786  op = (REOp) emitStateSP->continueOp;
787  }
788  }
789 
790  cleanup:
791  heap_free(emitStateStack);
792  return pc;
793 
794  jump_too_big:
795  ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
796  pc = NULL;
797  goto cleanup;
798 }
799 
800 /*
801  * Process the op against the two top operands, reducing them to a single
802  * operand in the penultimate slot. Update progLength and treeDepth.
803  */
804 static BOOL
805 ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
806  INT operandSP)
807 {
808  RENode *result;
809 
810  switch (opData->op) {
811  case REOP_ALT:
813  if (!result)
814  return FALSE;
815  result->kid = operandStack[operandSP - 2];
816  result->u.kid2 = operandStack[operandSP - 1];
817  operandStack[operandSP - 2] = result;
818 
819  if (state->treeDepth == TREE_DEPTH_MAX) {
820  ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
821  return FALSE;
822  }
823  ++state->treeDepth;
824 
825  /*
826  * Look at both alternates to see if there's a FLAT or a CLASS at
827  * the start of each. If so, use a prerequisite match.
828  */
829  if (((RENode *) result->kid)->op == REOP_FLAT &&
830  ((RENode *) result->u.kid2)->op == REOP_FLAT &&
831  (state->flags & REG_FOLD) == 0) {
832  result->op = REOP_ALTPREREQ;
833  result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
834  result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
835  /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
836  JUMP, <end> ... ENDALT */
837  state->progLength += 13;
838  }
839  else
840  if (((RENode *) result->kid)->op == REOP_CLASS &&
841  ((RENode *) result->kid)->u.ucclass.index < 256 &&
842  ((RENode *) result->u.kid2)->op == REOP_FLAT &&
843  (state->flags & REG_FOLD) == 0) {
844  result->op = REOP_ALTPREREQ2;
845  result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
846  result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
847  /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
848  JUMP, <end> ... ENDALT */
849  state->progLength += 13;
850  }
851  else
852  if (((RENode *) result->kid)->op == REOP_FLAT &&
853  ((RENode *) result->u.kid2)->op == REOP_CLASS &&
854  ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
855  (state->flags & REG_FOLD) == 0) {
856  result->op = REOP_ALTPREREQ2;
857  result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
858  result->u.altprereq.ch2 =
859  ((RENode *) result->u.kid2)->u.ucclass.index;
860  /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
861  JUMP, <end> ... ENDALT */
862  state->progLength += 13;
863  }
864  else {
865  /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
866  state->progLength += 7;
867  }
868  break;
869 
870  case REOP_CONCAT:
871  result = operandStack[operandSP - 2];
872  while (result->next)
873  result = result->next;
874  result->next = operandStack[operandSP - 1];
875  break;
876 
877  case REOP_ASSERT:
878  case REOP_ASSERT_NOT:
879  case REOP_LPARENNON:
880  case REOP_LPAREN:
881  /* These should have been processed by a close paren. */
882  ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
883  opData->errPos);
884  return FALSE;
885 
886  default:;
887  }
888  return TRUE;
889 }
890 
891 /*
892  * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
893  * it being on the stack, and to propagate errors to its callers.
894  */
895 #define JSREG_FIND_PAREN_COUNT 0x8000
896 #define JSREG_FIND_PAREN_ERROR 0x4000
897 
898 /*
899  * Magic return value from FindParenCount and GetDecimalValue, to indicate
900  * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
901  * its findMax parameter is non-null.
902  */
903 #define OVERFLOW_VALUE ((UINT)-1)
904 
905 static UINT
907 {
909  int i;
910 
911  if (state->flags & JSREG_FIND_PAREN_COUNT)
912  return OVERFLOW_VALUE;
913 
914  /*
915  * Copy state into temp, flag it so we never report an invalid backref,
916  * and reset its members to parse the entire regexp. This is obviously
917  * suboptimal, but GetDecimalValue calls us only if a backref appears to
918  * refer to a forward parenthetical, which is rare.
919  */
920  temp = *state;
921  temp.flags |= JSREG_FIND_PAREN_COUNT;
922  temp.cp = temp.cpbegin;
923  temp.parenCount = 0;
924  temp.classCount = 0;
925  temp.progLength = 0;
926  temp.treeDepth = 0;
927  temp.classBitmapsMem = 0;
928  for (i = 0; i < CLASS_CACHE_SIZE; i++)
929  temp.classCache[i].start = NULL;
930 
931  if (!ParseRegExp(&temp)) {
932  state->flags |= JSREG_FIND_PAREN_ERROR;
933  return OVERFLOW_VALUE;
934  }
935  return temp.parenCount;
936 }
937 
938 /*
939  * Extract and return a decimal value at state->cp. The initial character c
940  * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
941  * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
942  * state->flags to discover whether an error occurred under findMax.
943  */
944 static UINT
947 {
948  UINT value = JS7_UNDEC(c);
949  BOOL overflow = (value > max && (!findMax || value > findMax(state)));
950 
951  /* The following restriction allows simpler overflow checks. */
952  assert(max <= ((UINT)-1 - 9) / 10);
953  while (state->cp < state->cpend) {
954  c = *state->cp;
955  if (!JS7_ISDEC(c))
956  break;
957  value = 10 * value + JS7_UNDEC(c);
958  if (!overflow && value > max && (!findMax || value > findMax(state)))
959  overflow = TRUE;
960  ++state->cp;
961  }
962  return overflow ? OVERFLOW_VALUE : value;
963 }
964 
965 /*
966  * Calculate the total size of the bitmap required for a class expression.
967  */
968 static BOOL
970  const WCHAR *end)
971 {
972  UINT max = 0;
973  BOOL inRange = FALSE;
974  WCHAR c, rangeStart = 0;
975  UINT n, digit, nDigits, i;
976 
977  target->u.ucclass.bmsize = 0;
978  target->u.ucclass.sense = TRUE;
979 
980  if (src == end)
981  return TRUE;
982 
983  if (*src == '^') {
984  ++src;
985  target->u.ucclass.sense = FALSE;
986  }
987 
988  while (src != end) {
989  BOOL canStartRange = TRUE;
990  UINT localMax = 0;
991 
992  switch (*src) {
993  case '\\':
994  ++src;
995  c = *src++;
996  switch (c) {
997  case 'b':
998  localMax = 0x8;
999  break;
1000  case 'f':
1001  localMax = 0xC;
1002  break;
1003  case 'n':
1004  localMax = 0xA;
1005  break;
1006  case 'r':
1007  localMax = 0xD;
1008  break;
1009  case 't':
1010  localMax = 0x9;
1011  break;
1012  case 'v':
1013  localMax = 0xB;
1014  break;
1015  case 'c':
1016  if (src < end && RE_IS_LETTER(*src)) {
1017  localMax = (UINT) (*src++) & 0x1F;
1018  } else {
1019  --src;
1020  localMax = '\\';
1021  }
1022  break;
1023  case 'x':
1024  nDigits = 2;
1025  goto lexHex;
1026  case 'u':
1027  nDigits = 4;
1028 lexHex:
1029  n = 0;
1030  for (i = 0; (i < nDigits) && (src < end); i++) {
1031  c = *src++;
1032  if (!isASCIIHexDigit(c, &digit)) {
1033  /*
1034  * Back off to accepting the original
1035  *'\' as a literal.
1036  */
1037  src -= i + 1;
1038  n = '\\';
1039  break;
1040  }
1041  n = (n << 4) | digit;
1042  }
1043  localMax = n;
1044  break;
1045  case 'd':
1046  canStartRange = FALSE;
1047  if (inRange) {
1048  JS_ReportErrorNumber(state->context,
1049  js_GetErrorMessage, NULL,
1050  JSMSG_BAD_CLASS_RANGE);
1051  return FALSE;
1052  }
1053  localMax = '9';
1054  break;
1055  case 'D':
1056  case 's':
1057  case 'S':
1058  case 'w':
1059  case 'W':
1060  canStartRange = FALSE;
1061  if (inRange) {
1062  JS_ReportErrorNumber(state->context,
1063  js_GetErrorMessage, NULL,
1064  JSMSG_BAD_CLASS_RANGE);
1065  return FALSE;
1066  }
1067  max = 65535;
1068 
1069  /*
1070  * If this is the start of a range, ensure that it's less than
1071  * the end.
1072  */
1073  localMax = 0;
1074  break;
1075  case '0':
1076  case '1':
1077  case '2':
1078  case '3':
1079  case '4':
1080  case '5':
1081  case '6':
1082  case '7':
1083  /*
1084  * This is a non-ECMA extension - decimal escapes (in this
1085  * case, octal!) are supposed to be an error inside class
1086  * ranges, but supported here for backwards compatibility.
1087  *
1088  */
1089  n = JS7_UNDEC(c);
1090  c = *src;
1091  if ('0' <= c && c <= '7') {
1092  src++;
1093  n = 8 * n + JS7_UNDEC(c);
1094  c = *src;
1095  if ('0' <= c && c <= '7') {
1096  src++;
1097  i = 8 * n + JS7_UNDEC(c);
1098  if (i <= 0377)
1099  n = i;
1100  else
1101  src--;
1102  }
1103  }
1104  localMax = n;
1105  break;
1106 
1107  default:
1108  localMax = c;
1109  break;
1110  }
1111  break;
1112  default:
1113  localMax = *src++;
1114  break;
1115  }
1116 
1117  if (inRange) {
1118  /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1119  if (rangeStart > localMax) {
1120  JS_ReportErrorNumber(state->context,
1121  js_GetErrorMessage, NULL,
1122  JSMSG_BAD_CLASS_RANGE);
1123  return FALSE;
1124  }
1125  inRange = FALSE;
1126  } else {
1127  if (canStartRange && src < end - 1) {
1128  if (*src == '-') {
1129  ++src;
1130  inRange = TRUE;
1131  rangeStart = (WCHAR)localMax;
1132  continue;
1133  }
1134  }
1135  if (state->flags & REG_FOLD)
1136  rangeStart = localMax; /* one run of the uc/dc loop below */
1137  }
1138 
1139  if (state->flags & REG_FOLD) {
1140  WCHAR maxch = localMax;
1141 
1142  for (i = rangeStart; i <= localMax; i++) {
1143  WCHAR uch, dch;
1144 
1145  uch = toupperW(i);
1146  dch = tolowerW(i);
1147  if(maxch < uch)
1148  maxch = uch;
1149  if(maxch < dch)
1150  maxch = dch;
1151  }
1152  localMax = maxch;
1153  }
1154 
1155  if (localMax > max)
1156  max = localMax;
1157  }
1158  target->u.ucclass.bmsize = max;
1159  return TRUE;
1160 }
1161 
1162 static INT
1164 {
1165  UINT min, max;
1166  WCHAR c;
1167  const WCHAR *errp = state->cp++;
1168 
1169  c = *state->cp;
1170  if (JS7_ISDEC(c)) {
1171  ++state->cp;
1172  min = GetDecimalValue(c, 0xFFFF, NULL, state);
1173  c = *state->cp;
1174 
1175  if (!ignoreValues && min == OVERFLOW_VALUE)
1176  return JSMSG_MIN_TOO_BIG;
1177 
1178  if (c == ',') {
1179  c = *++state->cp;
1180  if (JS7_ISDEC(c)) {
1181  ++state->cp;
1182  max = GetDecimalValue(c, 0xFFFF, NULL, state);
1183  c = *state->cp;
1184  if (!ignoreValues && max == OVERFLOW_VALUE)
1185  return JSMSG_MAX_TOO_BIG;
1186  if (!ignoreValues && min > max)
1187  return JSMSG_OUT_OF_ORDER;
1188  } else {
1189  max = (UINT)-1;
1190  }
1191  } else {
1192  max = min;
1193  }
1194  if (c == '}') {
1195  state->result = NewRENode(state, REOP_QUANT);
1196  if (!state->result)
1197  return JSMSG_OUT_OF_MEMORY;
1198  state->result->u.range.min = min;
1199  state->result->u.range.max = max;
1200  /*
1201  * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1202  * where <max> is written as compact(max+1) to make
1203  * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1204  */
1205  state->progLength += (1 + GetCompactIndexWidth(min)
1206  + GetCompactIndexWidth(max + 1)
1207  +3);
1208  return 0;
1209  }
1210  }
1211 
1212  state->cp = errp;
1213  return -1;
1214 }
1215 
1216 static BOOL
1218 {
1219  RENode *term;
1220  term = state->result;
1221  if (state->cp < state->cpend) {
1222  switch (*state->cp) {
1223  case '+':
1224  state->result = NewRENode(state, REOP_QUANT);
1225  if (!state->result)
1226  return FALSE;
1227  state->result->u.range.min = 1;
1228  state->result->u.range.max = (UINT)-1;
1229  /* <PLUS>, <next> ... <ENDCHILD> */
1230  state->progLength += 4;
1231  goto quantifier;
1232  case '*':
1233  state->result = NewRENode(state, REOP_QUANT);
1234  if (!state->result)
1235  return FALSE;
1236  state->result->u.range.min = 0;
1237  state->result->u.range.max = (UINT)-1;
1238  /* <STAR>, <next> ... <ENDCHILD> */
1239  state->progLength += 4;
1240  goto quantifier;
1241  case '?':
1242  state->result = NewRENode(state, REOP_QUANT);
1243  if (!state->result)
1244  return FALSE;
1245  state->result->u.range.min = 0;
1246  state->result->u.range.max = 1;
1247  /* <OPT>, <next> ... <ENDCHILD> */
1248  state->progLength += 4;
1249  goto quantifier;
1250  case '{': /* balance '}' */
1251  {
1252  INT err;
1253 
1255  if (err == 0)
1256  goto quantifier;
1257  if (err == -1)
1258  return TRUE;
1259 
1260  ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1261  return FALSE;
1262  }
1263  default:;
1264  }
1265  }
1266  return TRUE;
1267 
1268 quantifier:
1269  if (state->treeDepth == TREE_DEPTH_MAX) {
1270  ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1271  return FALSE;
1272  }
1273 
1274  ++state->treeDepth;
1275  ++state->cp;
1276  state->result->kid = term;
1277  if (state->cp < state->cpend && *state->cp == '?') {
1278  ++state->cp;
1279  state->result->u.range.greedy = FALSE;
1280  } else {
1281  state->result->u.range.greedy = TRUE;
1282  }
1283  return TRUE;
1284 }
1285 
1286 /*
1287  * item: assertion An item is either an assertion or
1288  * quantatom a quantified atom.
1289  *
1290  * assertion: '^' Assertions match beginning of string
1291  * (or line if the class static property
1292  * RegExp.multiline is true).
1293  * '$' End of string (or line if the class
1294  * static property RegExp.multiline is
1295  * true).
1296  * '\b' Word boundary (between \w and \W).
1297  * '\B' Word non-boundary.
1298  *
1299  * quantatom: atom An unquantified atom.
1300  * quantatom '{' n ',' m '}'
1301  * Atom must occur between n and m times.
1302  * quantatom '{' n ',' '}' Atom must occur at least n times.
1303  * quantatom '{' n '}' Atom must occur exactly n times.
1304  * quantatom '*' Zero or more times (same as {0,}).
1305  * quantatom '+' One or more times (same as {1,}).
1306  * quantatom '?' Zero or one time (same as {0,1}).
1307  *
1308  * any of which can be optionally followed by '?' for ungreedy
1309  *
1310  * atom: '(' regexp ')' A parenthesized regexp (what matched
1311  * can be addressed using a backreference,
1312  * see '\' n below).
1313  * '.' Matches any char except '\n'.
1314  * '[' classlist ']' A character class.
1315  * '[' '^' classlist ']' A negated character class.
1316  * '\f' Form Feed.
1317  * '\n' Newline (Line Feed).
1318  * '\r' Carriage Return.
1319  * '\t' Horizontal Tab.
1320  * '\v' Vertical Tab.
1321  * '\d' A digit (same as [0-9]).
1322  * '\D' A non-digit.
1323  * '\w' A word character, [0-9a-z_A-Z].
1324  * '\W' A non-word character.
1325  * '\s' A whitespace character, [ \b\f\n\r\t\v].
1326  * '\S' A non-whitespace character.
1327  * '\' n A backreference to the nth (n decimal
1328  * and positive) parenthesized expression.
1329  * '\' octal An octal escape sequence (octal must be
1330  * two or three digits long, unless it is
1331  * 0 for the null character).
1332  * '\x' hex A hex escape (hex must be two digits).
1333  * '\u' unicode A unicode escape (must be four digits).
1334  * '\c' ctrl A control character, ctrl is a letter.
1335  * '\' literalatomchar Any character except one of the above
1336  * that follow '\' in an atom.
1337  * otheratomchar Any character not first among the other
1338  * atom right-hand sides.
1339  */
1340 static BOOL
1342 {
1343  WCHAR c = *state->cp++;
1344  UINT nDigits;
1345  UINT num, tmp, n, i;
1346  const WCHAR *termStart;
1347 
1348  switch (c) {
1349  /* assertions and atoms */
1350  case '^':
1351  state->result = NewRENode(state, REOP_BOL);
1352  if (!state->result)
1353  return FALSE;
1354  state->progLength++;
1355  return TRUE;
1356  case '$':
1357  state->result = NewRENode(state, REOP_EOL);
1358  if (!state->result)
1359  return FALSE;
1360  state->progLength++;
1361  return TRUE;
1362  case '\\':
1363  if (state->cp >= state->cpend) {
1364  /* a trailing '\' is an error */
1365  ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1366  return FALSE;
1367  }
1368  c = *state->cp++;
1369  switch (c) {
1370  /* assertion escapes */
1371  case 'b' :
1372  state->result = NewRENode(state, REOP_WBDRY);
1373  if (!state->result)
1374  return FALSE;
1375  state->progLength++;
1376  return TRUE;
1377  case 'B':
1378  state->result = NewRENode(state, REOP_WNONBDRY);
1379  if (!state->result)
1380  return FALSE;
1381  state->progLength++;
1382  return TRUE;
1383  /* Decimal escape */
1384  case '0':
1385  /* Give a strict warning. See also the note below. */
1386  WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1387  doOctal:
1388  num = 0;
1389  while (state->cp < state->cpend) {
1390  c = *state->cp;
1391  if (c < '0' || '7' < c)
1392  break;
1393  state->cp++;
1394  tmp = 8 * num + (UINT)JS7_UNDEC(c);
1395  if (tmp > 0377)
1396  break;
1397  num = tmp;
1398  }
1399  c = (WCHAR)num;
1400  doFlat:
1401  state->result = NewRENode(state, REOP_FLAT);
1402  if (!state->result)
1403  return FALSE;
1404  state->result->u.flat.chr = c;
1405  state->result->u.flat.length = 1;
1406  state->progLength += 3;
1407  break;
1408  case '1':
1409  case '2':
1410  case '3':
1411  case '4':
1412  case '5':
1413  case '6':
1414  case '7':
1415  case '8':
1416  case '9':
1417  termStart = state->cp - 1;
1418  num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1419  if (state->flags & JSREG_FIND_PAREN_ERROR)
1420  return FALSE;
1421  if (num == OVERFLOW_VALUE) {
1422  /* Give a strict mode warning. */
1423  WARN("back-reference exceeds number of capturing parentheses\n");
1424 
1425  /*
1426  * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1427  * error here. However, for compatibility with IE, we treat the
1428  * whole backref as flat if the first character in it is not a
1429  * valid octal character, and as an octal escape otherwise.
1430  */
1431  state->cp = termStart;
1432  if (c >= '8') {
1433  /* Treat this as flat. termStart - 1 is the \. */
1434  c = '\\';
1435  goto asFlat;
1436  }
1437 
1438  /* Treat this as an octal escape. */
1439  goto doOctal;
1440  }
1441  assert(1 <= num && num <= 0x10000);
1442  state->result = NewRENode(state, REOP_BACKREF);
1443  if (!state->result)
1444  return FALSE;
1445  state->result->u.parenIndex = num - 1;
1446  state->progLength
1447  += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1448  break;
1449  /* Control escape */
1450  case 'f':
1451  c = 0xC;
1452  goto doFlat;
1453  case 'n':
1454  c = 0xA;
1455  goto doFlat;
1456  case 'r':
1457  c = 0xD;
1458  goto doFlat;
1459  case 't':
1460  c = 0x9;
1461  goto doFlat;
1462  case 'v':
1463  c = 0xB;
1464  goto doFlat;
1465  /* Control letter */
1466  case 'c':
1467  if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1468  c = (WCHAR) (*state->cp++ & 0x1F);
1469  } else {
1470  /* back off to accepting the original '\' as a literal */
1471  --state->cp;
1472  c = '\\';
1473  }
1474  goto doFlat;
1475  /* HexEscapeSequence */
1476  case 'x':
1477  nDigits = 2;
1478  goto lexHex;
1479  /* UnicodeEscapeSequence */
1480  case 'u':
1481  nDigits = 4;
1482 lexHex:
1483  n = 0;
1484  for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1485  UINT digit;
1486  c = *state->cp++;
1487  if (!isASCIIHexDigit(c, &digit)) {
1488  /*
1489  * Back off to accepting the original 'u' or 'x' as a
1490  * literal.
1491  */
1492  state->cp -= i + 2;
1493  n = *state->cp++;
1494  break;
1495  }
1496  n = (n << 4) | digit;
1497  }
1498  c = (WCHAR) n;
1499  goto doFlat;
1500  /* Character class escapes */
1501  case 'd':
1502  state->result = NewRENode(state, REOP_DIGIT);
1503 doSimple:
1504  if (!state->result)
1505  return FALSE;
1506  state->progLength++;
1507  break;
1508  case 'D':
1509  state->result = NewRENode(state, REOP_NONDIGIT);
1510  goto doSimple;
1511  case 's':
1512  state->result = NewRENode(state, REOP_SPACE);
1513  goto doSimple;
1514  case 'S':
1515  state->result = NewRENode(state, REOP_NONSPACE);
1516  goto doSimple;
1517  case 'w':
1518  state->result = NewRENode(state, REOP_ALNUM);
1519  goto doSimple;
1520  case 'W':
1521  state->result = NewRENode(state, REOP_NONALNUM);
1522  goto doSimple;
1523  /* IdentityEscape */
1524  default:
1525  state->result = NewRENode(state, REOP_FLAT);
1526  if (!state->result)
1527  return FALSE;
1528  state->result->u.flat.chr = c;
1529  state->result->u.flat.length = 1;
1530  state->result->kid = (void *) (state->cp - 1);
1531  state->progLength += 3;
1532  break;
1533  }
1534  break;
1535  case '[':
1536  state->result = NewRENode(state, REOP_CLASS);
1537  if (!state->result)
1538  return FALSE;
1539  termStart = state->cp;
1540  state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1541  for (;;) {
1542  if (state->cp == state->cpend) {
1543  ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1544  JSMSG_UNTERM_CLASS, termStart);
1545 
1546  return FALSE;
1547  }
1548  if (*state->cp == '\\') {
1549  state->cp++;
1550  if (state->cp != state->cpend)
1551  state->cp++;
1552  continue;
1553  }
1554  if (*state->cp == ']') {
1555  state->result->u.ucclass.kidlen = state->cp - termStart;
1556  break;
1557  }
1558  state->cp++;
1559  }
1560  for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1561  if (!state->classCache[i].start) {
1562  state->classCache[i].start = termStart;
1563  state->classCache[i].length = state->result->u.ucclass.kidlen;
1564  state->classCache[i].index = state->classCount;
1565  break;
1566  }
1567  if (state->classCache[i].length ==
1568  state->result->u.ucclass.kidlen) {
1569  for (n = 0; ; n++) {
1570  if (n == state->classCache[i].length) {
1571  state->result->u.ucclass.index
1572  = state->classCache[i].index;
1573  goto claim;
1574  }
1575  if (state->classCache[i].start[n] != termStart[n])
1576  break;
1577  }
1578  }
1579  }
1580  state->result->u.ucclass.index = state->classCount++;
1581 
1582  claim:
1583  /*
1584  * Call CalculateBitmapSize now as we want any errors it finds
1585  * to be reported during the parse phase, not at execution.
1586  */
1587  if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1588  return FALSE;
1589  /*
1590  * Update classBitmapsMem with number of bytes to hold bmsize bits,
1591  * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1592  * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1593  */
1594  n = (state->result->u.ucclass.bmsize >> 3) + 1;
1595  if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1596  ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1597  return FALSE;
1598  }
1599  state->classBitmapsMem += n;
1600  /* CLASS, <index> */
1601  state->progLength
1602  += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1603  break;
1604 
1605  case '.':
1606  state->result = NewRENode(state, REOP_DOT);
1607  goto doSimple;
1608 
1609  case '{':
1610  {
1611  const WCHAR *errp = state->cp--;
1612  INT err;
1613 
1615  state->cp = errp;
1616 
1617  if (err < 0)
1618  goto asFlat;
1619 
1620  /* FALL THROUGH */
1621  }
1622  case '*':
1623  case '+':
1624  case '?':
1625  ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1626  JSMSG_BAD_QUANTIFIER, state->cp - 1);
1627  return FALSE;
1628  default:
1629 asFlat:
1630  state->result = NewRENode(state, REOP_FLAT);
1631  if (!state->result)
1632  return FALSE;
1633  state->result->u.flat.chr = c;
1634  state->result->u.flat.length = 1;
1635  state->result->kid = (void *) (state->cp - 1);
1636  state->progLength += 3;
1637  break;
1638  }
1639  return ParseQuantifier(state);
1640 }
1641 
1642 /*
1643  * Top-down regular expression grammar, based closely on Perl4.
1644  *
1645  * regexp: altern A regular expression is one or more
1646  * altern '|' regexp alternatives separated by vertical bar.
1647  */
1648 #define INITIAL_STACK_SIZE 128
1649 
1650 static BOOL
1652 {
1653  size_t parenIndex;
1654  RENode *operand;
1655  REOpData *operatorStack;
1656  RENode **operandStack;
1657  REOp op;
1658  INT i;
1659  BOOL result = FALSE;
1660 
1661  INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1662  INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1663 
1664  /* Watch out for empty regexp */
1665  if (state->cp == state->cpend) {
1666  state->result = NewRENode(state, REOP_EMPTY);
1667  return (state->result != NULL);
1668  }
1669 
1670  operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1671  if (!operatorStack)
1672  return FALSE;
1673 
1674  operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1675  if (!operandStack)
1676  goto out;
1677 
1678  for (;;) {
1679  parenIndex = state->parenCount;
1680  if (state->cp == state->cpend) {
1681  /*
1682  * If we are at the end of the regexp and we're short one or more
1683  * operands, the regexp must have the form /x|/ or some such, with
1684  * left parentheses making us short more than one operand.
1685  */
1686  if (operatorSP >= operandSP) {
1687  operand = NewRENode(state, REOP_EMPTY);
1688  if (!operand)
1689  goto out;
1690  goto pushOperand;
1691  }
1692  } else {
1693  switch (*state->cp) {
1694  case '(':
1695  ++state->cp;
1696  if (state->cp + 1 < state->cpend &&
1697  *state->cp == '?' &&
1698  (state->cp[1] == '=' ||
1699  state->cp[1] == '!' ||
1700  state->cp[1] == ':')) {
1701  switch (state->cp[1]) {
1702  case '=':
1703  op = REOP_ASSERT;
1704  /* ASSERT, <next>, ... ASSERTTEST */
1705  state->progLength += 4;
1706  break;
1707  case '!':
1708  op = REOP_ASSERT_NOT;
1709  /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1710  state->progLength += 4;
1711  break;
1712  default:
1713  op = REOP_LPARENNON;
1714  break;
1715  }
1716  state->cp += 2;
1717  } else {
1718  op = REOP_LPAREN;
1719  /* LPAREN, <index>, ... RPAREN, <index> */
1720  state->progLength
1721  += 2 * (1 + GetCompactIndexWidth(parenIndex));
1722  state->parenCount++;
1723  if (state->parenCount == 65535) {
1724  ReportRegExpError(state, JSREPORT_ERROR,
1725  JSMSG_TOO_MANY_PARENS);
1726  goto out;
1727  }
1728  }
1729  goto pushOperator;
1730 
1731  case ')':
1732  /*
1733  * If there's no stacked open parenthesis, throw syntax error.
1734  */
1735  for (i = operatorSP - 1; ; i--) {
1736  if (i < 0) {
1737  ReportRegExpError(state, JSREPORT_ERROR,
1738  JSMSG_UNMATCHED_RIGHT_PAREN);
1739  goto out;
1740  }
1741  if (operatorStack[i].op == REOP_ASSERT ||
1742  operatorStack[i].op == REOP_ASSERT_NOT ||
1743  operatorStack[i].op == REOP_LPARENNON ||
1744  operatorStack[i].op == REOP_LPAREN) {
1745  break;
1746  }
1747  }
1748  /* FALL THROUGH */
1749 
1750  case '|':
1751  /* Expected an operand before these, so make an empty one */
1752  operand = NewRENode(state, REOP_EMPTY);
1753  if (!operand)
1754  goto out;
1755  goto pushOperand;
1756 
1757  default:
1758  if (!ParseTerm(state))
1759  goto out;
1760  operand = state->result;
1761 pushOperand:
1762  if (operandSP == operandStackSize) {
1763  RENode **tmp;
1764  operandStackSize += operandStackSize;
1765  tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1766  if (!tmp)
1767  goto out;
1768  operandStack = tmp;
1769  }
1770  operandStack[operandSP++] = operand;
1771  break;
1772  }
1773  }
1774 
1775  /* At the end; process remaining operators. */
1776 restartOperator:
1777  if (state->cp == state->cpend) {
1778  while (operatorSP) {
1779  --operatorSP;
1780  if (!ProcessOp(state, &operatorStack[operatorSP],
1781  operandStack, operandSP))
1782  goto out;
1783  --operandSP;
1784  }
1785  assert(operandSP == 1);
1786  state->result = operandStack[0];
1787  result = TRUE;
1788  goto out;
1789  }
1790 
1791  switch (*state->cp) {
1792  case '|':
1793  /* Process any stacked 'concat' operators */
1794  ++state->cp;
1795  while (operatorSP &&
1796  operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1797  --operatorSP;
1798  if (!ProcessOp(state, &operatorStack[operatorSP],
1799  operandStack, operandSP)) {
1800  goto out;
1801  }
1802  --operandSP;
1803  }
1804  op = REOP_ALT;
1805  goto pushOperator;
1806 
1807  case ')':
1808  /*
1809  * If there's no stacked open parenthesis, throw syntax error.
1810  */
1811  for (i = operatorSP - 1; ; i--) {
1812  if (i < 0) {
1813  ReportRegExpError(state, JSREPORT_ERROR,
1814  JSMSG_UNMATCHED_RIGHT_PAREN);
1815  goto out;
1816  }
1817  if (operatorStack[i].op == REOP_ASSERT ||
1818  operatorStack[i].op == REOP_ASSERT_NOT ||
1819  operatorStack[i].op == REOP_LPARENNON ||
1820  operatorStack[i].op == REOP_LPAREN) {
1821  break;
1822  }
1823  }
1824  ++state->cp;
1825 
1826  /* Process everything on the stack until the open parenthesis. */
1827  for (;;) {
1828  assert(operatorSP);
1829  --operatorSP;
1830  switch (operatorStack[operatorSP].op) {
1831  case REOP_ASSERT:
1832  case REOP_ASSERT_NOT:
1833  case REOP_LPAREN:
1834  operand = NewRENode(state, operatorStack[operatorSP].op);
1835  if (!operand)
1836  goto out;
1837  operand->u.parenIndex =
1838  operatorStack[operatorSP].parenIndex;
1839  assert(operandSP);
1840  operand->kid = operandStack[operandSP - 1];
1841  operandStack[operandSP - 1] = operand;
1842  if (state->treeDepth == TREE_DEPTH_MAX) {
1843  ReportRegExpError(state, JSREPORT_ERROR,
1844  JSMSG_REGEXP_TOO_COMPLEX);
1845  goto out;
1846  }
1847  ++state->treeDepth;
1848  /* FALL THROUGH */
1849 
1850  case REOP_LPARENNON:
1851  state->result = operandStack[operandSP - 1];
1852  if (!ParseQuantifier(state))
1853  goto out;
1854  operandStack[operandSP - 1] = state->result;
1855  goto restartOperator;
1856  default:
1857  if (!ProcessOp(state, &operatorStack[operatorSP],
1858  operandStack, operandSP))
1859  goto out;
1860  --operandSP;
1861  break;
1862  }
1863  }
1864  break;
1865 
1866  case '{':
1867  {
1868  const WCHAR *errp = state->cp;
1869 
1870  if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1871  /*
1872  * This didn't even scan correctly as a quantifier, so we should
1873  * treat it as flat.
1874  */
1875  op = REOP_CONCAT;
1876  goto pushOperator;
1877  }
1878 
1879  state->cp = errp;
1880  /* FALL THROUGH */
1881  }
1882 
1883  case '+':
1884  case '*':
1885  case '?':
1886  ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1887  state->cp);
1888  result = FALSE;
1889  goto out;
1890 
1891  default:
1892  /* Anything else is the start of the next term. */
1893  op = REOP_CONCAT;
1894 pushOperator:
1895  if (operatorSP == operatorStackSize) {
1896  REOpData *tmp;
1897  operatorStackSize += operatorStackSize;
1898  tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1899  if (!tmp)
1900  goto out;
1901  operatorStack = tmp;
1902  }
1903  operatorStack[operatorSP].op = op;
1904  operatorStack[operatorSP].errPos = state->cp;
1905  operatorStack[operatorSP++].parenIndex = parenIndex;
1906  break;
1907  }
1908  }
1909 out:
1910  heap_free(operatorStack);
1911  heap_free(operandStack);
1912  return result;
1913 }
1914 
1915 /*
1916  * Save the current state of the match - the position in the input
1917  * text as well as the position in the bytecode. The state of any
1918  * parent expressions is also saved (preceding state).
1919  * Contents of parenCount parentheses from parenIndex are also saved.
1920  */
1921 static REBackTrackData *
1923  jsbytecode *target, match_state_t *x, const WCHAR *cp,
1924  size_t parenIndex, size_t parenCount)
1925 {
1926  size_t i;
1928  (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1929 
1930  size_t sz = sizeof(REBackTrackData) +
1931  gData->stateStackTop * sizeof(REProgState) +
1932  parenCount * sizeof(RECapture);
1933 
1934  ptrdiff_t btsize = gData->backTrackStackSize;
1935  ptrdiff_t btincr = ((char *)result + sz) -
1936  ((char *)gData->backTrackStack + btsize);
1937 
1938  TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1939 
1940  JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1941  if (btincr > 0) {
1942  ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1943 
1944  JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1945  btincr = ((btincr+btsize-1)/btsize)*btsize;
1946  gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1947  if (!gData->backTrackStack) {
1948  js_ReportOutOfScriptQuota(gData->cx);
1949  gData->ok = FALSE;
1950  return NULL;
1951  }
1952  gData->backTrackStackSize = btsize + btincr;
1953  result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1954  }
1955  gData->backTrackSP = result;
1956  result->sz = gData->cursz;
1957  gData->cursz = sz;
1958 
1959  result->backtrack_op = op;
1960  result->backtrack_pc = target;
1961  result->cp = cp;
1962  result->parenCount = parenCount;
1963  result->parenIndex = parenIndex;
1964 
1965  result->saveStateStackTop = gData->stateStackTop;
1966  assert(gData->stateStackTop);
1967  memcpy(result + 1, gData->stateStack,
1968  sizeof(REProgState) * result->saveStateStackTop);
1969 
1970  if (parenCount != 0) {
1971  memcpy((char *)(result + 1) +
1972  sizeof(REProgState) * result->saveStateStackTop,
1973  &x->parens[parenIndex],
1974  sizeof(RECapture) * parenCount);
1975  for (i = 0; i != parenCount; i++)
1976  x->parens[parenIndex + i].index = -1;
1977  }
1978 
1979  return result;
1980 }
1981 
1982 static inline match_state_t *
1983 FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
1984  size_t length)
1985 {
1986  size_t i;
1987  assert(gData->cpend >= x->cp);
1988  if (length > (size_t)(gData->cpend - x->cp))
1989  return NULL;
1990  for (i = 0; i != length; i++) {
1991  if (toupperW(matchChars[i]) != toupperW(x->cp[i]))
1992  return NULL;
1993  }
1994  x->cp += length;
1995  return x;
1996 }
1997 
1998 /*
1999  * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2000  * 2. If E is not a character then go to step 6.
2001  * 3. Let ch be E's character.
2002  * 4. Let A be a one-element RECharSet containing the character ch.
2003  * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2004  * 6. E must be an integer. Let n be that integer.
2005  * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2006  * 8. Return an internal Matcher closure that takes two arguments, a State x
2007  * and a Continuation c, and performs the following:
2008  * 1. Let cap be x's captures internal array.
2009  * 2. Let s be cap[n].
2010  * 3. If s is undefined, then call c(x) and return its result.
2011  * 4. Let e be x's endIndex.
2012  * 5. Let len be s's length.
2013  * 6. Let f be e+len.
2014  * 7. If f>InputLength, return failure.
2015  * 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2016  * such that Canonicalize(s[i]) is not the same character as
2017  * Canonicalize(Input [e+i]), then return failure.
2018  * 9. Let y be the State (f, cap).
2019  * 10. Call c(y) and return its result.
2020  */
2021 static match_state_t *
2022 BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
2023 {
2024  size_t len, i;
2025  const WCHAR *parenContent;
2026  RECapture *cap = &x->parens[parenIndex];
2027 
2028  if (cap->index == -1)
2029  return x;
2030 
2031  len = cap->length;
2032  if (x->cp + len > gData->cpend)
2033  return NULL;
2034 
2035  parenContent = &gData->cpbegin[cap->index];
2036  if (gData->regexp->flags & REG_FOLD) {
2037  for (i = 0; i < len; i++) {
2038  if (toupperW(parenContent[i]) != toupperW(x->cp[i]))
2039  return NULL;
2040  }
2041  } else {
2042  for (i = 0; i < len; i++) {
2043  if (parenContent[i] != x->cp[i])
2044  return NULL;
2045  }
2046  }
2047  x->cp += len;
2048  return x;
2049 }
2050 
2051 /* Add a single character to the RECharSet */
2052 static void
2054 {
2055  UINT byteIndex = (UINT)(c >> 3);
2057  cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2058 }
2059 
2060 
2061 /* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2062 static void
2064 {
2065  UINT i;
2066 
2067  UINT byteIndex1 = c1 >> 3;
2068  UINT byteIndex2 = c2 >> 3;
2069 
2070  assert(c2 <= cs->length && c1 <= c2);
2071 
2072  c1 &= 0x7;
2073  c2 &= 0x7;
2074 
2075  if (byteIndex1 == byteIndex2) {
2076  cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2077  } else {
2078  cs->u.bits[byteIndex1] |= 0xFF << c1;
2079  for (i = byteIndex1 + 1; i < byteIndex2; i++)
2080  cs->u.bits[i] = 0xFF;
2081  cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2082  }
2083 }
2084 
2085 /* Compile the source of the class into a RECharSet */
2086 static BOOL
2088 {
2089  const WCHAR *src, *end;
2090  BOOL inRange = FALSE;
2091  WCHAR rangeStart = 0;
2092  UINT byteLength, n;
2093  WCHAR c, thisCh;
2094  INT nDigits, i;
2095 
2096  assert(!charSet->converted);
2097  /*
2098  * Assert that startIndex and length points to chars inside [] inside
2099  * source string.
2100  */
2101  assert(1 <= charSet->u.src.startIndex);
2102  assert(charSet->u.src.startIndex < gData->regexp->source_len);
2103  assert(charSet->u.src.length <= gData->regexp->source_len
2104  - 1 - charSet->u.src.startIndex);
2105 
2106  charSet->converted = TRUE;
2107  src = gData->regexp->source + charSet->u.src.startIndex;
2108 
2109  end = src + charSet->u.src.length;
2110 
2111  assert(src[-1] == '[' && end[0] == ']');
2112 
2113  byteLength = (charSet->length >> 3) + 1;
2114  charSet->u.bits = heap_alloc(byteLength);
2115  if (!charSet->u.bits) {
2116  JS_ReportOutOfMemory(gData->cx);
2117  gData->ok = FALSE;
2118  return FALSE;
2119  }
2120  memset(charSet->u.bits, 0, byteLength);
2121 
2122  if (src == end)
2123  return TRUE;
2124 
2125  if (*src == '^') {
2126  assert(charSet->sense == FALSE);
2127  ++src;
2128  } else {
2129  assert(charSet->sense == TRUE);
2130  }
2131 
2132  while (src != end) {
2133  switch (*src) {
2134  case '\\':
2135  ++src;
2136  c = *src++;
2137  switch (c) {
2138  case 'b':
2139  thisCh = 0x8;
2140  break;
2141  case 'f':
2142  thisCh = 0xC;
2143  break;
2144  case 'n':
2145  thisCh = 0xA;
2146  break;
2147  case 'r':
2148  thisCh = 0xD;
2149  break;
2150  case 't':
2151  thisCh = 0x9;
2152  break;
2153  case 'v':
2154  thisCh = 0xB;
2155  break;
2156  case 'c':
2157  if (src < end && JS_ISWORD(*src)) {
2158  thisCh = (WCHAR)(*src++ & 0x1F);
2159  } else {
2160  --src;
2161  thisCh = '\\';
2162  }
2163  break;
2164  case 'x':
2165  nDigits = 2;
2166  goto lexHex;
2167  case 'u':
2168  nDigits = 4;
2169  lexHex:
2170  n = 0;
2171  for (i = 0; (i < nDigits) && (src < end); i++) {
2172  UINT digit;
2173  c = *src++;
2174  if (!isASCIIHexDigit(c, &digit)) {
2175  /*
2176  * Back off to accepting the original '\'
2177  * as a literal
2178  */
2179  src -= i + 1;
2180  n = '\\';
2181  break;
2182  }
2183  n = (n << 4) | digit;
2184  }
2185  thisCh = (WCHAR)n;
2186  break;
2187  case '0':
2188  case '1':
2189  case '2':
2190  case '3':
2191  case '4':
2192  case '5':
2193  case '6':
2194  case '7':
2195  /*
2196  * This is a non-ECMA extension - decimal escapes (in this
2197  * case, octal!) are supposed to be an error inside class
2198  * ranges, but supported here for backwards compatibility.
2199  */
2200  n = JS7_UNDEC(c);
2201  c = *src;
2202  if ('0' <= c && c <= '7') {
2203  src++;
2204  n = 8 * n + JS7_UNDEC(c);
2205  c = *src;
2206  if ('0' <= c && c <= '7') {
2207  src++;
2208  i = 8 * n + JS7_UNDEC(c);
2209  if (i <= 0377)
2210  n = i;
2211  else
2212  src--;
2213  }
2214  }
2215  thisCh = (WCHAR)n;
2216  break;
2217 
2218  case 'd':
2219  AddCharacterRangeToCharSet(charSet, '0', '9');
2220  continue; /* don't need range processing */
2221  case 'D':
2222  AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2224  (WCHAR)('9' + 1),
2225  (WCHAR)charSet->length);
2226  continue;
2227  case 's':
2228  for (i = (INT)charSet->length; i >= 0; i--)
2229  if (isspaceW(i))
2230  AddCharacterToCharSet(charSet, (WCHAR)i);
2231  continue;
2232  case 'S':
2233  for (i = (INT)charSet->length; i >= 0; i--)
2234  if (!isspaceW(i))
2235  AddCharacterToCharSet(charSet, (WCHAR)i);
2236  continue;
2237  case 'w':
2238  for (i = (INT)charSet->length; i >= 0; i--)
2239  if (JS_ISWORD(i))
2240  AddCharacterToCharSet(charSet, (WCHAR)i);
2241  continue;
2242  case 'W':
2243  for (i = (INT)charSet->length; i >= 0; i--)
2244  if (!JS_ISWORD(i))
2245  AddCharacterToCharSet(charSet, (WCHAR)i);
2246  continue;
2247  default:
2248  thisCh = c;
2249  break;
2250 
2251  }
2252  break;
2253 
2254  default:
2255  thisCh = *src++;
2256  break;
2257 
2258  }
2259  if (inRange) {
2260  if (gData->regexp->flags & REG_FOLD) {
2261  assert(rangeStart <= thisCh);
2262  for (i = rangeStart; i <= thisCh; i++) {
2263  WCHAR uch, dch;
2264 
2265  AddCharacterToCharSet(charSet, i);
2266  uch = toupperW(i);
2267  dch = tolowerW(i);
2268  if (i != uch)
2269  AddCharacterToCharSet(charSet, uch);
2270  if (i != dch)
2271  AddCharacterToCharSet(charSet, dch);
2272  }
2273  } else {
2274  AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2275  }
2276  inRange = FALSE;
2277  } else {
2278  if (gData->regexp->flags & REG_FOLD) {
2279  AddCharacterToCharSet(charSet, toupperW(thisCh));
2280  AddCharacterToCharSet(charSet, tolowerW(thisCh));
2281  } else {
2282  AddCharacterToCharSet(charSet, thisCh);
2283  }
2284  if (src < end - 1) {
2285  if (*src == '-') {
2286  ++src;
2287  inRange = TRUE;
2288  rangeStart = thisCh;
2289  }
2290  }
2291  }
2292  }
2293  return TRUE;
2294 }
2295 
2296 static BOOL
2298 {
2299  size_t limit = gData->stateStackLimit;
2300  size_t sz = sizeof(REProgState) * limit;
2301 
2302  gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2303  if (!gData->stateStack) {
2304  js_ReportOutOfScriptQuota(gData->cx);
2305  gData->ok = FALSE;
2306  return FALSE;
2307  }
2308  gData->stateStackLimit = limit + limit;
2309  return TRUE;
2310 }
2311 
2312 #define PUSH_STATE_STACK(data) \
2313  do { \
2314  ++(data)->stateStackTop; \
2315  if ((data)->stateStackTop == (data)->stateStackLimit && \
2316  !ReallocStateStack((data))) { \
2317  return NULL; \
2318  } \
2319  }while(0)
2320 
2321 /*
2322  * Apply the current op against the given input to see if it's going to match
2323  * or fail. Return false if we don't get a match, true if we do. If updatecp is
2324  * true, then update the current state's cp. Always update startpc to the next
2325  * op.
2326  */
2327 static inline match_state_t *
2329  jsbytecode **startpc, BOOL updatecp)
2330 {
2332  WCHAR matchCh;
2333  size_t parenIndex;
2334  size_t offset, length, index;
2335  jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2336  const WCHAR *source;
2337  const WCHAR *startcp = x->cp;
2338  WCHAR ch;
2339  RECharSet *charSet;
2340 
2341  const char *opname = reop_names[op];
2342  TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2343  (int)gData->stateStackTop * 2, "", opname);
2344 
2345  switch (op) {
2346  case REOP_EMPTY:
2347  result = x;
2348  break;
2349  case REOP_BOL:
2350  if (x->cp != gData->cpbegin) {
2351  if (
2352  !(gData->regexp->flags & REG_MULTILINE)) {
2353  break;
2354  }
2355  if (!RE_IS_LINE_TERM(x->cp[-1]))
2356  break;
2357  }
2358  result = x;
2359  break;
2360  case REOP_EOL:
2361  if (x->cp != gData->cpend) {
2362  if (
2363  !(gData->regexp->flags & REG_MULTILINE)) {
2364  break;
2365  }
2366  if (!RE_IS_LINE_TERM(*x->cp))
2367  break;
2368  }
2369  result = x;
2370  break;
2371  case REOP_WBDRY:
2372  if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2373  !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2374  result = x;
2375  }
2376  break;
2377  case REOP_WNONBDRY:
2378  if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2379  (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2380  result = x;
2381  }
2382  break;
2383  case REOP_DOT:
2384  if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2385  result = x;
2386  result->cp++;
2387  }
2388  break;
2389  case REOP_DIGIT:
2390  if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2391  result = x;
2392  result->cp++;
2393  }
2394  break;
2395  case REOP_NONDIGIT:
2396  if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2397  result = x;
2398  result->cp++;
2399  }
2400  break;
2401  case REOP_ALNUM:
2402  if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2403  result = x;
2404  result->cp++;
2405  }
2406  break;
2407  case REOP_NONALNUM:
2408  if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2409  result = x;
2410  result->cp++;
2411  }
2412  break;
2413  case REOP_SPACE:
2414  if (x->cp != gData->cpend && isspaceW(*x->cp)) {
2415  result = x;
2416  result->cp++;
2417  }
2418  break;
2419  case REOP_NONSPACE:
2420  if (x->cp != gData->cpend && !isspaceW(*x->cp)) {
2421  result = x;
2422  result->cp++;
2423  }
2424  break;
2425  case REOP_BACKREF:
2426  pc = ReadCompactIndex(pc, &parenIndex);
2427  assert(parenIndex < gData->regexp->parenCount);
2428  result = BackrefMatcher(gData, x, parenIndex);
2429  break;
2430  case REOP_FLAT:
2431  pc = ReadCompactIndex(pc, &offset);
2432  assert(offset < gData->regexp->source_len);
2433  pc = ReadCompactIndex(pc, &length);
2434  assert(1 <= length);
2435  assert(length <= gData->regexp->source_len - offset);
2436  if (length <= (size_t)(gData->cpend - x->cp)) {
2437  source = gData->regexp->source + offset;
2438  TRACE("%s\n", debugstr_wn(source, length));
2439  for (index = 0; index != length; index++) {
2440  if (source[index] != x->cp[index])
2441  return NULL;
2442  }
2443  x->cp += length;
2444  result = x;
2445  }
2446  break;
2447  case REOP_FLAT1:
2448  matchCh = *pc++;
2449  TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2450  if (x->cp != gData->cpend && *x->cp == matchCh) {
2451  result = x;
2452  result->cp++;
2453  }
2454  break;
2455  case REOP_FLATi:
2456  pc = ReadCompactIndex(pc, &offset);
2457  assert(offset < gData->regexp->source_len);
2458  pc = ReadCompactIndex(pc, &length);
2459  assert(1 <= length);
2460  assert(length <= gData->regexp->source_len - offset);
2461  source = gData->regexp->source;
2462  result = FlatNIMatcher(gData, x, source + offset, length);
2463  break;
2464  case REOP_FLAT1i:
2465  matchCh = *pc++;
2466  if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2467  result = x;
2468  result->cp++;
2469  }
2470  break;
2471  case REOP_UCFLAT1:
2472  matchCh = GET_ARG(pc);
2473  TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2474  pc += ARG_LEN;
2475  if (x->cp != gData->cpend && *x->cp == matchCh) {
2476  result = x;
2477  result->cp++;
2478  }
2479  break;
2480  case REOP_UCFLAT1i:
2481  matchCh = GET_ARG(pc);
2482  pc += ARG_LEN;
2483  if (x->cp != gData->cpend && toupperW(*x->cp) == toupperW(matchCh)) {
2484  result = x;
2485  result->cp++;
2486  }
2487  break;
2488  case REOP_CLASS:
2489  pc = ReadCompactIndex(pc, &index);
2490  assert(index < gData->regexp->classCount);
2491  if (x->cp != gData->cpend) {
2492  charSet = &gData->regexp->classList[index];
2493  assert(charSet->converted);
2494  ch = *x->cp;
2495  index = ch >> 3;
2496  if (charSet->length != 0 &&
2497  ch <= charSet->length &&
2498  (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2499  result = x;
2500  result->cp++;
2501  }
2502  }
2503  break;
2504  case REOP_NCLASS:
2505  pc = ReadCompactIndex(pc, &index);
2506  assert(index < gData->regexp->classCount);
2507  if (x->cp != gData->cpend) {
2508  charSet = &gData->regexp->classList[index];
2509  assert(charSet->converted);
2510  ch = *x->cp;
2511  index = ch >> 3;
2512  if (charSet->length == 0 ||
2513  ch > charSet->length ||
2514  !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2515  result = x;
2516  result->cp++;
2517  }
2518  }
2519  break;
2520 
2521  default:
2522  assert(FALSE);
2523  }
2524  if (result) {
2525  if (!updatecp)
2526  x->cp = startcp;
2527  *startpc = pc;
2528  TRACE(" *\n");
2529  return result;
2530  }
2531  x->cp = startcp;
2532  return NULL;
2533 }
2534 
2535 static inline match_state_t *
2537 {
2539  REBackTrackData *backTrackData;
2540  jsbytecode *nextpc, *testpc;
2541  REOp nextop;
2542  RECapture *cap;
2543  REProgState *curState;
2544  const WCHAR *startcp;
2545  size_t parenIndex, k;
2546  size_t parenSoFar = 0;
2547 
2548  WCHAR matchCh1, matchCh2;
2549  RECharSet *charSet;
2550 
2551  BOOL anchor;
2552  jsbytecode *pc = gData->regexp->program;
2553  REOp op = (REOp) *pc++;
2554 
2555  /*
2556  * If the first node is a simple match, step the index into the string
2557  * until that match is made, or fail if it can't be found at all.
2558  */
2559  if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & REG_STICKY)) {
2560  anchor = FALSE;
2561  while (x->cp <= gData->cpend) {
2562  nextpc = pc; /* reset back to start each time */
2563  result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2564  if (result) {
2565  anchor = TRUE;
2566  x = result;
2567  pc = nextpc; /* accept skip to next opcode */
2568  op = (REOp) *pc++;
2569  assert(op < REOP_LIMIT);
2570  break;
2571  }
2572  gData->skipped++;
2573  x->cp++;
2574  }
2575  if (!anchor)
2576  goto bad;
2577  }
2578 
2579  for (;;) {
2580  const char *opname = reop_names[op];
2581  TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2582  (int)gData->stateStackTop * 2, "", opname);
2583 
2584  if (REOP_IS_SIMPLE(op)) {
2585  result = SimpleMatch(gData, x, op, &pc, TRUE);
2586  } else {
2587  curState = &gData->stateStack[gData->stateStackTop];
2588  switch (op) {
2589  case REOP_END:
2590  goto good;
2591  case REOP_ALTPREREQ2:
2592  nextpc = pc + GET_OFFSET(pc); /* start of next op */
2593  pc += ARG_LEN;
2594  matchCh2 = GET_ARG(pc);
2595  pc += ARG_LEN;
2596  k = GET_ARG(pc);
2597  pc += ARG_LEN;
2598 
2599  if (x->cp != gData->cpend) {
2600  if (*x->cp == matchCh2)
2601  goto doAlt;
2602 
2603  charSet = &gData->regexp->classList[k];
2604  if (!charSet->converted && !ProcessCharSet(gData, charSet))
2605  goto bad;
2606  matchCh1 = *x->cp;
2607  k = matchCh1 >> 3;
2608  if ((charSet->length == 0 ||
2609  matchCh1 > charSet->length ||
2610  !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2611  charSet->sense) {
2612  goto doAlt;
2613  }
2614  }
2615  result = NULL;
2616  break;
2617 
2618  case REOP_ALTPREREQ:
2619  nextpc = pc + GET_OFFSET(pc); /* start of next op */
2620  pc += ARG_LEN;
2621  matchCh1 = GET_ARG(pc);
2622  pc += ARG_LEN;
2623  matchCh2 = GET_ARG(pc);
2624  pc += ARG_LEN;
2625  if (x->cp == gData->cpend ||
2626  (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2627  result = NULL;
2628  break;
2629  }
2630  /* else fall through... */
2631 
2632  case REOP_ALT:
2633  doAlt:
2634  nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2635  pc += ARG_LEN; /* start of this alternate */
2636  curState->parenSoFar = parenSoFar;
2637  PUSH_STATE_STACK(gData);
2638  op = (REOp) *pc++;
2639  startcp = x->cp;
2640  if (REOP_IS_SIMPLE(op)) {
2641  if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2642  op = (REOp) *nextpc++;
2643  pc = nextpc;
2644  continue;
2645  }
2646  result = x;
2647  op = (REOp) *pc++;
2648  }
2649  nextop = (REOp) *nextpc++;
2650  if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2651  goto bad;
2652  continue;
2653 
2654  /*
2655  * Occurs at (successful) end of REOP_ALT,
2656  */
2657  case REOP_JUMP:
2658  /*
2659  * If we have not gotten a result here, it is because of an
2660  * empty match. Do the same thing REOP_EMPTY would do.
2661  */
2662  if (!result)
2663  result = x;
2664 
2665  --gData->stateStackTop;
2666  pc += GET_OFFSET(pc);
2667  op = (REOp) *pc++;
2668  continue;
2669 
2670  /*
2671  * Occurs at last (successful) end of REOP_ALT,
2672  */
2673  case REOP_ENDALT:
2674  /*
2675  * If we have not gotten a result here, it is because of an
2676  * empty match. Do the same thing REOP_EMPTY would do.
2677  */
2678  if (!result)
2679  result = x;
2680 
2681  --gData->stateStackTop;
2682  op = (REOp) *pc++;
2683  continue;
2684 
2685  case REOP_LPAREN:
2686  pc = ReadCompactIndex(pc, &parenIndex);
2687  TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
2688  assert(parenIndex < gData->regexp->parenCount);
2689  if (parenIndex + 1 > parenSoFar)
2690  parenSoFar = parenIndex + 1;
2691  x->parens[parenIndex].index = x->cp - gData->cpbegin;
2692  x->parens[parenIndex].length = 0;
2693  op = (REOp) *pc++;
2694  continue;
2695 
2696  case REOP_RPAREN:
2697  {
2698  ptrdiff_t delta;
2699 
2700  pc = ReadCompactIndex(pc, &parenIndex);
2701  assert(parenIndex < gData->regexp->parenCount);
2702  cap = &x->parens[parenIndex];
2703  delta = x->cp - (gData->cpbegin + cap->index);
2704  cap->length = (delta < 0) ? 0 : (size_t) delta;
2705  op = (REOp) *pc++;
2706 
2707  if (!result)
2708  result = x;
2709  continue;
2710  }
2711  case REOP_ASSERT:
2712  nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2713  pc += ARG_LEN; /* start of ASSERT child */
2714  op = (REOp) *pc++;
2715  testpc = pc;
2716  if (REOP_IS_SIMPLE(op) &&
2717  !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2718  result = NULL;
2719  break;
2720  }
2721  curState->u.assertion.top =
2722  (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2723  curState->u.assertion.sz = gData->cursz;
2724  curState->index = x->cp - gData->cpbegin;
2725  curState->parenSoFar = parenSoFar;
2726  PUSH_STATE_STACK(gData);
2727  if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2728  nextpc, x, x->cp, 0, 0)) {
2729  goto bad;
2730  }
2731  continue;
2732 
2733  case REOP_ASSERT_NOT:
2734  nextpc = pc + GET_OFFSET(pc);
2735  pc += ARG_LEN;
2736  op = (REOp) *pc++;
2737  testpc = pc;
2738  if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2739  SimpleMatch(gData, x, op, &testpc, FALSE) &&
2740  *testpc == REOP_ASSERTNOTTEST) {
2741  result = NULL;
2742  break;
2743  }
2744  curState->u.assertion.top
2745  = (char *)gData->backTrackSP -
2746  (char *)gData->backTrackStack;
2747  curState->u.assertion.sz = gData->cursz;
2748  curState->index = x->cp - gData->cpbegin;
2749  curState->parenSoFar = parenSoFar;
2750  PUSH_STATE_STACK(gData);
2752  nextpc, x, x->cp, 0, 0)) {
2753  goto bad;
2754  }
2755  continue;
2756 
2757  case REOP_ASSERTTEST:
2758  --gData->stateStackTop;
2759  --curState;
2760  x->cp = gData->cpbegin + curState->index;
2761  gData->backTrackSP =
2762  (REBackTrackData *) ((char *)gData->backTrackStack +
2763  curState->u.assertion.top);
2764  gData->cursz = curState->u.assertion.sz;
2765  if (result)
2766  result = x;
2767  break;
2768 
2769  case REOP_ASSERTNOTTEST:
2770  --gData->stateStackTop;
2771  --curState;
2772  x->cp = gData->cpbegin + curState->index;
2773  gData->backTrackSP =
2774  (REBackTrackData *) ((char *)gData->backTrackStack +
2775  curState->u.assertion.top);
2776  gData->cursz = curState->u.assertion.sz;
2777  result = (!result) ? x : NULL;
2778  break;
2779  case REOP_STAR:
2780  curState->u.quantifier.min = 0;
2781  curState->u.quantifier.max = (UINT)-1;
2782  goto quantcommon;
2783  case REOP_PLUS:
2784  curState->u.quantifier.min = 1;
2785  curState->u.quantifier.max = (UINT)-1;
2786  goto quantcommon;
2787  case REOP_OPT:
2788  curState->u.quantifier.min = 0;
2789  curState->u.quantifier.max = 1;
2790  goto quantcommon;
2791  case REOP_QUANT:
2792  pc = ReadCompactIndex(pc, &k);
2793  curState->u.quantifier.min = k;
2794  pc = ReadCompactIndex(pc, &k);
2795  /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2796  curState->u.quantifier.max = k - 1;
2797  assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2798  quantcommon:
2799  if (curState->u.quantifier.max == 0) {
2800  pc = pc + GET_OFFSET(pc);
2801  op = (REOp) *pc++;
2802  result = x;
2803  continue;
2804  }
2805  /* Step over <next> */
2806  nextpc = pc + ARG_LEN;
2807  op = (REOp) *nextpc++;
2808  startcp = x->cp;
2809  if (REOP_IS_SIMPLE(op)) {
2810  if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2811  if (curState->u.quantifier.min == 0)
2812  result = x;
2813  else
2814  result = NULL;
2815  pc = pc + GET_OFFSET(pc);
2816  break;
2817  }
2818  op = (REOp) *nextpc++;
2819  result = x;
2820  }
2821  curState->index = startcp - gData->cpbegin;
2822  curState->continue_op = REOP_REPEAT;
2823  curState->continue_pc = pc;
2824  curState->parenSoFar = parenSoFar;
2825  PUSH_STATE_STACK(gData);
2826  if (curState->u.quantifier.min == 0 &&
2827  !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2828  0, 0)) {
2829  goto bad;
2830  }
2831  pc = nextpc;
2832  continue;
2833 
2834  case REOP_ENDCHILD: /* marks the end of a quantifier child */
2835  pc = curState[-1].continue_pc;
2836  op = (REOp) curState[-1].continue_op;
2837 
2838  if (!result)
2839  result = x;
2840  continue;
2841 
2842  case REOP_REPEAT:
2843  --curState;
2844  do {
2845  --gData->stateStackTop;
2846  if (!result) {
2847  /* Failed, see if we have enough children. */
2848  if (curState->u.quantifier.min == 0)
2849  goto repeatDone;
2850  goto break_switch;
2851  }
2852  if (curState->u.quantifier.min == 0 &&
2853  x->cp == gData->cpbegin + curState->index) {
2854  /* matched an empty string, that'll get us nowhere */
2855  result = NULL;
2856  goto break_switch;
2857  }
2858  if (curState->u.quantifier.min != 0)
2859  curState->u.quantifier.min--;
2860  if (curState->u.quantifier.max != (UINT) -1)
2861  curState->u.quantifier.max--;
2862  if (curState->u.quantifier.max == 0)
2863  goto repeatDone;
2864  nextpc = pc + ARG_LEN;
2865  nextop = (REOp) *nextpc;
2866  startcp = x->cp;
2867  if (REOP_IS_SIMPLE(nextop)) {
2868  nextpc++;
2869  if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2870  if (curState->u.quantifier.min == 0)
2871  goto repeatDone;
2872  result = NULL;
2873  goto break_switch;
2874  }
2875  result = x;
2876  }
2877  curState->index = startcp - gData->cpbegin;
2878  PUSH_STATE_STACK(gData);
2879  if (curState->u.quantifier.min == 0 &&
2881  pc, x, startcp,
2882  curState->parenSoFar,
2883  parenSoFar -
2884  curState->parenSoFar)) {
2885  goto bad;
2886  }
2887  } while (*nextpc == REOP_ENDCHILD);
2888  pc = nextpc;
2889  op = (REOp) *pc++;
2890  parenSoFar = curState->parenSoFar;
2891  continue;
2892 
2893  repeatDone:
2894  result = x;
2895  pc += GET_OFFSET(pc);
2896  goto break_switch;
2897 
2898  case REOP_MINIMALSTAR:
2899  curState->u.quantifier.min = 0;
2900  curState->u.quantifier.max = (UINT)-1;
2901  goto minimalquantcommon;
2902  case REOP_MINIMALPLUS:
2903  curState->u.quantifier.min = 1;
2904  curState->u.quantifier.max = (UINT)-1;
2905  goto minimalquantcommon;
2906  case REOP_MINIMALOPT:
2907  curState->u.quantifier.min = 0;
2908  curState->u.quantifier.max = 1;
2909  goto minimalquantcommon;
2910  case REOP_MINIMALQUANT:
2911  pc = ReadCompactIndex(pc, &k);
2912  curState->u.quantifier.min = k;
2913  pc = ReadCompactIndex(pc, &k);
2914  /* See REOP_QUANT comments about k - 1. */
2915  curState->u.quantifier.max = k - 1;
2916  assert(curState->u.quantifier.min
2917  <= curState->u.quantifier.max);
2918  minimalquantcommon:
2919  curState->index = x->cp - gData->cpbegin;
2920  curState->parenSoFar = parenSoFar;
2921  PUSH_STATE_STACK(gData);
2922  if (curState->u.quantifier.min != 0) {
2923  curState->continue_op = REOP_MINIMALREPEAT;
2924  curState->continue_pc = pc;
2925  /* step over <next> */
2926  pc += OFFSET_LEN;
2927  op = (REOp) *pc++;
2928  } else {
2930  pc, x, x->cp, 0, 0)) {
2931  goto bad;
2932  }
2933  --gData->stateStackTop;
2934  pc = pc + GET_OFFSET(pc);
2935  op = (REOp) *pc++;
2936  }
2937  continue;
2938 
2939  case REOP_MINIMALREPEAT:
2940  --gData->stateStackTop;
2941  --curState;
2942 
2943  TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2944 #define PREPARE_REPEAT() \
2945  do { \
2946  curState->index = x->cp - gData->cpbegin; \
2947  curState->continue_op = REOP_MINIMALREPEAT; \
2948  curState->continue_pc = pc; \
2949  pc += ARG_LEN; \
2950  for (k = curState->parenSoFar; k < parenSoFar; k++) \
2951  x->parens[k].index = -1; \
2952  PUSH_STATE_STACK(gData); \
2953  op = (REOp) *pc++; \
2954  assert(op < REOP_LIMIT); \
2955  }while(0)
2956 
2957  if (!result) {
2958  TRACE(" -\n");
2959  /*
2960  * Non-greedy failure - try to consume another child.
2961  */
2962  if (curState->u.quantifier.max == (UINT) -1 ||
2963  curState->u.quantifier.max > 0) {
2964  PREPARE_REPEAT();
2965  continue;
2966  }
2967  /* Don't need to adjust pc since we're going to pop. */
2968  break;
2969  }
2970  if (curState->u.quantifier.min == 0 &&
2971  x->cp == gData->cpbegin + curState->index) {
2972  /* Matched an empty string, that'll get us nowhere. */
2973  result = NULL;
2974  break;
2975  }
2976  if (curState->u.quantifier.min != 0)
2977  curState->u.quantifier.min--;
2978  if (curState->u.quantifier.max != (UINT) -1)
2979  curState->u.quantifier.max--;
2980  if (curState->u.quantifier.min != 0) {
2981  PREPARE_REPEAT();
2982  continue;
2983  }
2984  curState->index = x->cp - gData->cpbegin;
2985  curState->parenSoFar = parenSoFar;
2986  PUSH_STATE_STACK(gData);
2988  pc, x, x->cp,
2989  curState->parenSoFar,
2990  parenSoFar - curState->parenSoFar)) {
2991  goto bad;
2992  }
2993  --gData->stateStackTop;
2994  pc = pc + GET_OFFSET(pc);
2995  op = (REOp) *pc++;
2996  assert(op < REOP_LIMIT);
2997  continue;
2998  default:
2999  assert(FALSE);
3000  result = NULL;
3001  }
3002  break_switch:;
3003  }
3004 
3005  /*
3006  * If the match failed and there's a backtrack option, take it.
3007  * Otherwise this is a complete and utter failure.
3008  */
3009  if (!result) {
3010  if (gData->cursz == 0)
3011  return NULL;
3012 
3013  /* Potentially detect explosive regex here. */
3014  gData->backTrackCount++;
3015  if (gData->backTrackLimit &&
3016  gData->backTrackCount >= gData->backTrackLimit) {
3017  JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3018  JSMSG_REGEXP_TOO_COMPLEX);
3019  gData->ok = FALSE;
3020  return NULL;
3021  }
3022 
3023  backTrackData = gData->backTrackSP;
3024  gData->cursz = backTrackData->sz;
3025  gData->backTrackSP =
3026  (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3027  x->cp = backTrackData->cp;
3028  pc = backTrackData->backtrack_pc;
3029  op = (REOp) backTrackData->backtrack_op;
3030  assert(op < REOP_LIMIT);
3031  gData->stateStackTop = backTrackData->saveStateStackTop;
3032  assert(gData->stateStackTop);
3033 
3034  memcpy(gData->stateStack, backTrackData + 1,
3035  sizeof(REProgState) * backTrackData->saveStateStackTop);
3036  curState = &gData->stateStack[gData->stateStackTop - 1];
3037 
3038  if (backTrackData->parenCount) {
3039  memcpy(&x->parens[backTrackData->parenIndex],
3040  (char *)(backTrackData + 1) +
3041  sizeof(REProgState) * backTrackData->saveStateStackTop,
3042  sizeof(RECapture) * backTrackData->parenCount);
3043  parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3044  } else {
3045  for (k = curState->parenSoFar; k < parenSoFar; k++)
3046  x->parens[k].index = -1;
3047  parenSoFar = curState->parenSoFar;
3048  }
3049 
3050  TRACE("\tBT_Pop: %ld,%ld\n",
3051  (ULONG_PTR)backTrackData->parenIndex,
3052  (ULONG_PTR)backTrackData->parenCount);
3053  continue;
3054  }
3055  x = result;
3056 
3057  /*
3058  * Continue with the expression.
3059  */
3060  op = (REOp)*pc++;
3061  assert(op < REOP_LIMIT);
3062  }
3063 
3064 bad:
3065  TRACE("\n");
3066  return NULL;
3067 
3068 good:
3069  TRACE("\n");
3070  return x;
3071 }
3072 
3074 {
3076  const WCHAR *cp = x->cp;
3077  const WCHAR *cp2;
3078  UINT j;
3079 
3080  /*
3081  * Have to include the position beyond the last character
3082  * in order to detect end-of-input/line condition.
3083  */
3084  for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3085  gData->skipped = cp2 - cp;
3086  x->cp = cp2;
3087  for (j = 0; j < gData->regexp->parenCount; j++)
3088  x->parens[j].index = -1;
3089  result = ExecuteREBytecode(gData, x);
3090  if (!gData->ok || result || (gData->regexp->flags & REG_STICKY))
3091  return result;
3092  gData->backTrackSP = gData->backTrackStack;
3093  gData->cursz = 0;
3094  gData->stateStackTop = 0;
3095  cp2 = cp + gData->skipped;
3096  }
3097  return NULL;
3098 }
3099 
3101 {
3102  UINT i;
3103 
3106  if (!gData->backTrackStack)
3107  goto bad;
3108 
3109  gData->backTrackSP = gData->backTrackStack;
3110  gData->cursz = 0;
3111  gData->backTrackCount = 0;
3112  gData->backTrackLimit = 0;
3113 
3115  gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3116  if (!gData->stateStack)
3117  goto bad;
3118 
3119  gData->stateStackTop = 0;
3120  gData->cx = cx;
3121  gData->pool = pool;
3122  gData->regexp = re;
3123  gData->ok = TRUE;
3124 
3125  for (i = 0; i < re->classCount; i++) {
3126  if (!re->classList[i].converted &&
3127  !ProcessCharSet(gData, &re->classList[i])) {
3128  return E_FAIL;
3129  }
3130  }
3131 
3132  return S_OK;
3133 
3134 bad:
3136  gData->ok = FALSE;
3137  return E_OUTOFMEMORY;
3138 }
3139 
3142 {
3143  match_state_t *res;
3144  REGlobalData gData;
3145  heap_pool_t *mark = heap_pool_mark(pool);
3146  const WCHAR *str_beg = result->cp;
3147  HRESULT hres;
3148 
3149  assert(result->cp != NULL);
3150 
3151  gData.cpbegin = str;
3152  gData.cpend = str+str_len;
3153  gData.start = result->cp-str;
3154  gData.skipped = 0;
3155  gData.pool = pool;
3156 
3157  hres = InitMatch(regexp, cx, pool, &gData);
3158  if(FAILED(hres)) {
3159  WARN("InitMatch failed\n");
3160  heap_pool_clear(mark);
3161  return hres;
3162  }
3163 
3164  res = MatchRegExp(&gData, result);
3165  heap_pool_clear(mark);
3166  if(!gData.ok) {
3167  WARN("MatchRegExp failed\n");
3168  return E_FAIL;
3169  }
3170 
3171  if(!res) {
3172  result->match_len = 0;
3173  return S_FALSE;
3174  }
3175 
3176  result->match_len = (result->cp-str_beg) - gData.skipped;
3177  result->paren_count = regexp->parenCount;
3178  return S_OK;
3179 }
3180 
3182 {
3183  if (re->classList) {
3184  UINT i;
3185  for (i = 0; i < re->classCount; i++) {
3186  if (re->classList[i].converted)
3187  heap_free(re->classList[i].u.bits);
3188  re->classList[i].u.bits = NULL;
3189  }
3190  heap_free(re->classList);
3191  }
3192  heap_free(re);
3193 }
3194 
3196  DWORD str_len, WORD flags, BOOL flat)
3197 {
3198  regexp_t *re;
3199  heap_pool_t *mark;
3201  size_t resize;
3202  jsbytecode *endPC;
3203  UINT i;
3204  size_t len;
3205 
3206  re = NULL;
3207  mark = heap_pool_mark(pool);
3208  len = str_len;
3209 
3210  state.context = cx;
3211  state.pool = pool;
3212  state.cp = str;
3213  if (!state.cp)
3214  goto out;
3215  state.cpbegin = state.cp;
3216  state.cpend = state.cp + len;
3217  state.flags = flags;
3218  state.parenCount = 0;
3219  state.classCount = 0;
3220  state.progLength = 0;
3221  state.treeDepth = 0;
3222  state.classBitmapsMem = 0;
3223  for (i = 0; i < CLASS_CACHE_SIZE; i++)
3224  state.classCache[i].start = NULL;
3225 
3226  if (len != 0 && flat) {
3227  state.result = NewRENode(&state, REOP_FLAT);
3228  if (!state.result)
3229  goto out;
3230  state.result->u.flat.chr = *state.cpbegin;
3231  state.result->u.flat.length = len;
3232  state.result->kid = (void *) state.cpbegin;
3233  /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3234  state.progLength += 1 + GetCompactIndexWidth(0)
3236  } else {
3237  if (!ParseRegExp(&state))
3238  goto out;
3239  }
3240  resize = offsetof(regexp_t, program) + state.progLength + 1;
3241  re = heap_alloc(resize);
3242  if (!re)
3243  goto out;
3244 
3245  assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3246  re->classCount = state.classCount;
3247  if (re->classCount) {
3248  re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3249  if (!re->classList) {
3250  regexp_destroy(re);
3251  re = NULL;
3252  goto out;
3253  }
3254  for (i = 0; i < re->classCount; i++)
3255  re->classList[i].converted = FALSE;
3256  } else {
3257  re->classList = NULL;
3258  }
3259  endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3260  if (!endPC) {
3261  regexp_destroy(re);
3262  re = NULL;
3263  goto out;
3264  }
3265  *endPC++ = REOP_END;
3266  /*
3267  * Check whether size was overestimated and shrink using realloc.
3268  * This is safe since no pointers to newly parsed regexp or its parts
3269  * besides re exist here.
3270  */
3271  if ((size_t)(endPC - re->program) != state.progLength + 1) {
3272  regexp_t *tmp;
3273  assert((size_t)(endPC - re->program) < state.progLength + 1);
3274  resize = offsetof(regexp_t, program) + (endPC - re->program);
3275  tmp = heap_realloc(re, resize);
3276  if (tmp)
3277  re = tmp;
3278  }
3279 
3280  re->flags = flags;
3281  re->parenCount = state.parenCount;
3282  re->source = str;
3283  re->source_len = str_len;
3284 
3285 out:
3286  heap_pool_clear(mark);
3287  return re;
3288 }
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
void regexp_destroy(regexp_t *re)
Definition: regexp.c:3181
jsbytecode * altHead
Definition: regexp.c:317
struct RENode::@429::@433 altprereq
struct RECapture RECapture
static UINT GetDecimalValue(WCHAR c, UINT max, UINT(*findMax)(CompilerState *state), CompilerState *state)
Definition: regexp.c:945
size_t length
Definition: regexp.c:282
UINT max
Definition: regexp.c:210
#define GET_OFFSET(pc)
Definition: regexp.c:339
BYTE jsbytecode
Definition: regexp.h:54
union RECharSet::@424 u
size_t progLength
Definition: regexp.c:302
size_t classBitmapsMem
Definition: regexp.c:304
GLint GLint GLsizei width
Definition: gl.h:1546
UINT min
Definition: regexp.c:269
static BOOL ParseTerm(CompilerState *state)
Definition: regexp.c:1341
void * kid2
Definition: regexp.c:265
#define max(a, b)
Definition: svc.c:63
BOOL ok
Definition: regexp.c:237
size_t stateStackTop
Definition: regexp.c:244
jsbytecode * nextAltFixup
Definition: regexp.c:318
#define TRUE
Definition: types.h:120
#define shift
Definition: input.c:1668
#define OFFSET_LEN
Definition: regexp.c:337
WINE_DEFAULT_DEBUG_CHANNEL(jscript)
const WCHAR * cpbegin
Definition: regexp.c:240
RENode * continueNode
Definition: regexp.c:321
static jsbytecode * ReadCompactIndex(jsbytecode *pc, size_t *result)
Definition: regexp.c:385
size_t backTrackCount
Definition: regexp.c:251
static UINT FindParenCount(CompilerState *state)
Definition: regexp.c:906
static jsbytecode * EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t)
Definition: regexp.c:475
const WCHAR * cpend
Definition: regexp.c:297
static match_state_t * ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:2536
static const char * reop_names[]
Definition: regexp.c:149
#define TREE_DEPTH_MAX
Definition: regexp.c:347
size_t top
Definition: regexp.c:213
RENode * next
Definition: regexp.c:262
void * heap_pool_alloc(heap_pool_t *, DWORD) __WINE_ALLOC_SIZE(2) DECLSPEC_HIDDEN
Definition: jsutils.c:75
JSPackedBool sense
Definition: regexp.c:278
static REBackTrackData * PushBackTrackState(REGlobalData *gData, REOp op, jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount)
Definition: regexp.c:1922
WINE_UNICODE_INLINE int isspaceW(WCHAR wc)
Definition: unicode.h:165
union RENode::@429 u
#define JSREG_FIND_PAREN_ERROR
Definition: regexp.c:896
#define WARN(fmt,...)
Definition: debug.h:111
#define JS7_ISDEC(c)
Definition: regexp.c:91
struct CompilerState::@434 classCache[CLASS_CACHE_SIZE]
GLintptr offset
Definition: glext.h:5920
#define str_len
Definition: treelist.c:89
const WCHAR * cpbegin
Definition: regexp.c:296
GLdouble n
Definition: glext.h:7729
GLdouble GLdouble t
Definition: gl.h:2047
#define assert(x)
Definition: debug.h:53
#define REOP_IS_SIMPLE(op)
Definition: regexp.c:147
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
WORD flags
Definition: regexp.h:57
#define JS7_UNDEC(c)
Definition: regexp.c:92
GLuint GLuint end
Definition: gl.h:1545
jsbytecode continue_op
Definition: regexp.c:204
DWORD source_len
Definition: regexp.h:62
#define JUMP_OFFSET_LO(off)
Definition: regexp.c:453
size_t parenIndex
Definition: regexp.c:449
REProgState * stateStack
Definition: regexp.c:243
#define JSREG_FIND_PAREN_COUNT
Definition: regexp.c:895
#define INITIAL_STATESTACK
Definition: regexp.c:231
struct REProgState::@426::@427 quantifier
#define E_FAIL
Definition: ddrawi.h:102
size_t backTrackLimit
Definition: regexp.c:252
int32_t INT
Definition: typedefs.h:56
size_t start
Definition: regexp.c:238
REOp
Definition: regexp.c:94
union REProgState::@426 u
static void * heap_realloc(void *mem, size_t len)
Definition: appwiz.h:70
jsbytecode * nextTermFixup
Definition: regexp.c:319
static void * heap_alloc(size_t len)
Definition: appwiz.h:65
struct EmitStateStackEntry EmitStateStackEntry
BYTE * bits
Definition: regexp.c:68
#define CLASS_BITMAPS_MEM_LIMIT
Definition: regexp.c:353
GLint limit
Definition: glext.h:10326
WORD length
Definition: regexp.c:66
heap_pool_t * pool
Definition: regexp.c:312
#define REG_STICKY
Definition: regexp.h:39
REOp op
Definition: regexp.c:261
uint32_t ULONG_PTR
Definition: typedefs.h:63
#define RE_IS_LETTER(c)
Definition: regexp.c:84
static BOOL ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
Definition: regexp.c:2087
struct REGlobalData REGlobalData
#define GET_ARG(pc)
Definition: regexp.c:333
static void AddCharacterToCharSet(RECharSet *cs, WCHAR c)
Definition: regexp.c:2053
uint32_t cs
Definition: isohybrid.c:75
RENode * kid2
Definition: regexp.c:285
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
size_t parenCount
Definition: regexp.h:58
#define JSMSG_MAX_TOO_BIG
Definition: regexp.c:77
#define REG_MULTILINE
Definition: regexp.h:38
#define REG_FOLD
Definition: regexp.h:36
#define E_OUTOFMEMORY
Definition: ddrawi.h:100
REOp op
Definition: regexp.c:447
size_t parenIndex
Definition: regexp.c:224
JSPackedBool greedy
Definition: regexp.c:271
GLenum cap
Definition: glext.h:9639
unsigned int BOOL
Definition: ntddk_ex.h:94
static BOOL SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
Definition: regexp.c:456
size_t startIndex
Definition: regexp.c:70
#define CLASS_CACHE_SIZE
Definition: regexp.c:292
size_t treeDepth
Definition: regexp.c:301
size_t kidlen
Definition: regexp.c:275
struct REBackTrackData REBackTrackData
WCHAR chr
Definition: regexp.c:281
#define S_FALSE
Definition: winerror.h:2357
const WCHAR * cpend
Definition: regexp.c:241
size_t classCount
Definition: regexp.c:300
struct RECharSet RECharSet
jsbytecode * continue_pc
Definition: regexp.c:203
const WCHAR * str
WORD bmsize
Definition: regexp.c:277
size_t length
Definition: regexp.c:71
static void AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
Definition: regexp.c:2063
smooth NULL
Definition: ftsmooth.c:416
#define offsetof(TYPE, MEMBER)
#define JS_COUNT_OPERATION(a, b)
Definition: regexp.c:50
jsbytecode backtrack_op
Definition: regexp.c:222
#define js_ReportOutOfScriptQuota(a)
Definition: regexp.c:48
regexp_t * regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, WORD flags, BOOL flat)
Definition: regexp.c:3195
jsbytecode continueOp
Definition: regexp.c:322
GLuint index
Definition: glext.h:6031
struct RENode::@429::@432 flat
#define RE_IS_LINE_TERM(c)
Definition: regexp.c:86
static match_state_t * MatchRegExp(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:3073
#define ReportRegExpError(a, b, c)
Definition: regexp.c:44
size_t index
Definition: regexp.c:276
static match_state_t * SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp)
Definition: regexp.c:2328
ptrdiff_t index
Definition: regexp.c:205
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
REBackTrackData * backTrackStack
Definition: regexp.c:247
WINE_UNICODE_INLINE WCHAR toupperW(WCHAR ch)
Definition: unicode.h:141
#define TRACE(s)
Definition: solgame.cpp:4
struct CompilerState CompilerState
static match_state_t * FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length)
Definition: regexp.c:1983
HRESULT hres
Definition: protocol.c:465
JSPackedBool converted
Definition: regexp.c:64
if(!(yy_init))
Definition: macro.lex.yy.c:714
__wchar_t WCHAR
Definition: xmlstorage.h:180
LONG HRESULT
Definition: typedefs.h:77
GLenum GLuint GLenum GLsizei length
Definition: glext.h:5579
#define JS_ISWORD(c)
Definition: regexp.c:89
size_t backTrackStackSize
Definition: regexp.c:249
const GLubyte * c
Definition: glext.h:8905
static match_state_t * BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
Definition: regexp.c:2022
unsigned short WORD
Definition: ntddk_ex.h:93
static FILE * out
Definition: regtests2xml.c:44
WINE_UNICODE_INLINE WCHAR tolowerW(WCHAR ch)
Definition: unicode.h:135
unsigned long DWORD
Definition: ntddk_ex.h:95
GLuint GLuint num
Definition: glext.h:9618
ptrdiff_t skipped
Definition: regexp.c:239
const WCHAR * cp
Definition: regexp.c:298
size_t startIndex
Definition: regexp.c:274
static BOOL ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, INT operandSP)
Definition: regexp.c:805
size_t parenCount
Definition: regexp.c:225
size_t cursz
Definition: regexp.c:250
static jsbytecode * WriteCompactIndex(jsbytecode *pc, size_t index)
Definition: regexp.c:372
#define JSMSG_MIN_TOO_BIG
Definition: regexp.c:76
REBackTrackData * backTrackSP
Definition: regexp.c:248
jsbytecode * endTermFixup
Definition: regexp.c:320
GLbitfield flags
Definition: glext.h:7161
size_t length
Definition: regexp.c:307
#define PUSH_STATE_STACK(data)
Definition: regexp.c:2312
#define JSMSG_OUT_OF_MEMORY
Definition: regexp.c:79
#define index(s, c)
Definition: various.h:29
size_t saveStateStackTop
Definition: regexp.c:226
jsbytecode program[1]
Definition: regexp.h:63
static int state
Definition: maze.c:121
HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, match_state_t *result)
Definition: regexp.c:3140
size_t parenIndex
Definition: regexp.c:267
const WCHAR * source
Definition: regexp.h:61
struct RECharSet::@424::@425 src
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
#define JS_ReportOutOfMemory(a)
Definition: regexp.c:49
GLenum GLsizei len
Definition: glext.h:6722
unsigned char BYTE
Definition: mem.h:68
static BOOL isASCIIHexDigit(WCHAR c, UINT *digit)
Definition: regexp.c:428
size_t sz
Definition: regexp.c:220
GLenum src
Definition: glext.h:6340
#define err(...)
#define SET_ARG(pc, arg)
Definition: regexp.c:334
size_t index
Definition: regexp.c:308
GLsizei const GLfloat * value
Definition: glext.h:6069
#define debugstr_wn
Definition: kernel32.h:33
#define cap
Definition: glfuncs.h:226
WCHAR ch2
Definition: regexp.c:287
static RENode * NewRENode(CompilerState *state, REOp op)
Definition: regexp.c:409
#define OFFSET_MAX
Definition: regexp.c:338
WCHAR ch1
Definition: regexp.c:286
#define PREPARE_REPEAT()
#define S_OK
Definition: intsafe.h:59
size_t parenSoFar
Definition: regexp.c:206
const WCHAR * cp
Definition: regexp.c:223
static unsigned __int64 next
Definition: rand_nt.c:6
#define INITIAL_BACKTRACK
Definition: regexp.c:232
JSPackedBool sense
Definition: regexp.c:65
GLuint program
Definition: glext.h:6723
static calc_node_t temp
Definition: rpn_ieee.c:38
GLsizei GLsizei GLchar * source
Definition: glext.h:6048
#define JSMSG_OUT_OF_ORDER
Definition: regexp.c:78
UINT min
Definition: regexp.c:209
#define min(a, b)
Definition: monoChain.cc:55
unsigned int UINT
Definition: ndis.h:50
__kernel_ptrdiff_t ptrdiff_t
Definition: linux.h:247
size_t sz
Definition: regexp.c:214
#define JS_ReportErrorNumber(a, b, c, d)
Definition: regexp.c:46
void * heap_pool_grow(heap_pool_t *, void *, DWORD, DWORD) DECLSPEC_HIDDEN
Definition: jsutils.c:128
void * cx
Definition: regexp.c:235
BYTE JSPackedBool
Definition: regexp.c:53
RENode * result
Definition: regexp.c:303
size_t stateStackLimit
Definition: regexp.c:245
_Out_opt_ int * cx
Definition: commctrl.h:570
static BOOL ParseQuantifier(CompilerState *state)
Definition: regexp.c:1217
static BOOL ReallocStateStack(REGlobalData *gData)
Definition: regexp.c:2297
POINT cp
Definition: magnifier.c:60
#define INITIAL_STACK_SIZE
Definition: regexp.c:1648
struct RENode::@429::@430 range
GLuint res
Definition: glext.h:9613
#define c
Definition: ke_i.h:80
GLenum target
Definition: glext.h:7315
WORD flags
Definition: regexp.c:310
#define ReportRegExpErrorHelper(a, b, c, d)
Definition: regexp.c:45
jsbytecode * backtrack_pc
Definition: regexp.c:221
static size_t GetCompactIndexWidth(size_t index)
Definition: regexp.c:363
char * cleanup(char *str)
Definition: wpickclick.c:99
static BOOL ParseRegExp(CompilerState *)
Definition: regexp.c:1651
const WCHAR * errPos
Definition: regexp.c:448
UINT op
Definition: effect.c:223
void * context
Definition: regexp.c:295
GLenum GLenum GLvoid GLvoid GLvoid * span
Definition: glext.h:5664
static INT ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
Definition: regexp.c:1163
#define OVERFLOW_VALUE
Definition: regexp.c:903
static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
Definition: regexp.c:3100
size_t parenCount
Definition: regexp.c:299
static BOOL CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src, const WCHAR *end)
Definition: regexp.c:969
Definition: regexp.c:260
heap_pool_t * heap_pool_mark(heap_pool_t *) DECLSPEC_HIDDEN
Definition: jsutils.c:180
GLuint64EXT * result
Definition: glext.h:11304
#define memset(x, y, z)
Definition: compat.h:39
struct REProgState REProgState
int k
Definition: mpi.c:3369
struct RENode::@429::@431 ucclass
size_t classCount
Definition: regexp.h:59
struct REProgState::@426::@428 assertion
struct CFHEADER header
Definition: fdi.c:109
#define ARG_LEN
Definition: regexp.c:332
UINT max
Definition: regexp.c:270
JSPackedBool jumpToJumpFlag
Definition: regexp.c:323
heap_pool_t * pool
Definition: regexp.c:254
#define JUMP_OFFSET_HI(off)
Definition: regexp.c:452
unsigned char uch
Definition: zutil.h:34
void * kid
Definition: regexp.c:263
INT num
Definition: regexp.c:266
struct RECharSet * classList
Definition: regexp.h:60
static BOOL heap_free(void *mem)
Definition: appwiz.h:75
regexp_t * regexp
Definition: regexp.c:236
void heap_pool_clear(heap_pool_t *) DECLSPEC_HIDDEN
Definition: jsutils.c:144
const WCHAR * start
Definition: regexp.c:306