ReactOS 0.4.17-dev-357-ga8f14ff
regexp.c File Reference
#include <assert.h>
#include "jscript.h"
#include "regexp.h"
#include "wine/debug.h"
Include dependency graph for regexp.c:

Go to the source code of this file.

Classes

struct  RECharSet
 
struct  REProgState
 
struct  REBackTrackData
 
struct  REGlobalData
 
struct  RENode
 
struct  CompilerState
 
struct  EmitStateStackEntry
 
struct  REOpData
 

Macros

#define ReportRegExpError(a, b, c)   throw_error((a)->context, E_FAIL, L"")
 
#define ReportRegExpErrorHelper(a, b, c, d)   throw_error((a)->context, E_FAIL, L"")
 
#define JS_ReportErrorNumber(a, b, c, d)   throw_error((a), E_FAIL, L"")
 
#define JS_ReportErrorFlagsAndNumber(a, b, c, d, e, f)   throw_error((a), E_FAIL, L"")
 
#define JS_COUNT_OPERATION(a, b)   do { } while(0)
 
#define JSMSG_MIN_TOO_BIG   47
 
#define JSMSG_MAX_TOO_BIG   48
 
#define JSMSG_OUT_OF_ORDER   49
 
#define JSMSG_OUT_OF_MEMORY   137
 
#define LINE_SEPARATOR   0x2028
 
#define PARA_SEPARATOR   0x2029
 
#define RE_IS_LETTER(c)
 
#define RE_IS_LINE_TERM(c)
 
#define JS_ISWORD(c)   ((c) < 128 && (isalnum(c) || (c) == '_'))
 
#define JS7_ISDEC(c)   ((((unsigned)(c)) - '0') <= 9)
 
#define JS7_UNDEC(c)   ((c) - '0')
 
#define REOP_IS_SIMPLE(op)   ((op) <= REOP_NCLASS)
 
#define INITIAL_STATESTACK   100
 
#define INITIAL_BACKTRACK   8000
 
#define CLASS_CACHE_SIZE   4
 
#define ARG_LEN   2
 
#define GET_ARG(pc)   ((WORD)(((pc)[0] << 8) | (pc)[1]))
 
#define SET_ARG(pc, arg)
 
#define OFFSET_LEN   ARG_LEN
 
#define OFFSET_MAX   ((1 << (ARG_LEN * 8)) - 1)
 
#define GET_OFFSET(pc)   GET_ARG(pc)
 
#define TREE_DEPTH_MAX   ((1 << 24) / sizeof(EmitStateStackEntry))
 
#define CLASS_BITMAPS_MEM_LIMIT   (1 << 24)
 
#define JUMP_OFFSET_HI(off)   ((jsbytecode)((off) >> 8))
 
#define JUMP_OFFSET_LO(off)   ((jsbytecode)(off))
 
#define JSREG_FIND_PAREN_COUNT   0x8000
 
#define JSREG_FIND_PAREN_ERROR   0x4000
 
#define OVERFLOW_VALUE   ((UINT)-1)
 
#define INITIAL_STACK_SIZE   128
 
#define PUSH_STATE_STACK(data)
 
#define PREPARE_REPEAT()
 

Typedefs

typedef BYTE JSPackedBool
 
typedef struct RECharSet RECharSet
 
typedef enum REOp REOp
 
typedef struct REProgState REProgState
 
typedef struct REBackTrackData REBackTrackData
 
typedef struct REGlobalData REGlobalData
 
typedef struct RENode RENode
 
typedef struct CompilerState CompilerState
 
typedef struct EmitStateStackEntry EmitStateStackEntry
 

Enumerations

enum  REOp {
  REOP_EMPTY , REOP_BOL , REOP_EOL , REOP_WBDRY ,
  REOP_WNONBDRY , REOP_DOT , REOP_DIGIT , REOP_NONDIGIT ,
  REOP_ALNUM , REOP_NONALNUM , REOP_SPACE , REOP_NONSPACE ,
  REOP_BACKREF , REOP_FLAT , REOP_FLAT1 , REOP_FLATi ,
  REOP_FLAT1i , REOP_UCFLAT1 , REOP_UCFLAT1i , REOP_UCFLAT ,
  REOP_UCFLATi , REOP_CLASS , REOP_NCLASS , REOP_ALT ,
  REOP_QUANT , REOP_STAR , REOP_PLUS , REOP_OPT ,
  REOP_LPAREN , REOP_RPAREN , REOP_JUMP , REOP_DOTSTAR ,
  REOP_LPARENNON , REOP_ASSERT , REOP_ASSERT_NOT , REOP_ASSERTTEST ,
  REOP_ASSERTNOTTEST , REOP_MINIMALSTAR , REOP_MINIMALPLUS , REOP_MINIMALOPT ,
  REOP_MINIMALQUANT , REOP_ENDCHILD , REOP_REPEAT , REOP_MINIMALREPEAT ,
  REOP_ALTPREREQ , REOP_ALTPREREQ2 , REOP_ENDALT , REOP_CONCAT ,
  REOP_END , REOP_LIMIT , REOP_EMPTY , REOP_BOL ,
  REOP_EOL , REOP_WBDRY , REOP_WNONBDRY , REOP_DOT ,
  REOP_DIGIT , REOP_NONDIGIT , REOP_ALNUM , REOP_NONALNUM ,
  REOP_SPACE , REOP_NONSPACE , REOP_BACKREF , REOP_FLAT ,
  REOP_FLAT1 , REOP_FLATi , REOP_FLAT1i , REOP_UCFLAT1 ,
  REOP_UCFLAT1i , REOP_UCFLAT , REOP_UCFLATi , REOP_CLASS ,
  REOP_NCLASS , REOP_ALT , REOP_QUANT , REOP_STAR ,
  REOP_PLUS , REOP_OPT , REOP_LPAREN , REOP_RPAREN ,
  REOP_JUMP , REOP_DOTSTAR , REOP_LPARENNON , REOP_ASSERT ,
  REOP_ASSERT_NOT , REOP_ASSERTTEST , REOP_ASSERTNOTTEST , REOP_MINIMALSTAR ,
  REOP_MINIMALPLUS , REOP_MINIMALOPT , REOP_MINIMALQUANT , REOP_ENDCHILD ,
  REOP_REPEAT , REOP_MINIMALREPEAT , REOP_ALTPREREQ , REOP_ALTPREREQ2 ,
  REOP_ENDALT , REOP_CONCAT , REOP_END , REOP_LIMIT
}
 

Functions

 WINE_DEFAULT_DEBUG_CHANNEL (jscript)
 
static BOOL ParseRegExp (CompilerState *)
 
static size_t GetCompactIndexWidth (size_t index)
 
static jsbytecodeWriteCompactIndex (jsbytecode *pc, size_t index)
 
static jsbytecodeReadCompactIndex (jsbytecode *pc, size_t *result)
 
static RENodeNewRENode (CompilerState *state, REOp op)
 
static BOOL isASCIIHexDigit (WCHAR c, UINT *digit)
 
static BOOL SetForwardJumpOffset (jsbytecode *jump, jsbytecode *target)
 
static jsbytecodeEmitREBytecode (CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t)
 
static BOOL ProcessOp (CompilerState *state, REOpData *opData, RENode **operandStack, INT operandSP)
 
static UINT FindParenCount (CompilerState *state)
 
static UINT GetDecimalValue (WCHAR c, UINT max, UINT(*findMax)(CompilerState *state), CompilerState *state)
 
static BOOL CalculateBitmapSize (CompilerState *state, RENode *target, const WCHAR *src, const WCHAR *end)
 
static INT ParseMinMaxQuantifier (CompilerState *state, BOOL ignoreValues)
 
static BOOL ParseQuantifier (CompilerState *state)
 
static BOOL ParseTerm (CompilerState *state)
 
static REBackTrackDataPushBackTrackState (REGlobalData *gData, REOp op, jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount)
 
static match_state_tFlatNIMatcher (REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length)
 
static match_state_tBackrefMatcher (REGlobalData *gData, match_state_t *x, size_t parenIndex)
 
static void AddCharacterToCharSet (RECharSet *cs, WCHAR c)
 
static void AddCharacterRangeToCharSet (RECharSet *cs, UINT c1, UINT c2)
 
static BOOL ProcessCharSet (REGlobalData *gData, RECharSet *charSet)
 
static BOOL ReallocStateStack (REGlobalData *gData)
 
static match_state_tSimpleMatch (REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp)
 
static match_state_tExecuteREBytecode (REGlobalData *gData, match_state_t *x)
 
static match_state_tMatchRegExp (REGlobalData *gData, match_state_t *x)
 
static HRESULT InitMatch (regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
 
HRESULT regexp_execute (regexp_t *regexp, void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, match_state_t *result)
 
void regexp_destroy (regexp_t *re)
 
regexp_tregexp_new (void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, WORD flags, BOOL flat)
 

Variables

static const charreop_names []
 

Macro Definition Documentation

◆ ARG_LEN

#define ARG_LEN   2

Definition at line 330 of file regexp.c.

◆ CLASS_BITMAPS_MEM_LIMIT

#define CLASS_BITMAPS_MEM_LIMIT   (1 << 24)

Definition at line 351 of file regexp.c.

◆ CLASS_CACHE_SIZE

#define CLASS_CACHE_SIZE   4

Definition at line 290 of file regexp.c.

◆ GET_ARG

#define GET_ARG (   pc)    ((WORD)(((pc)[0] << 8) | (pc)[1]))

Definition at line 331 of file regexp.c.

◆ GET_OFFSET

#define GET_OFFSET (   pc)    GET_ARG(pc)

Definition at line 337 of file regexp.c.

◆ INITIAL_BACKTRACK

#define INITIAL_BACKTRACK   8000

Definition at line 230 of file regexp.c.

◆ INITIAL_STACK_SIZE

#define INITIAL_STACK_SIZE   128

Definition at line 1645 of file regexp.c.

◆ INITIAL_STATESTACK

#define INITIAL_STATESTACK   100

Definition at line 229 of file regexp.c.

◆ JS7_ISDEC

#define JS7_ISDEC (   c)    ((((unsigned)(c)) - '0') <= 9)

Definition at line 89 of file regexp.c.

◆ JS7_UNDEC

#define JS7_UNDEC (   c)    ((c) - '0')

Definition at line 90 of file regexp.c.

◆ JS_COUNT_OPERATION

#define JS_COUNT_OPERATION (   a,
  b 
)    do { } while(0)

Definition at line 48 of file regexp.c.

◆ JS_ISWORD

#define JS_ISWORD (   c)    ((c) < 128 && (isalnum(c) || (c) == '_'))

Definition at line 87 of file regexp.c.

◆ JS_ReportErrorFlagsAndNumber

#define JS_ReportErrorFlagsAndNumber (   a,
  b,
  c,
  d,
  e,
  f 
)    throw_error((a), E_FAIL, L"")

Definition at line 47 of file regexp.c.

◆ JS_ReportErrorNumber

#define JS_ReportErrorNumber (   a,
  b,
  c,
  d 
)    throw_error((a), E_FAIL, L"")

Definition at line 46 of file regexp.c.

◆ JSMSG_MAX_TOO_BIG

#define JSMSG_MAX_TOO_BIG   48

Definition at line 75 of file regexp.c.

◆ JSMSG_MIN_TOO_BIG

#define JSMSG_MIN_TOO_BIG   47

Definition at line 74 of file regexp.c.

◆ JSMSG_OUT_OF_MEMORY

#define JSMSG_OUT_OF_MEMORY   137

Definition at line 77 of file regexp.c.

◆ JSMSG_OUT_OF_ORDER

#define JSMSG_OUT_OF_ORDER   49

Definition at line 76 of file regexp.c.

◆ JSREG_FIND_PAREN_COUNT

#define JSREG_FIND_PAREN_COUNT   0x8000

Definition at line 893 of file regexp.c.

◆ JSREG_FIND_PAREN_ERROR

#define JSREG_FIND_PAREN_ERROR   0x4000

Definition at line 894 of file regexp.c.

◆ JUMP_OFFSET_HI

#define JUMP_OFFSET_HI (   off)    ((jsbytecode)((off) >> 8))

Definition at line 450 of file regexp.c.

◆ JUMP_OFFSET_LO

#define JUMP_OFFSET_LO (   off)    ((jsbytecode)(off))

Definition at line 451 of file regexp.c.

◆ LINE_SEPARATOR

#define LINE_SEPARATOR   0x2028

Definition at line 79 of file regexp.c.

◆ OFFSET_LEN

#define OFFSET_LEN   ARG_LEN

Definition at line 335 of file regexp.c.

◆ OFFSET_MAX

#define OFFSET_MAX   ((1 << (ARG_LEN * 8)) - 1)

Definition at line 336 of file regexp.c.

◆ OVERFLOW_VALUE

#define OVERFLOW_VALUE   ((UINT)-1)

Definition at line 901 of file regexp.c.

◆ PARA_SEPARATOR

#define PARA_SEPARATOR   0x2029

Definition at line 80 of file regexp.c.

◆ PREPARE_REPEAT

#define PREPARE_REPEAT ( )
Value:
do { \
curState->index = x->cp - gData->cpbegin; \
curState->continue_op = REOP_MINIMALREPEAT; \
curState->continue_pc = pc; \
pc += ARG_LEN; \
for (k = curState->parenSoFar; k < parenSoFar; k++) \
x->parens[k].index = -1; \
PUSH_STATE_STACK(gData); \
op = (REOp) *pc++; \
assert(op < REOP_LIMIT); \
}while(0)
UINT op
Definition: effect.c:236
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
REOp
Definition: regexp.c:92
@ REOP_MINIMALREPEAT
Definition: regexp.c:136
@ REOP_LIMIT
Definition: regexp.c:142
#define ARG_LEN
Definition: regexp.c:330
int k
Definition: mpi.c:3369

◆ PUSH_STATE_STACK

#define PUSH_STATE_STACK (   data)
Value:
do { \
++(data)->stateStackTop; \
if ((data)->stateStackTop == (data)->stateStackLimit && \
return NULL; \
} \
}while(0)
#define NULL
Definition: types.h:112
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
Definition: gl.h:1950
static BOOL ReallocStateStack(REGlobalData *gData)
Definition: regexp.c:2294

Definition at line 2309 of file regexp.c.

◆ RE_IS_LETTER

#define RE_IS_LETTER (   c)
Value:
(((c >= 'A') && (c <= 'Z')) || \
((c >= 'a') && (c <= 'z')) )
const GLubyte * c
Definition: glext.h:8905

Definition at line 82 of file regexp.c.

◆ RE_IS_LINE_TERM

#define RE_IS_LINE_TERM (   c)
Value:
((c == '\n') || (c == '\r') || \
#define PARA_SEPARATOR
Definition: regexp.c:80
#define LINE_SEPARATOR
Definition: regexp.c:79

Definition at line 84 of file regexp.c.

◆ REOP_IS_SIMPLE

#define REOP_IS_SIMPLE (   op)    ((op) <= REOP_NCLASS)

Definition at line 145 of file regexp.c.

◆ ReportRegExpError

#define ReportRegExpError (   a,
  b,
  c 
)    throw_error((a)->context, E_FAIL, L"")

Definition at line 44 of file regexp.c.

◆ ReportRegExpErrorHelper

#define ReportRegExpErrorHelper (   a,
  b,
  c,
  d 
)    throw_error((a)->context, E_FAIL, L"")

Definition at line 45 of file regexp.c.

◆ SET_ARG

#define SET_ARG (   pc,
  arg 
)
Value:
((pc)[0] = (jsbytecode) ((arg) >> 8), \
(pc)[1] = (jsbytecode) (arg))
BYTE jsbytecode
Definition: regexp.h:54
void * arg
Definition: msvc.h:10

Definition at line 332 of file regexp.c.

◆ TREE_DEPTH_MAX

#define TREE_DEPTH_MAX   ((1 << 24) / sizeof(EmitStateStackEntry))

Definition at line 345 of file regexp.c.

Typedef Documentation

◆ CompilerState

◆ EmitStateStackEntry

◆ JSPackedBool

typedef BYTE JSPackedBool

Definition at line 51 of file regexp.c.

◆ REBackTrackData

◆ RECharSet

◆ REGlobalData

◆ RENode

typedef struct RENode RENode

Definition at line 257 of file regexp.c.

◆ REOp

typedef enum REOp REOp

◆ REProgState

Enumeration Type Documentation

◆ REOp

Enumerator
REOP_EMPTY 
REOP_BOL 
REOP_EOL 
REOP_WBDRY 
REOP_WNONBDRY 
REOP_DOT 
REOP_DIGIT 
REOP_NONDIGIT 
REOP_ALNUM 
REOP_NONALNUM 
REOP_SPACE 
REOP_NONSPACE 
REOP_BACKREF 
REOP_FLAT 
REOP_FLAT1 
REOP_FLATi 
REOP_FLAT1i 
REOP_UCFLAT1 
REOP_UCFLAT1i 
REOP_UCFLAT 
REOP_UCFLATi 
REOP_CLASS 
REOP_NCLASS 
REOP_ALT 
REOP_QUANT 
REOP_STAR 
REOP_PLUS 
REOP_OPT 
REOP_LPAREN 
REOP_RPAREN 
REOP_JUMP 
REOP_DOTSTAR 
REOP_LPARENNON 
REOP_ASSERT 
REOP_ASSERT_NOT 
REOP_ASSERTTEST 
REOP_ASSERTNOTTEST 
REOP_MINIMALSTAR 
REOP_MINIMALPLUS 
REOP_MINIMALOPT 
REOP_MINIMALQUANT 
REOP_ENDCHILD 
REOP_REPEAT 
REOP_MINIMALREPEAT 
REOP_ALTPREREQ 
REOP_ALTPREREQ2 
REOP_ENDALT 
REOP_CONCAT 
REOP_END 
REOP_LIMIT 
REOP_EMPTY 
REOP_BOL 
REOP_EOL 
REOP_WBDRY 
REOP_WNONBDRY 
REOP_DOT 
REOP_DIGIT 
REOP_NONDIGIT 
REOP_ALNUM 
REOP_NONALNUM 
REOP_SPACE 
REOP_NONSPACE 
REOP_BACKREF 
REOP_FLAT 
REOP_FLAT1 
REOP_FLATi 
REOP_FLAT1i 
REOP_UCFLAT1 
REOP_UCFLAT1i 
REOP_UCFLAT 
REOP_UCFLATi 
REOP_CLASS 
REOP_NCLASS 
REOP_ALT 
REOP_QUANT 
REOP_STAR 
REOP_PLUS 
REOP_OPT 
REOP_LPAREN 
REOP_RPAREN 
REOP_JUMP 
REOP_DOTSTAR 
REOP_LPARENNON 
REOP_ASSERT 
REOP_ASSERT_NOT 
REOP_ASSERTTEST 
REOP_ASSERTNOTTEST 
REOP_MINIMALSTAR 
REOP_MINIMALPLUS 
REOP_MINIMALOPT 
REOP_MINIMALQUANT 
REOP_ENDCHILD 
REOP_REPEAT 
REOP_MINIMALREPEAT 
REOP_ALTPREREQ 
REOP_ALTPREREQ2 
REOP_ENDALT 
REOP_CONCAT 
REOP_END 
REOP_LIMIT 

Definition at line 92 of file regexp.c.

92 {
106 REOP_FLAT,
116 REOP_ALT,
118 REOP_STAR,
119 REOP_PLUS,
120 REOP_OPT,
123 REOP_JUMP,
141 REOP_END,
142 REOP_LIMIT /* META: no operator >= to this */
143} REOp;
@ REOP_LPAREN
Definition: regexp.c:121
@ REOP_SPACE
Definition: regexp.c:103
@ REOP_WBDRY
Definition: regexp.c:96
@ REOP_UCFLAT
Definition: regexp.c:112
@ REOP_ALTPREREQ
Definition: regexp.c:137
@ REOP_REPEAT
Definition: regexp.c:135
@ REOP_QUANT
Definition: regexp.c:117
@ REOP_UCFLAT1i
Definition: regexp.c:111
@ REOP_ASSERTTEST
Definition: regexp.c:128
@ REOP_EMPTY
Definition: regexp.c:93
@ REOP_OPT
Definition: regexp.c:120
@ REOP_DOTSTAR
Definition: regexp.c:124
@ REOP_FLAT1i
Definition: regexp.c:109
@ REOP_MINIMALPLUS
Definition: regexp.c:131
@ REOP_JUMP
Definition: regexp.c:123
@ REOP_ALT
Definition: regexp.c:116
@ REOP_NONSPACE
Definition: regexp.c:104
@ REOP_DOT
Definition: regexp.c:98
@ REOP_BACKREF
Definition: regexp.c:105
@ REOP_BOL
Definition: regexp.c:94
@ REOP_ENDCHILD
Definition: regexp.c:134
@ REOP_NCLASS
Definition: regexp.c:115
@ REOP_ASSERT
Definition: regexp.c:126
@ REOP_ALNUM
Definition: regexp.c:101
@ REOP_WNONBDRY
Definition: regexp.c:97
@ REOP_MINIMALOPT
Definition: regexp.c:132
@ REOP_END
Definition: regexp.c:141
@ REOP_UCFLATi
Definition: regexp.c:113
@ REOP_CLASS
Definition: regexp.c:114
@ REOP_MINIMALQUANT
Definition: regexp.c:133
@ REOP_RPAREN
Definition: regexp.c:122
@ REOP_DIGIT
Definition: regexp.c:99
@ REOP_NONDIGIT
Definition: regexp.c:100
@ REOP_ENDALT
Definition: regexp.c:139
@ REOP_ALTPREREQ2
Definition: regexp.c:138
@ REOP_FLAT1
Definition: regexp.c:107
@ REOP_ASSERTNOTTEST
Definition: regexp.c:129
@ REOP_EOL
Definition: regexp.c:95
@ REOP_ASSERT_NOT
Definition: regexp.c:127
@ REOP_FLATi
Definition: regexp.c:108
@ REOP_PLUS
Definition: regexp.c:119
@ REOP_CONCAT
Definition: regexp.c:140
@ REOP_FLAT
Definition: regexp.c:106
@ REOP_MINIMALSTAR
Definition: regexp.c:130
@ REOP_STAR
Definition: regexp.c:118
@ REOP_NONALNUM
Definition: regexp.c:102
@ REOP_LPARENNON
Definition: regexp.c:125
@ REOP_UCFLAT1
Definition: regexp.c:110

Function Documentation

◆ AddCharacterRangeToCharSet()

static void AddCharacterRangeToCharSet ( RECharSet cs,
UINT  c1,
UINT  c2 
)
static

Definition at line 2060 of file regexp.c.

2061{
2062 UINT i;
2063
2064 UINT byteIndex1 = c1 >> 3;
2065 UINT byteIndex2 = c2 >> 3;
2066
2067 assert(c2 <= cs->length && c1 <= c2);
2068
2069 c1 &= 0x7;
2070 c2 &= 0x7;
2071
2072 if (byteIndex1 == byteIndex2) {
2073 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2074 } else {
2075 cs->u.bits[byteIndex1] |= 0xFF << c1;
2076 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2077 cs->u.bits[i] = 0xFF;
2078 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2079 }
2080}
#define assert(_expr)
Definition: assert.h:32
GLuint GLsizei GLsizei * length
Definition: glext.h:6040
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define cs
Definition: i386-dis.c:442
unsigned int UINT
Definition: ndis.h:50
unsigned char BYTE
Definition: xxhash.c:193

Referenced by ProcessCharSet().

◆ AddCharacterToCharSet()

static void AddCharacterToCharSet ( RECharSet cs,
WCHAR  c 
)
static

Definition at line 2050 of file regexp.c.

2051{
2052 UINT byteIndex = (UINT)(c >> 3);
2053 assert(c <= cs->length);
2054 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2055}

Referenced by ProcessCharSet().

◆ BackrefMatcher()

static match_state_t * BackrefMatcher ( REGlobalData gData,
match_state_t x,
size_t  parenIndex 
)
static

Definition at line 2019 of file regexp.c.

2020{
2021 size_t len, i;
2022 const WCHAR *parenContent;
2023 RECapture *cap = &x->parens[parenIndex];
2024
2025 if (cap->index == -1)
2026 return x;
2027
2028 len = cap->length;
2029 if (x->cp + len > gData->cpend)
2030 return NULL;
2031
2032 parenContent = &gData->cpbegin[cap->index];
2033 if (gData->regexp->flags & REG_FOLD) {
2034 for (i = 0; i < len; i++) {
2035 if (towupper(parenContent[i]) != towupper(x->cp[i]))
2036 return NULL;
2037 }
2038 } else {
2039 for (i = 0; i < len; i++) {
2040 if (parenContent[i] != x->cp[i])
2041 return NULL;
2042 }
2043 }
2044 x->cp += len;
2045 return x;
2046}
#define REG_FOLD
Definition: regexp.h:36
GLenum GLsizei len
Definition: glext.h:6722
GLenum cap
Definition: glext.h:9639
short WCHAR
Definition: pedump.c:58
#define towupper(c)
Definition: wctype.h:99
const WCHAR * cpbegin
Definition: regexp.c:238
const WCHAR * cpend
Definition: regexp.c:239
regexp_t * regexp
Definition: regexp.c:234
WORD flags
Definition: regexp.h:57

Referenced by SimpleMatch().

◆ CalculateBitmapSize()

static BOOL CalculateBitmapSize ( CompilerState state,
RENode target,
const WCHAR src,
const WCHAR end 
)
static

Definition at line 967 of file regexp.c.

969{
970 UINT max = 0;
971 BOOL inRange = FALSE;
972 WCHAR c, rangeStart = 0;
973 UINT n, digit, nDigits, i;
974
975 target->u.ucclass.bmsize = 0;
976 target->u.ucclass.sense = TRUE;
977
978 if (src == end)
979 return TRUE;
980
981 if (*src == '^') {
982 ++src;
983 target->u.ucclass.sense = FALSE;
984 }
985
986 while (src != end) {
987 BOOL canStartRange = TRUE;
988 UINT localMax = 0;
989
990 switch (*src) {
991 case '\\':
992 ++src;
993 c = *src++;
994 switch (c) {
995 case 'b':
996 localMax = 0x8;
997 break;
998 case 'f':
999 localMax = 0xC;
1000 break;
1001 case 'n':
1002 localMax = 0xA;
1003 break;
1004 case 'r':
1005 localMax = 0xD;
1006 break;
1007 case 't':
1008 localMax = 0x9;
1009 break;
1010 case 'v':
1011 localMax = 0xB;
1012 break;
1013 case 'c':
1014 if (src < end && RE_IS_LETTER(*src)) {
1015 localMax = (UINT) (*src++) & 0x1F;
1016 } else {
1017 --src;
1018 localMax = '\\';
1019 }
1020 break;
1021 case 'x':
1022 nDigits = 2;
1023 goto lexHex;
1024 case 'u':
1025 nDigits = 4;
1026lexHex:
1027 n = 0;
1028 for (i = 0; (i < nDigits) && (src < end); i++) {
1029 c = *src++;
1030 if (!isASCIIHexDigit(c, &digit)) {
1031 /*
1032 * Back off to accepting the original
1033 *'\' as a literal.
1034 */
1035 src -= i + 1;
1036 n = '\\';
1037 break;
1038 }
1039 n = (n << 4) | digit;
1040 }
1041 localMax = n;
1042 break;
1043 case 'd':
1044 canStartRange = FALSE;
1045 if (inRange) {
1046 JS_ReportErrorNumber(state->context,
1047 js_GetErrorMessage, NULL,
1048 JSMSG_BAD_CLASS_RANGE);
1049 return FALSE;
1050 }
1051 localMax = '9';
1052 break;
1053 case 'D':
1054 case 's':
1055 case 'S':
1056 case 'w':
1057 case 'W':
1058 canStartRange = FALSE;
1059 if (inRange) {
1060 JS_ReportErrorNumber(state->context,
1061 js_GetErrorMessage, NULL,
1062 JSMSG_BAD_CLASS_RANGE);
1063 return FALSE;
1064 }
1065 max = 65535;
1066
1067 /*
1068 * If this is the start of a range, ensure that it's less than
1069 * the end.
1070 */
1071 localMax = 0;
1072 break;
1073 case '0':
1074 case '1':
1075 case '2':
1076 case '3':
1077 case '4':
1078 case '5':
1079 case '6':
1080 case '7':
1081 /*
1082 * This is a non-ECMA extension - decimal escapes (in this
1083 * case, octal!) are supposed to be an error inside class
1084 * ranges, but supported here for backwards compatibility.
1085 *
1086 */
1087 n = JS7_UNDEC(c);
1088 c = *src;
1089 if ('0' <= c && c <= '7') {
1090 src++;
1091 n = 8 * n + JS7_UNDEC(c);
1092 c = *src;
1093 if ('0' <= c && c <= '7') {
1094 src++;
1095 i = 8 * n + JS7_UNDEC(c);
1096 if (i <= 0377)
1097 n = i;
1098 else
1099 src--;
1100 }
1101 }
1102 localMax = n;
1103 break;
1104
1105 default:
1106 localMax = c;
1107 break;
1108 }
1109 break;
1110 default:
1111 localMax = *src++;
1112 break;
1113 }
1114
1115 if (inRange) {
1116 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1117 if (rangeStart > localMax) {
1118 JS_ReportErrorNumber(state->context,
1119 js_GetErrorMessage, NULL,
1120 JSMSG_BAD_CLASS_RANGE);
1121 return FALSE;
1122 }
1123 inRange = FALSE;
1124 } else {
1125 if (canStartRange && src < end - 1) {
1126 if (*src == '-') {
1127 ++src;
1128 inRange = TRUE;
1129 rangeStart = (WCHAR)localMax;
1130 continue;
1131 }
1132 }
1133 if (state->flags & REG_FOLD)
1134 rangeStart = localMax; /* one run of the uc/dc loop below */
1135 }
1136
1137 if (state->flags & REG_FOLD) {
1138 WCHAR maxch = localMax;
1139
1140 for (i = rangeStart; i <= localMax; i++) {
1141 WCHAR uch, dch;
1142
1143 uch = towupper(i);
1144 dch = towlower(i);
1145 if(maxch < uch)
1146 maxch = uch;
1147 if(maxch < dch)
1148 maxch = dch;
1149 }
1150 localMax = maxch;
1151 }
1152
1153 if (localMax > max)
1154 max = localMax;
1155 }
1156 target->u.ucclass.bmsize = max;
1157 return TRUE;
1158}
static int state
Definition: maze.c:121
#define TRUE
Definition: types.h:120
#define FALSE
Definition: types.h:117
unsigned char uch
Definition: zlib.h:47
unsigned int BOOL
Definition: ntddk_ex.h:94
GLuint GLuint end
Definition: gl.h:1545
GLdouble n
Definition: glext.h:7729
GLenum src
Definition: glext.h:6340
#define JS7_UNDEC(c)
Definition: regexp.c:90
#define RE_IS_LETTER(c)
Definition: regexp.c:82
static BOOL isASCIIHexDigit(WCHAR c, UINT *digit)
Definition: regexp.c:426
#define JS_ReportErrorNumber(a, b, c, d)
Definition: regexp.c:46
#define c
Definition: ke_i.h:80
#define towlower(c)
Definition: wctype.h:97
Definition: tools.h:99
#define max(a, b)
Definition: svc.c:63

Referenced by ParseTerm().

◆ EmitREBytecode()

static jsbytecode * EmitREBytecode ( CompilerState state,
regexp_t re,
size_t  treeDepth,
jsbytecode pc,
RENode t 
)
static

Definition at line 473 of file regexp.c.

475{
476 EmitStateStackEntry *emitStateSP, *emitStateStack;
477 RECharSet *charSet;
478 REOp op;
479
480 if (treeDepth == 0) {
481 emitStateStack = NULL;
482 } else {
483 emitStateStack = malloc(sizeof(EmitStateStackEntry) * treeDepth);
484 if (!emitStateStack)
485 return NULL;
486 }
487 emitStateSP = emitStateStack;
488 op = t->op;
490
491 for (;;) {
492 *pc++ = op;
493 switch (op) {
494 case REOP_EMPTY:
495 --pc;
496 break;
497
498 case REOP_ALTPREREQ2:
499 case REOP_ALTPREREQ:
500 assert(emitStateSP);
501 emitStateSP->altHead = pc - 1;
502 emitStateSP->endTermFixup = pc;
503 pc += OFFSET_LEN;
504 SET_ARG(pc, t->u.altprereq.ch1);
505 pc += ARG_LEN;
506 SET_ARG(pc, t->u.altprereq.ch2);
507 pc += ARG_LEN;
508
509 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
510 pc += OFFSET_LEN;
511
512 emitStateSP->continueNode = t;
513 emitStateSP->continueOp = REOP_JUMP;
514 emitStateSP->jumpToJumpFlag = FALSE;
515 ++emitStateSP;
516 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
517 t = t->kid;
518 op = t->op;
520 continue;
521
522 case REOP_JUMP:
523 emitStateSP->nextTermFixup = pc; /* offset to following term */
524 pc += OFFSET_LEN;
525 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
526 goto jump_too_big;
527 emitStateSP->continueOp = REOP_ENDALT;
528 ++emitStateSP;
529 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
530 t = t->u.kid2;
531 op = t->op;
533 continue;
534
535 case REOP_ENDALT:
536 /*
537 * If we already patched emitStateSP->nextTermFixup to jump to
538 * a nearer jump, to avoid 16-bit immediate offset overflow, we
539 * are done here.
540 */
541 if (emitStateSP->jumpToJumpFlag)
542 break;
543
544 /*
545 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
546 * REOP_ENDALT is executed only on successful match of the last
547 * alternate in a group.
548 */
549 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
550 goto jump_too_big;
551 if (t->op != REOP_ALT) {
552 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
553 goto jump_too_big;
554 }
555
556 /*
557 * If the program is bigger than the REOP_JUMP offset range, then
558 * we must check for alternates before this one that are part of
559 * the same group, and fix up their jump offsets to target jumps
560 * close enough to fit in a 16-bit unsigned offset immediate.
561 */
562 if ((size_t)(pc - re->program) > OFFSET_MAX &&
563 emitStateSP > emitStateStack) {
564 EmitStateStackEntry *esp, *esp2;
565 jsbytecode *alt, *jump;
567
568 esp2 = emitStateSP;
569 alt = esp2->altHead;
570 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
571 if (esp->continueOp == REOP_ENDALT &&
572 !esp->jumpToJumpFlag &&
573 esp->nextTermFixup + OFFSET_LEN == alt &&
574 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
575 ? esp->endTermFixup
576 : esp->nextTermFixup)) > OFFSET_MAX) {
577 alt = esp->altHead;
578 jump = esp->nextTermFixup;
579
580 /*
581 * The span must be 1 less than the distance from
582 * jump offset to jump offset, so we actually jump
583 * to a REOP_JUMP bytecode, not to its offset!
584 */
585 for (;;) {
586 assert(jump < esp2->nextTermFixup);
587 span = esp2->nextTermFixup - jump - 1;
588 if ((size_t)span <= OFFSET_MAX)
589 break;
590 do {
591 if (--esp2 == esp)
592 goto jump_too_big;
593 } while (esp2->continueOp != REOP_ENDALT);
594 }
595
596 jump[0] = JUMP_OFFSET_HI(span);
597 jump[1] = JUMP_OFFSET_LO(span);
598
599 if (esp->continueNode->op != REOP_ALT) {
600 /*
601 * We must patch the offset at esp->endTermFixup
602 * as well, for the REOP_ALTPREREQ{,2} opcodes.
603 * If we're unlucky and endTermFixup is more than
604 * OFFSET_MAX bytes from its target, we cheat by
605 * jumping 6 bytes to the jump whose offset is at
606 * esp->nextTermFixup, which has the same target.
607 */
608 jump = esp->endTermFixup;
609 header = esp->nextTermFixup - jump;
610 span += header;
611 if ((size_t)span > OFFSET_MAX)
612 span = header;
613
614 jump[0] = JUMP_OFFSET_HI(span);
615 jump[1] = JUMP_OFFSET_LO(span);
616 }
617
618 esp->jumpToJumpFlag = TRUE;
619 }
620 }
621 }
622 break;
623
624 case REOP_ALT:
625 assert(emitStateSP);
626 emitStateSP->altHead = pc - 1;
627 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
628 pc += OFFSET_LEN;
629 emitStateSP->continueNode = t;
630 emitStateSP->continueOp = REOP_JUMP;
631 emitStateSP->jumpToJumpFlag = FALSE;
632 ++emitStateSP;
633 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
634 t = t->kid;
635 op = t->op;
637 continue;
638
639 case REOP_FLAT:
640 /*
641 * Coalesce FLATs if possible and if it would not increase bytecode
642 * beyond preallocated limit. The latter happens only when bytecode
643 * size for coalesced string with offset p and length 2 exceeds 6
644 * bytes preallocated for 2 single char nodes, i.e. when
645 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
646 * GetCompactIndexWidth(p) > 4.
647 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
648 * nodes strictly decreases bytecode size, the check has to be
649 * done only for the first coalescing.
650 */
651 if (t->kid &&
652 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
653 {
654 while (t->next &&
655 t->next->op == REOP_FLAT &&
656 (WCHAR*)t->kid + t->u.flat.length ==
657 t->next->kid) {
658 t->u.flat.length += t->next->u.flat.length;
659 t->next = t->next->next;
660 }
661 }
662 if (t->kid && t->u.flat.length > 1) {
663 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLATi : REOP_FLAT;
664 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
665 pc = WriteCompactIndex(pc, t->u.flat.length);
666 } else if (t->u.flat.chr < 256) {
667 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
668 *pc++ = (jsbytecode) t->u.flat.chr;
669 } else {
670 pc[-1] = (state->flags & REG_FOLD)
672 : REOP_UCFLAT1;
673 SET_ARG(pc, t->u.flat.chr);
674 pc += ARG_LEN;
675 }
676 break;
677
678 case REOP_LPAREN:
679 assert(emitStateSP);
680 pc = WriteCompactIndex(pc, t->u.parenIndex);
681 emitStateSP->continueNode = t;
682 emitStateSP->continueOp = REOP_RPAREN;
683 ++emitStateSP;
684 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
685 t = t->kid;
686 op = t->op;
687 continue;
688
689 case REOP_RPAREN:
690 pc = WriteCompactIndex(pc, t->u.parenIndex);
691 break;
692
693 case REOP_BACKREF:
694 pc = WriteCompactIndex(pc, t->u.parenIndex);
695 break;
696
697 case REOP_ASSERT:
698 assert(emitStateSP);
699 emitStateSP->nextTermFixup = pc;
700 pc += OFFSET_LEN;
701 emitStateSP->continueNode = t;
702 emitStateSP->continueOp = REOP_ASSERTTEST;
703 ++emitStateSP;
704 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
705 t = t->kid;
706 op = t->op;
707 continue;
708
709 case REOP_ASSERTTEST:
711 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
712 goto jump_too_big;
713 break;
714
715 case REOP_ASSERT_NOT:
716 assert(emitStateSP);
717 emitStateSP->nextTermFixup = pc;
718 pc += OFFSET_LEN;
719 emitStateSP->continueNode = t;
720 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
721 ++emitStateSP;
722 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
723 t = t->kid;
724 op = t->op;
725 continue;
726
727 case REOP_QUANT:
728 assert(emitStateSP);
729 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
730 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
731 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
732 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
733 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
734 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
735 } else {
736 if (!t->u.range.greedy)
737 pc[-1] = REOP_MINIMALQUANT;
738 pc = WriteCompactIndex(pc, t->u.range.min);
739 /*
740 * Write max + 1 to avoid using size_t(max) + 1 bytes
741 * for (UINT)-1 sentinel.
742 */
743 pc = WriteCompactIndex(pc, t->u.range.max + 1);
744 }
745 emitStateSP->nextTermFixup = pc;
746 pc += OFFSET_LEN;
747 emitStateSP->continueNode = t;
748 emitStateSP->continueOp = REOP_ENDCHILD;
749 ++emitStateSP;
750 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
751 t = t->kid;
752 op = t->op;
753 continue;
754
755 case REOP_ENDCHILD:
756 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
757 goto jump_too_big;
758 break;
759
760 case REOP_CLASS:
761 if (!t->u.ucclass.sense)
762 pc[-1] = REOP_NCLASS;
763 pc = WriteCompactIndex(pc, t->u.ucclass.index);
764 charSet = &re->classList[t->u.ucclass.index];
765 charSet->converted = FALSE;
766 charSet->length = t->u.ucclass.bmsize;
767 charSet->u.src.startIndex = t->u.ucclass.startIndex;
768 charSet->u.src.length = t->u.ucclass.kidlen;
769 charSet->sense = t->u.ucclass.sense;
770 break;
771
772 default:
773 break;
774 }
775
776 t = t->next;
777 if (t) {
778 op = t->op;
779 } else {
780 if (emitStateSP == emitStateStack)
781 break;
782 --emitStateSP;
783 t = emitStateSP->continueNode;
784 op = (REOp) emitStateSP->continueOp;
785 }
786 }
787
788 cleanup:
789 free(emitStateStack);
790 return pc;
791
792 jump_too_big:
793 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
794 pc = NULL;
795 goto cleanup;
796}
#define free
Definition: debug_ros.c:5
#define malloc
Definition: debug_ros.c:4
static void cleanup(void)
Definition: main.c:1335
int ptrdiff_t
Definition: corecrt.h:194
GLdouble GLdouble t
Definition: gl.h:2047
GLenum GLenum GLvoid GLvoid GLvoid * span
Definition: glext.h:5664
static size_t GetCompactIndexWidth(size_t index)
Definition: regexp.c:361
#define ReportRegExpError(a, b, c)
Definition: regexp.c:44
#define JUMP_OFFSET_LO(off)
Definition: regexp.c:451
#define OFFSET_LEN
Definition: regexp.c:335
#define JUMP_OFFSET_HI(off)
Definition: regexp.c:450
#define OFFSET_MAX
Definition: regexp.c:336
#define SET_ARG(pc, arg)
Definition: regexp.c:332
static BOOL SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
Definition: regexp.c:454
static jsbytecode * WriteCompactIndex(jsbytecode *pc, size_t index)
Definition: regexp.c:370
RENode * continueNode
Definition: regexp.c:319
jsbytecode * endTermFixup
Definition: regexp.c:318
jsbytecode continueOp
Definition: regexp.c:320
jsbytecode * nextAltFixup
Definition: regexp.c:316
jsbytecode * altHead
Definition: regexp.c:315
jsbytecode * nextTermFixup
Definition: regexp.c:317
JSPackedBool jumpToJumpFlag
Definition: regexp.c:321
WORD length
Definition: regexp.c:64
JSPackedBool converted
Definition: regexp.c:62
struct RECharSet::@446::@447 src
JSPackedBool sense
Definition: regexp.c:63
union RECharSet::@446 u
REOp op
Definition: regexp.c:259
jsbytecode program[1]
Definition: regexp.h:63
struct RECharSet * classList
Definition: regexp.h:60

Referenced by regexp_new().

◆ ExecuteREBytecode()

static match_state_t * ExecuteREBytecode ( REGlobalData gData,
match_state_t x 
)
inlinestatic

Definition at line 2533 of file regexp.c.

2534{
2536 REBackTrackData *backTrackData;
2537 jsbytecode *nextpc, *testpc;
2538 REOp nextop;
2539 RECapture *cap;
2540 REProgState *curState;
2541 const WCHAR *startcp;
2542 size_t parenIndex, k;
2543 size_t parenSoFar = 0;
2544
2545 WCHAR matchCh1, matchCh2;
2546 RECharSet *charSet;
2547
2548 BOOL anchor;
2549 jsbytecode *pc = gData->regexp->program;
2550 REOp op = (REOp) *pc++;
2551
2552 /*
2553 * If the first node is a simple match, step the index into the string
2554 * until that match is made, or fail if it can't be found at all.
2555 */
2556 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & REG_STICKY)) {
2557 anchor = FALSE;
2558 while (x->cp <= gData->cpend) {
2559 nextpc = pc; /* reset back to start each time */
2560 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2561 if (result) {
2562 anchor = TRUE;
2563 x = result;
2564 pc = nextpc; /* accept skip to next opcode */
2565 op = (REOp) *pc++;
2566 assert(op < REOP_LIMIT);
2567 break;
2568 }
2569 gData->skipped++;
2570 x->cp++;
2571 }
2572 if (!anchor)
2573 goto bad;
2574 }
2575
2576 for (;;) {
2577 const char *opname = reop_names[op];
2578 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2579 (int)gData->stateStackTop * 2, "", opname);
2580
2581 if (REOP_IS_SIMPLE(op)) {
2582 result = SimpleMatch(gData, x, op, &pc, TRUE);
2583 } else {
2584 curState = &gData->stateStack[gData->stateStackTop];
2585 switch (op) {
2586 case REOP_END:
2587 goto good;
2588 case REOP_ALTPREREQ2:
2589 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2590 pc += ARG_LEN;
2591 matchCh2 = GET_ARG(pc);
2592 pc += ARG_LEN;
2593 k = GET_ARG(pc);
2594 pc += ARG_LEN;
2595
2596 if (x->cp != gData->cpend) {
2597 if (*x->cp == matchCh2)
2598 goto doAlt;
2599
2600 charSet = &gData->regexp->classList[k];
2601 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2602 goto bad;
2603 matchCh1 = *x->cp;
2604 k = matchCh1 >> 3;
2605 if ((charSet->length == 0 ||
2606 matchCh1 > charSet->length ||
2607 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2608 charSet->sense) {
2609 goto doAlt;
2610 }
2611 }
2612 result = NULL;
2613 break;
2614
2615 case REOP_ALTPREREQ:
2616 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2617 pc += ARG_LEN;
2618 matchCh1 = GET_ARG(pc);
2619 pc += ARG_LEN;
2620 matchCh2 = GET_ARG(pc);
2621 pc += ARG_LEN;
2622 if (x->cp == gData->cpend ||
2623 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2624 result = NULL;
2625 break;
2626 }
2627 /* else fall through... */
2628
2629 case REOP_ALT:
2630 doAlt:
2631 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2632 pc += ARG_LEN; /* start of this alternate */
2633 curState->parenSoFar = parenSoFar;
2634 PUSH_STATE_STACK(gData);
2635 op = (REOp) *pc++;
2636 startcp = x->cp;
2637 if (REOP_IS_SIMPLE(op)) {
2638 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2639 op = (REOp) *nextpc++;
2640 pc = nextpc;
2641 continue;
2642 }
2643 result = x;
2644 op = (REOp) *pc++;
2645 }
2646 nextop = (REOp) *nextpc++;
2647 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2648 goto bad;
2649 continue;
2650
2651 /*
2652 * Occurs at (successful) end of REOP_ALT,
2653 */
2654 case REOP_JUMP:
2655 /*
2656 * If we have not gotten a result here, it is because of an
2657 * empty match. Do the same thing REOP_EMPTY would do.
2658 */
2659 if (!result)
2660 result = x;
2661
2662 --gData->stateStackTop;
2663 pc += GET_OFFSET(pc);
2664 op = (REOp) *pc++;
2665 continue;
2666
2667 /*
2668 * Occurs at last (successful) end of REOP_ALT,
2669 */
2670 case REOP_ENDALT:
2671 /*
2672 * If we have not gotten a result here, it is because of an
2673 * empty match. Do the same thing REOP_EMPTY would do.
2674 */
2675 if (!result)
2676 result = x;
2677
2678 --gData->stateStackTop;
2679 op = (REOp) *pc++;
2680 continue;
2681
2682 case REOP_LPAREN:
2683 pc = ReadCompactIndex(pc, &parenIndex);
2684 TRACE("[ %Iu ]\n", (ULONG_PTR)parenIndex);
2685 assert(parenIndex < gData->regexp->parenCount);
2686 if (parenIndex + 1 > parenSoFar)
2687 parenSoFar = parenIndex + 1;
2688 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2689 x->parens[parenIndex].length = 0;
2690 op = (REOp) *pc++;
2691 continue;
2692
2693 case REOP_RPAREN:
2694 {
2695 ptrdiff_t delta;
2696
2697 pc = ReadCompactIndex(pc, &parenIndex);
2698 assert(parenIndex < gData->regexp->parenCount);
2699 cap = &x->parens[parenIndex];
2700 delta = x->cp - (gData->cpbegin + cap->index);
2701 cap->length = (delta < 0) ? 0 : (size_t) delta;
2702 op = (REOp) *pc++;
2703
2704 if (!result)
2705 result = x;
2706 continue;
2707 }
2708 case REOP_ASSERT:
2709 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2710 pc += ARG_LEN; /* start of ASSERT child */
2711 op = (REOp) *pc++;
2712 testpc = pc;
2713 if (REOP_IS_SIMPLE(op) &&
2714 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2715 result = NULL;
2716 break;
2717 }
2718 curState->u.assertion.top =
2719 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2720 curState->u.assertion.sz = gData->cursz;
2721 curState->index = x->cp - gData->cpbegin;
2722 curState->parenSoFar = parenSoFar;
2723 PUSH_STATE_STACK(gData);
2725 nextpc, x, x->cp, 0, 0)) {
2726 goto bad;
2727 }
2728 continue;
2729
2730 case REOP_ASSERT_NOT:
2731 nextpc = pc + GET_OFFSET(pc);
2732 pc += ARG_LEN;
2733 op = (REOp) *pc++;
2734 testpc = pc;
2735 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2736 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2737 *testpc == REOP_ASSERTNOTTEST) {
2738 result = NULL;
2739 break;
2740 }
2741 curState->u.assertion.top
2742 = (char *)gData->backTrackSP -
2743 (char *)gData->backTrackStack;
2744 curState->u.assertion.sz = gData->cursz;
2745 curState->index = x->cp - gData->cpbegin;
2746 curState->parenSoFar = parenSoFar;
2747 PUSH_STATE_STACK(gData);
2749 nextpc, x, x->cp, 0, 0)) {
2750 goto bad;
2751 }
2752 continue;
2753
2754 case REOP_ASSERTTEST:
2755 --gData->stateStackTop;
2756 --curState;
2757 x->cp = gData->cpbegin + curState->index;
2758 gData->backTrackSP =
2759 (REBackTrackData *) ((char *)gData->backTrackStack +
2760 curState->u.assertion.top);
2761 gData->cursz = curState->u.assertion.sz;
2762 if (result)
2763 result = x;
2764 break;
2765
2766 case REOP_ASSERTNOTTEST:
2767 --gData->stateStackTop;
2768 --curState;
2769 x->cp = gData->cpbegin + curState->index;
2770 gData->backTrackSP =
2771 (REBackTrackData *) ((char *)gData->backTrackStack +
2772 curState->u.assertion.top);
2773 gData->cursz = curState->u.assertion.sz;
2774 result = (!result) ? x : NULL;
2775 break;
2776 case REOP_STAR:
2777 curState->u.quantifier.min = 0;
2778 curState->u.quantifier.max = (UINT)-1;
2779 goto quantcommon;
2780 case REOP_PLUS:
2781 curState->u.quantifier.min = 1;
2782 curState->u.quantifier.max = (UINT)-1;
2783 goto quantcommon;
2784 case REOP_OPT:
2785 curState->u.quantifier.min = 0;
2786 curState->u.quantifier.max = 1;
2787 goto quantcommon;
2788 case REOP_QUANT:
2789 pc = ReadCompactIndex(pc, &k);
2790 curState->u.quantifier.min = k;
2791 pc = ReadCompactIndex(pc, &k);
2792 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2793 curState->u.quantifier.max = k - 1;
2794 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2795 quantcommon:
2796 if (curState->u.quantifier.max == 0) {
2797 pc = pc + GET_OFFSET(pc);
2798 op = (REOp) *pc++;
2799 result = x;
2800 continue;
2801 }
2802 /* Step over <next> */
2803 nextpc = pc + ARG_LEN;
2804 op = (REOp) *nextpc++;
2805 startcp = x->cp;
2806 if (REOP_IS_SIMPLE(op)) {
2807 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2808 if (curState->u.quantifier.min == 0)
2809 result = x;
2810 else
2811 result = NULL;
2812 pc = pc + GET_OFFSET(pc);
2813 break;
2814 }
2815 op = (REOp) *nextpc++;
2816 result = x;
2817 }
2818 curState->index = startcp - gData->cpbegin;
2819 curState->continue_op = REOP_REPEAT;
2820 curState->continue_pc = pc;
2821 curState->parenSoFar = parenSoFar;
2822 PUSH_STATE_STACK(gData);
2823 if (curState->u.quantifier.min == 0 &&
2824 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2825 0, 0)) {
2826 goto bad;
2827 }
2828 pc = nextpc;
2829 continue;
2830
2831 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2832 pc = curState[-1].continue_pc;
2833 op = (REOp) curState[-1].continue_op;
2834
2835 if (!result)
2836 result = x;
2837 continue;
2838
2839 case REOP_REPEAT:
2840 --curState;
2841 do {
2842 --gData->stateStackTop;
2843 if (!result) {
2844 /* Failed, see if we have enough children. */
2845 if (curState->u.quantifier.min == 0)
2846 goto repeatDone;
2847 goto break_switch;
2848 }
2849 if (curState->u.quantifier.min == 0 &&
2850 x->cp == gData->cpbegin + curState->index) {
2851 /* matched an empty string, that'll get us nowhere */
2852 result = NULL;
2853 goto break_switch;
2854 }
2855 if (curState->u.quantifier.min != 0)
2856 curState->u.quantifier.min--;
2857 if (curState->u.quantifier.max != (UINT) -1)
2858 curState->u.quantifier.max--;
2859 if (curState->u.quantifier.max == 0)
2860 goto repeatDone;
2861 nextpc = pc + ARG_LEN;
2862 nextop = (REOp) *nextpc;
2863 startcp = x->cp;
2864 if (REOP_IS_SIMPLE(nextop)) {
2865 nextpc++;
2866 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2867 if (curState->u.quantifier.min == 0)
2868 goto repeatDone;
2869 result = NULL;
2870 goto break_switch;
2871 }
2872 result = x;
2873 }
2874 curState->index = startcp - gData->cpbegin;
2875 PUSH_STATE_STACK(gData);
2876 if (curState->u.quantifier.min == 0 &&
2878 pc, x, startcp,
2879 curState->parenSoFar,
2880 parenSoFar -
2881 curState->parenSoFar)) {
2882 goto bad;
2883 }
2884 } while (*nextpc == REOP_ENDCHILD);
2885 pc = nextpc;
2886 op = (REOp) *pc++;
2887 parenSoFar = curState->parenSoFar;
2888 continue;
2889
2890 repeatDone:
2891 result = x;
2892 pc += GET_OFFSET(pc);
2893 goto break_switch;
2894
2895 case REOP_MINIMALSTAR:
2896 curState->u.quantifier.min = 0;
2897 curState->u.quantifier.max = (UINT)-1;
2898 goto minimalquantcommon;
2899 case REOP_MINIMALPLUS:
2900 curState->u.quantifier.min = 1;
2901 curState->u.quantifier.max = (UINT)-1;
2902 goto minimalquantcommon;
2903 case REOP_MINIMALOPT:
2904 curState->u.quantifier.min = 0;
2905 curState->u.quantifier.max = 1;
2906 goto minimalquantcommon;
2907 case REOP_MINIMALQUANT:
2908 pc = ReadCompactIndex(pc, &k);
2909 curState->u.quantifier.min = k;
2910 pc = ReadCompactIndex(pc, &k);
2911 /* See REOP_QUANT comments about k - 1. */
2912 curState->u.quantifier.max = k - 1;
2913 assert(curState->u.quantifier.min
2914 <= curState->u.quantifier.max);
2915 minimalquantcommon:
2916 curState->index = x->cp - gData->cpbegin;
2917 curState->parenSoFar = parenSoFar;
2918 PUSH_STATE_STACK(gData);
2919 if (curState->u.quantifier.min != 0) {
2920 curState->continue_op = REOP_MINIMALREPEAT;
2921 curState->continue_pc = pc;
2922 /* step over <next> */
2923 pc += OFFSET_LEN;
2924 op = (REOp) *pc++;
2925 } else {
2927 pc, x, x->cp, 0, 0)) {
2928 goto bad;
2929 }
2930 --gData->stateStackTop;
2931 pc = pc + GET_OFFSET(pc);
2932 op = (REOp) *pc++;
2933 }
2934 continue;
2935
2936 case REOP_MINIMALREPEAT:
2937 --gData->stateStackTop;
2938 --curState;
2939
2940 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2941#define PREPARE_REPEAT() \
2942 do { \
2943 curState->index = x->cp - gData->cpbegin; \
2944 curState->continue_op = REOP_MINIMALREPEAT; \
2945 curState->continue_pc = pc; \
2946 pc += ARG_LEN; \
2947 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2948 x->parens[k].index = -1; \
2949 PUSH_STATE_STACK(gData); \
2950 op = (REOp) *pc++; \
2951 assert(op < REOP_LIMIT); \
2952 }while(0)
2953
2954 if (!result) {
2955 TRACE(" -\n");
2956 /*
2957 * Non-greedy failure - try to consume another child.
2958 */
2959 if (curState->u.quantifier.max == (UINT) -1 ||
2960 curState->u.quantifier.max > 0) {
2962 continue;
2963 }
2964 /* Don't need to adjust pc since we're going to pop. */
2965 break;
2966 }
2967 if (curState->u.quantifier.min == 0 &&
2968 x->cp == gData->cpbegin + curState->index) {
2969 /* Matched an empty string, that'll get us nowhere. */
2970 result = NULL;
2971 break;
2972 }
2973 if (curState->u.quantifier.min != 0)
2974 curState->u.quantifier.min--;
2975 if (curState->u.quantifier.max != (UINT) -1)
2976 curState->u.quantifier.max--;
2977 if (curState->u.quantifier.min != 0) {
2979 continue;
2980 }
2981 curState->index = x->cp - gData->cpbegin;
2982 curState->parenSoFar = parenSoFar;
2983 PUSH_STATE_STACK(gData);
2985 pc, x, x->cp,
2986 curState->parenSoFar,
2987 parenSoFar - curState->parenSoFar)) {
2988 goto bad;
2989 }
2990 --gData->stateStackTop;
2991 pc = pc + GET_OFFSET(pc);
2992 op = (REOp) *pc++;
2993 assert(op < REOP_LIMIT);
2994 continue;
2995 default:
2996 assert(FALSE);
2997 result = NULL;
2998 }
2999 break_switch:;
3000 }
3001
3002 /*
3003 * If the match failed and there's a backtrack option, take it.
3004 * Otherwise this is a complete and utter failure.
3005 */
3006 if (!result) {
3007 if (gData->cursz == 0)
3008 return NULL;
3009
3010 /* Potentially detect explosive regex here. */
3011 gData->backTrackCount++;
3012 if (gData->backTrackLimit &&
3013 gData->backTrackCount >= gData->backTrackLimit) {
3014 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3015 JSMSG_REGEXP_TOO_COMPLEX);
3016 gData->ok = FALSE;
3017 return NULL;
3018 }
3019
3020 backTrackData = gData->backTrackSP;
3021 gData->cursz = backTrackData->sz;
3022 gData->backTrackSP =
3023 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3024 x->cp = backTrackData->cp;
3025 pc = backTrackData->backtrack_pc;
3026 op = (REOp) backTrackData->backtrack_op;
3027 assert(op < REOP_LIMIT);
3028 gData->stateStackTop = backTrackData->saveStateStackTop;
3029 assert(gData->stateStackTop);
3030
3031 memcpy(gData->stateStack, backTrackData + 1,
3032 sizeof(REProgState) * backTrackData->saveStateStackTop);
3033 curState = &gData->stateStack[gData->stateStackTop - 1];
3034
3035 if (backTrackData->parenCount) {
3036 memcpy(&x->parens[backTrackData->parenIndex],
3037 (char *)(backTrackData + 1) +
3038 sizeof(REProgState) * backTrackData->saveStateStackTop,
3039 sizeof(RECapture) * backTrackData->parenCount);
3040 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3041 } else {
3042 for (k = curState->parenSoFar; k < parenSoFar; k++)
3043 x->parens[k].index = -1;
3044 parenSoFar = curState->parenSoFar;
3045 }
3046
3047 TRACE("\tBT_Pop: %Id,%Id\n",
3048 (ULONG_PTR)backTrackData->parenIndex,
3049 (ULONG_PTR)backTrackData->parenCount);
3050 continue;
3051 }
3052 x = result;
3053
3054 /*
3055 * Continue with the expression.
3056 */
3057 op = (REOp)*pc++;
3058 assert(op < REOP_LIMIT);
3059 }
3060
3061bad:
3062 TRACE("\n");
3063 return NULL;
3064
3065good:
3066 TRACE("\n");
3067 return x;
3068}
#define REG_STICKY
Definition: regexp.h:39
GLuint64EXT * result
Definition: glext.h:11304
static REBackTrackData * PushBackTrackState(REGlobalData *gData, REOp op, jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount)
Definition: regexp.c:1919
static jsbytecode * ReadCompactIndex(jsbytecode *pc, size_t *result)
Definition: regexp.c:383
static BOOL ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
Definition: regexp.c:2084
#define REOP_IS_SIMPLE(op)
Definition: regexp.c:145
#define GET_ARG(pc)
Definition: regexp.c:331
#define GET_OFFSET(pc)
Definition: regexp.c:337
#define PREPARE_REPEAT()
#define PUSH_STATE_STACK(data)
Definition: regexp.c:2309
static match_state_t * SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp)
Definition: regexp.c:2325
static const char * reop_names[]
Definition: regexp.c:147
if(dx< 0)
Definition: linetemp.h:194
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
#define cap
Definition: glfuncs.h:226
#define TRACE(s)
Definition: solgame.cpp:4
jsbytecode * backtrack_pc
Definition: regexp.c:219
size_t parenCount
Definition: regexp.c:223
size_t sz
Definition: regexp.c:218
const WCHAR * cp
Definition: regexp.c:221
size_t parenIndex
Definition: regexp.c:222
jsbytecode backtrack_op
Definition: regexp.c:220
size_t saveStateStackTop
Definition: regexp.c:224
BYTE * bits
Definition: regexp.c:66
size_t backTrackLimit
Definition: regexp.c:250
REProgState * stateStack
Definition: regexp.c:241
REBackTrackData * backTrackSP
Definition: regexp.c:246
size_t cursz
Definition: regexp.c:248
REBackTrackData * backTrackStack
Definition: regexp.c:245
size_t backTrackCount
Definition: regexp.c:249
BOOL ok
Definition: regexp.c:235
size_t stateStackTop
Definition: regexp.c:242
ptrdiff_t skipped
Definition: regexp.c:237
void * cx
Definition: regexp.c:233
jsbytecode * continue_pc
Definition: regexp.c:201
struct REProgState::@448::@450 assertion
size_t parenSoFar
Definition: regexp.c:204
jsbytecode continue_op
Definition: regexp.c:202
union REProgState::@448 u
ptrdiff_t index
Definition: regexp.c:203
struct REProgState::@448::@449 quantifier
uint32_t ULONG_PTR
Definition: typedefs.h:65

Referenced by MatchRegExp().

◆ FindParenCount()

static UINT FindParenCount ( CompilerState state)
static

Definition at line 904 of file regexp.c.

905{
907 int i;
908
909 if (state->flags & JSREG_FIND_PAREN_COUNT)
910 return OVERFLOW_VALUE;
911
912 /*
913 * Copy state into temp, flag it so we never report an invalid backref,
914 * and reset its members to parse the entire regexp. This is obviously
915 * suboptimal, but GetDecimalValue calls us only if a backref appears to
916 * refer to a forward parenthetical, which is rare.
917 */
918 temp = *state;
920 temp.cp = temp.cpbegin;
921 temp.parenCount = 0;
922 temp.classCount = 0;
923 temp.progLength = 0;
924 temp.treeDepth = 0;
925 temp.classBitmapsMem = 0;
926 for (i = 0; i < CLASS_CACHE_SIZE; i++)
927 temp.classCache[i].start = NULL;
928
929 if (!ParseRegExp(&temp)) {
931 return OVERFLOW_VALUE;
932 }
933 return temp.parenCount;
934}
#define JSREG_FIND_PAREN_COUNT
Definition: regexp.c:893
static BOOL ParseRegExp(CompilerState *)
Definition: regexp.c:1648
#define OVERFLOW_VALUE
Definition: regexp.c:901
#define CLASS_CACHE_SIZE
Definition: regexp.c:290
#define JSREG_FIND_PAREN_ERROR
Definition: regexp.c:894
static calc_node_t temp
Definition: rpn_ieee.c:38

Referenced by ParseTerm().

◆ FlatNIMatcher()

static match_state_t * FlatNIMatcher ( REGlobalData gData,
match_state_t x,
const WCHAR matchChars,
size_t  length 
)
inlinestatic

Definition at line 1980 of file regexp.c.

1982{
1983 size_t i;
1984 assert(gData->cpend >= x->cp);
1985 if (length > (size_t)(gData->cpend - x->cp))
1986 return NULL;
1987 for (i = 0; i != length; i++) {
1988 if (towupper(matchChars[i]) != towupper(x->cp[i]))
1989 return NULL;
1990 }
1991 x->cp += length;
1992 return x;
1993}

Referenced by SimpleMatch().

◆ GetCompactIndexWidth()

static size_t GetCompactIndexWidth ( size_t  index)
static

Definition at line 361 of file regexp.c.

362{
363 size_t width;
364
365 for (width = 1; (index >>= 7) != 0; ++width) { }
366 return width;
367}
GLint GLint GLsizei width
Definition: gl.h:1546
GLuint index
Definition: glext.h:6031

Referenced by EmitREBytecode(), ParseMinMaxQuantifier(), ParseRegExp(), ParseTerm(), and regexp_new().

◆ GetDecimalValue()

static UINT GetDecimalValue ( WCHAR  c,
UINT  max,
UINT(*)(CompilerState *state findMax,
CompilerState state 
)
static

Definition at line 943 of file regexp.c.

945{
947 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
948
949 /* The following restriction allows simpler overflow checks. */
950 assert(max <= ((UINT)-1 - 9) / 10);
951 while (state->cp < state->cpend) {
952 c = *state->cp;
953 if (!JS7_ISDEC(c))
954 break;
955 value = 10 * value + JS7_UNDEC(c);
956 if (!overflow && value > max && (!findMax || value > findMax(state)))
957 overflow = TRUE;
958 ++state->cp;
959 }
960 return overflow ? OVERFLOW_VALUE : value;
961}
#define JS7_ISDEC(c)
Definition: regexp.c:89
Definition: pdh_main.c:96

Referenced by ParseMinMaxQuantifier(), and ParseTerm().

◆ InitMatch()

static HRESULT InitMatch ( regexp_t re,
void cx,
heap_pool_t pool,
REGlobalData gData 
)
static

Definition at line 3097 of file regexp.c.

3098{
3099 UINT i;
3100
3103 if (!gData->backTrackStack)
3104 goto bad;
3105
3106 gData->backTrackSP = gData->backTrackStack;
3107 gData->cursz = 0;
3108 gData->backTrackCount = 0;
3109 gData->backTrackLimit = 0;
3110
3112 gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3113 if (!gData->stateStack)
3114 goto bad;
3115
3116 gData->stateStackTop = 0;
3117 gData->cx = cx;
3118 gData->pool = pool;
3119 gData->regexp = re;
3120 gData->ok = TRUE;
3121
3122 for (i = 0; i < re->classCount; i++) {
3123 if (!re->classList[i].converted &&
3124 !ProcessCharSet(gData, &re->classList[i])) {
3125 return E_FAIL;
3126 }
3127 }
3128
3129 return S_OK;
3130
3131bad:
3132 gData->ok = FALSE;
3133 return E_OUTOFMEMORY;
3134}
#define E_OUTOFMEMORY
Definition: ddrawi.h:100
#define E_FAIL
Definition: ddrawi.h:102
#define S_OK
Definition: intsafe.h:52
#define INITIAL_BACKTRACK
Definition: regexp.c:230
#define INITIAL_STATESTACK
Definition: regexp.c:229
void * heap_pool_alloc(heap_pool_t *, DWORD) __WINE_ALLOC_SIZE(2)
Definition: jsutils.c:72
_Out_opt_ int * cx
Definition: commctrl.h:585
size_t backTrackStackSize
Definition: regexp.c:247
heap_pool_t * pool
Definition: regexp.c:252
size_t stateStackLimit
Definition: regexp.c:243
size_t classCount
Definition: regexp.h:59

Referenced by regexp_execute().

◆ isASCIIHexDigit()

static BOOL isASCIIHexDigit ( WCHAR  c,
UINT digit 
)
static

Definition at line 426 of file regexp.c.

427{
428 UINT cv = c;
429
430 if (cv < '0')
431 return FALSE;
432 if (cv <= '9') {
433 *digit = cv - '0';
434 return TRUE;
435 }
436 cv |= 0x20;
437 if (cv >= 'a' && cv <= 'f') {
438 *digit = cv - 'a' + 10;
439 return TRUE;
440 }
441 return FALSE;
442}
CONDITION_VARIABLE cv
Definition: sync.c:1887

Referenced by CalculateBitmapSize(), ParseTerm(), and ProcessCharSet().

◆ MatchRegExp()

static match_state_t * MatchRegExp ( REGlobalData gData,
match_state_t x 
)
static

Definition at line 3070 of file regexp.c.

3071{
3073 const WCHAR *cp = x->cp;
3074 const WCHAR *cp2;
3075 UINT j;
3076
3077 /*
3078 * Have to include the position beyond the last character
3079 * in order to detect end-of-input/line condition.
3080 */
3081 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3082 gData->skipped = cp2 - cp;
3083 x->cp = cp2;
3084 for (j = 0; j < gData->regexp->parenCount; j++)
3085 x->parens[j].index = -1;
3086 result = ExecuteREBytecode(gData, x);
3087 if (!gData->ok || result || (gData->regexp->flags & REG_STICKY))
3088 return result;
3089 gData->backTrackSP = gData->backTrackStack;
3090 gData->cursz = 0;
3091 gData->stateStackTop = 0;
3092 cp2 = cp + gData->skipped;
3093 }
3094 return NULL;
3095}
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
static match_state_t * ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:2533
POINT cp
Definition: magnifier.c:59
size_t parenCount
Definition: regexp.h:58

Referenced by regexp_execute().

◆ NewRENode()

static RENode * NewRENode ( CompilerState state,
REOp  op 
)
static

Definition at line 407 of file regexp.c.

408{
409 RENode *ren;
410
411 ren = heap_pool_alloc(state->pool, sizeof(*ren));
412 if (!ren) {
413 throw_error(state->context, E_OUTOFMEMORY, L"");
414 return NULL;
415 }
416 ren->op = op;
417 ren->next = NULL;
418 ren->kid = NULL;
419 return ren;
420}
HRESULT throw_error(script_ctx_t *ctx, HRESULT error, const WCHAR *str)
Definition: error.c:398
#define L(x)
Definition: resources.c:13
Definition: regexp.c:258
RENode * next
Definition: regexp.c:260
void * kid
Definition: regexp.c:261

Referenced by ParseMinMaxQuantifier(), ParseQuantifier(), ParseRegExp(), ParseTerm(), ProcessOp(), and regexp_new().

◆ ParseMinMaxQuantifier()

static INT ParseMinMaxQuantifier ( CompilerState state,
BOOL  ignoreValues 
)
static

Definition at line 1161 of file regexp.c.

1162{
1163 UINT min, max;
1164 WCHAR c;
1165 const WCHAR *errp = state->cp++;
1166
1167 c = *state->cp;
1168 if (JS7_ISDEC(c)) {
1169 ++state->cp;
1170 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1171 c = *state->cp;
1172
1173 if (!ignoreValues && min == OVERFLOW_VALUE)
1174 return JSMSG_MIN_TOO_BIG;
1175
1176 if (c == ',') {
1177 c = *++state->cp;
1178 if (JS7_ISDEC(c)) {
1179 ++state->cp;
1180 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1181 c = *state->cp;
1182 if (!ignoreValues && max == OVERFLOW_VALUE)
1183 return JSMSG_MAX_TOO_BIG;
1184 if (!ignoreValues && min > max)
1185 return JSMSG_OUT_OF_ORDER;
1186 } else {
1187 max = (UINT)-1;
1188 }
1189 } else {
1190 max = min;
1191 }
1192 if (c == '}') {
1193 state->result = NewRENode(state, REOP_QUANT);
1194 if (!state->result)
1195 return JSMSG_OUT_OF_MEMORY;
1196 state->result->u.range.min = min;
1197 state->result->u.range.max = max;
1198 /*
1199 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1200 * where <max> is written as compact(max+1) to make
1201 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1202 */
1203 state->progLength += (1 + GetCompactIndexWidth(min)
1205 +3);
1206 return 0;
1207 }
1208 }
1209
1210 state->cp = errp;
1211 return -1;
1212}
#define JSMSG_OUT_OF_MEMORY
Definition: regexp.c:77
#define JSMSG_MIN_TOO_BIG
Definition: regexp.c:74
static UINT GetDecimalValue(WCHAR c, UINT max, UINT(*findMax)(CompilerState *state), CompilerState *state)
Definition: regexp.c:943
#define JSMSG_MAX_TOO_BIG
Definition: regexp.c:75
#define JSMSG_OUT_OF_ORDER
Definition: regexp.c:76
static RENode * NewRENode(CompilerState *state, REOp op)
Definition: regexp.c:407
#define min(a, b)
Definition: monoChain.cc:55

Referenced by ParseQuantifier(), ParseRegExp(), and ParseTerm().

◆ ParseQuantifier()

static BOOL ParseQuantifier ( CompilerState state)
static

Definition at line 1215 of file regexp.c.

1216{
1217 RENode *term;
1218 term = state->result;
1219 if (state->cp < state->cpend) {
1220 switch (*state->cp) {
1221 case '+':
1222 state->result = NewRENode(state, REOP_QUANT);
1223 if (!state->result)
1224 return FALSE;
1225 state->result->u.range.min = 1;
1226 state->result->u.range.max = (UINT)-1;
1227 /* <PLUS>, <next> ... <ENDCHILD> */
1228 state->progLength += 4;
1229 goto quantifier;
1230 case '*':
1231 state->result = NewRENode(state, REOP_QUANT);
1232 if (!state->result)
1233 return FALSE;
1234 state->result->u.range.min = 0;
1235 state->result->u.range.max = (UINT)-1;
1236 /* <STAR>, <next> ... <ENDCHILD> */
1237 state->progLength += 4;
1238 goto quantifier;
1239 case '?':
1240 state->result = NewRENode(state, REOP_QUANT);
1241 if (!state->result)
1242 return FALSE;
1243 state->result->u.range.min = 0;
1244 state->result->u.range.max = 1;
1245 /* <OPT>, <next> ... <ENDCHILD> */
1246 state->progLength += 4;
1247 goto quantifier;
1248 case '{': /* balance '}' */
1249 {
1250 INT err;
1251
1253 if (err == 0)
1254 goto quantifier;
1255 if (err == -1)
1256 return TRUE;
1257
1258 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1259 return FALSE;
1260 }
1261 default:;
1262 }
1263 }
1264 return TRUE;
1265
1266quantifier:
1267 if (state->treeDepth == TREE_DEPTH_MAX) {
1268 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1269 return FALSE;
1270 }
1271
1272 ++state->treeDepth;
1273 ++state->cp;
1274 state->result->kid = term;
1275 if (state->cp < state->cpend && *state->cp == '?') {
1276 ++state->cp;
1277 state->result->u.range.greedy = FALSE;
1278 } else {
1279 state->result->u.range.greedy = TRUE;
1280 }
1281 return TRUE;
1282}
static INT ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
Definition: regexp.c:1161
#define TREE_DEPTH_MAX
Definition: regexp.c:345
#define ReportRegExpErrorHelper(a, b, c, d)
Definition: regexp.c:45
#define err(...)
int32_t INT
Definition: typedefs.h:58

Referenced by ParseRegExp(), and ParseTerm().

◆ ParseRegExp()

static BOOL ParseRegExp ( CompilerState state)
static

Definition at line 1648 of file regexp.c.

1649{
1650 size_t parenIndex;
1651 RENode *operand;
1652 REOpData *operatorStack;
1653 RENode **operandStack;
1654 REOp op;
1655 INT i;
1656 BOOL result = FALSE;
1657
1658 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1659 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1660
1661 /* Watch out for empty regexp */
1662 if (state->cp == state->cpend) {
1663 state->result = NewRENode(state, REOP_EMPTY);
1664 return (state->result != NULL);
1665 }
1666
1667 operatorStack = malloc(sizeof(REOpData) * operatorStackSize);
1668 if (!operatorStack)
1669 return FALSE;
1670
1671 operandStack = malloc(sizeof(RENode *) * operandStackSize);
1672 if (!operandStack)
1673 goto out;
1674
1675 for (;;) {
1676 parenIndex = state->parenCount;
1677 if (state->cp == state->cpend) {
1678 /*
1679 * If we are at the end of the regexp and we're short one or more
1680 * operands, the regexp must have the form /x|/ or some such, with
1681 * left parentheses making us short more than one operand.
1682 */
1683 if (operatorSP >= operandSP) {
1684 operand = NewRENode(state, REOP_EMPTY);
1685 if (!operand)
1686 goto out;
1687 goto pushOperand;
1688 }
1689 } else {
1690 switch (*state->cp) {
1691 case '(':
1692 ++state->cp;
1693 if (state->cp + 1 < state->cpend &&
1694 *state->cp == '?' &&
1695 (state->cp[1] == '=' ||
1696 state->cp[1] == '!' ||
1697 state->cp[1] == ':')) {
1698 switch (state->cp[1]) {
1699 case '=':
1700 op = REOP_ASSERT;
1701 /* ASSERT, <next>, ... ASSERTTEST */
1702 state->progLength += 4;
1703 break;
1704 case '!':
1706 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1707 state->progLength += 4;
1708 break;
1709 default:
1711 break;
1712 }
1713 state->cp += 2;
1714 } else {
1715 op = REOP_LPAREN;
1716 /* LPAREN, <index>, ... RPAREN, <index> */
1717 state->progLength
1718 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1719 state->parenCount++;
1720 if (state->parenCount == 65535) {
1721 ReportRegExpError(state, JSREPORT_ERROR,
1722 JSMSG_TOO_MANY_PARENS);
1723 goto out;
1724 }
1725 }
1726 goto pushOperator;
1727
1728 case ')':
1729 /*
1730 * If there's no stacked open parenthesis, throw syntax error.
1731 */
1732 for (i = operatorSP - 1; ; i--) {
1733 if (i < 0) {
1734 ReportRegExpError(state, JSREPORT_ERROR,
1735 JSMSG_UNMATCHED_RIGHT_PAREN);
1736 goto out;
1737 }
1738 if (operatorStack[i].op == REOP_ASSERT ||
1739 operatorStack[i].op == REOP_ASSERT_NOT ||
1740 operatorStack[i].op == REOP_LPARENNON ||
1741 operatorStack[i].op == REOP_LPAREN) {
1742 break;
1743 }
1744 }
1745 /* FALL THROUGH */
1746
1747 case '|':
1748 /* Expected an operand before these, so make an empty one */
1749 operand = NewRENode(state, REOP_EMPTY);
1750 if (!operand)
1751 goto out;
1752 goto pushOperand;
1753
1754 default:
1755 if (!ParseTerm(state))
1756 goto out;
1757 operand = state->result;
1758pushOperand:
1759 if (operandSP == operandStackSize) {
1760 RENode **tmp;
1761 operandStackSize += operandStackSize;
1762 tmp = realloc(operandStack, sizeof(RENode *) * operandStackSize);
1763 if (!tmp)
1764 goto out;
1765 operandStack = tmp;
1766 }
1767 operandStack[operandSP++] = operand;
1768 break;
1769 }
1770 }
1771
1772 /* At the end; process remaining operators. */
1773restartOperator:
1774 if (state->cp == state->cpend) {
1775 while (operatorSP) {
1776 --operatorSP;
1777 if (!ProcessOp(state, &operatorStack[operatorSP],
1778 operandStack, operandSP))
1779 goto out;
1780 --operandSP;
1781 }
1782 assert(operandSP == 1);
1783 state->result = operandStack[0];
1784 result = TRUE;
1785 goto out;
1786 }
1787
1788 switch (*state->cp) {
1789 case '|':
1790 /* Process any stacked 'concat' operators */
1791 ++state->cp;
1792 while (operatorSP &&
1793 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1794 --operatorSP;
1795 if (!ProcessOp(state, &operatorStack[operatorSP],
1796 operandStack, operandSP)) {
1797 goto out;
1798 }
1799 --operandSP;
1800 }
1801 op = REOP_ALT;
1802 goto pushOperator;
1803
1804 case ')':
1805 /*
1806 * If there's no stacked open parenthesis, throw syntax error.
1807 */
1808 for (i = operatorSP - 1; ; i--) {
1809 if (i < 0) {
1810 ReportRegExpError(state, JSREPORT_ERROR,
1811 JSMSG_UNMATCHED_RIGHT_PAREN);
1812 goto out;
1813 }
1814 if (operatorStack[i].op == REOP_ASSERT ||
1815 operatorStack[i].op == REOP_ASSERT_NOT ||
1816 operatorStack[i].op == REOP_LPARENNON ||
1817 operatorStack[i].op == REOP_LPAREN) {
1818 break;
1819 }
1820 }
1821 ++state->cp;
1822
1823 /* Process everything on the stack until the open parenthesis. */
1824 for (;;) {
1825 assert(operatorSP);
1826 --operatorSP;
1827 switch (operatorStack[operatorSP].op) {
1828 case REOP_ASSERT:
1829 case REOP_ASSERT_NOT:
1830 case REOP_LPAREN:
1831 operand = NewRENode(state, operatorStack[operatorSP].op);
1832 if (!operand)
1833 goto out;
1834 operand->u.parenIndex =
1835 operatorStack[operatorSP].parenIndex;
1836 assert(operandSP);
1837 operand->kid = operandStack[operandSP - 1];
1838 operandStack[operandSP - 1] = operand;
1839 if (state->treeDepth == TREE_DEPTH_MAX) {
1840 ReportRegExpError(state, JSREPORT_ERROR,
1841 JSMSG_REGEXP_TOO_COMPLEX);
1842 goto out;
1843 }
1844 ++state->treeDepth;
1845 /* FALL THROUGH */
1846
1847 case REOP_LPARENNON:
1848 state->result = operandStack[operandSP - 1];
1849 if (!ParseQuantifier(state))
1850 goto out;
1851 operandStack[operandSP - 1] = state->result;
1852 goto restartOperator;
1853 default:
1854 if (!ProcessOp(state, &operatorStack[operatorSP],
1855 operandStack, operandSP))
1856 goto out;
1857 --operandSP;
1858 break;
1859 }
1860 }
1861 break;
1862
1863 case '{':
1864 {
1865 const WCHAR *errp = state->cp;
1866
1867 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1868 /*
1869 * This didn't even scan correctly as a quantifier, so we should
1870 * treat it as flat.
1871 */
1872 op = REOP_CONCAT;
1873 goto pushOperator;
1874 }
1875
1876 state->cp = errp;
1877 /* FALL THROUGH */
1878 }
1879
1880 case '+':
1881 case '*':
1882 case '?':
1883 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1884 state->cp);
1885 result = FALSE;
1886 goto out;
1887
1888 default:
1889 /* Anything else is the start of the next term. */
1890 op = REOP_CONCAT;
1891pushOperator:
1892 if (operatorSP == operatorStackSize) {
1893 REOpData *tmp;
1894 operatorStackSize += operatorStackSize;
1895 tmp = realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1896 if (!tmp)
1897 goto out;
1898 operatorStack = tmp;
1899 }
1900 operatorStack[operatorSP].op = op;
1901 operatorStack[operatorSP].errPos = state->cp;
1902 operatorStack[operatorSP++].parenIndex = parenIndex;
1903 break;
1904 }
1905 }
1906out:
1907 free(operatorStack);
1908 free(operandStack);
1909 return result;
1910}
#define realloc
Definition: debug_ros.c:6
static BOOL ParseTerm(CompilerState *state)
Definition: regexp.c:1339
#define INITIAL_STACK_SIZE
Definition: regexp.c:1645
static BOOL ParseQuantifier(CompilerState *state)
Definition: regexp.c:1215
static BOOL ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, INT operandSP)
Definition: regexp.c:803
size_t parenIndex
Definition: regexp.c:265
union RENode::@451 u
const WCHAR * errPos
Definition: regexp.c:446
REOp op
Definition: regexp.c:445
size_t parenIndex
Definition: regexp.c:447
wchar_t tm const _CrtWcstime_Writes_and_advances_ptr_ count wchar_t ** out
Definition: wcsftime.cpp:383

Referenced by FindParenCount(), and regexp_new().

◆ ParseTerm()

static BOOL ParseTerm ( CompilerState state)
static

Definition at line 1339 of file regexp.c.

1340{
1341 WCHAR c = *state->cp++;
1342 UINT nDigits;
1343 UINT num, tmp, n, i;
1344 const WCHAR *termStart;
1345
1346 switch (c) {
1347 /* assertions and atoms */
1348 case '^':
1349 state->result = NewRENode(state, REOP_BOL);
1350 if (!state->result)
1351 return FALSE;
1352 state->progLength++;
1353 return TRUE;
1354 case '$':
1355 state->result = NewRENode(state, REOP_EOL);
1356 if (!state->result)
1357 return FALSE;
1358 state->progLength++;
1359 return TRUE;
1360 case '\\':
1361 if (state->cp >= state->cpend) {
1362 /* a trailing '\' is an error */
1363 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1364 return FALSE;
1365 }
1366 c = *state->cp++;
1367 switch (c) {
1368 /* assertion escapes */
1369 case 'b' :
1370 state->result = NewRENode(state, REOP_WBDRY);
1371 if (!state->result)
1372 return FALSE;
1373 state->progLength++;
1374 return TRUE;
1375 case 'B':
1376 state->result = NewRENode(state, REOP_WNONBDRY);
1377 if (!state->result)
1378 return FALSE;
1379 state->progLength++;
1380 return TRUE;
1381 /* Decimal escape */
1382 case '0':
1383 /* Give a strict warning. See also the note below. */
1384 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1385 doOctal:
1386 num = 0;
1387 while (state->cp < state->cpend) {
1388 c = *state->cp;
1389 if (c < '0' || '7' < c)
1390 break;
1391 state->cp++;
1392 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1393 if (tmp > 0377)
1394 break;
1395 num = tmp;
1396 }
1397 c = (WCHAR)num;
1398 doFlat:
1399 state->result = NewRENode(state, REOP_FLAT);
1400 if (!state->result)
1401 return FALSE;
1402 state->result->u.flat.chr = c;
1403 state->result->u.flat.length = 1;
1404 state->progLength += 3;
1405 break;
1406 case '1':
1407 case '2':
1408 case '3':
1409 case '4':
1410 case '5':
1411 case '6':
1412 case '7':
1413 case '8':
1414 case '9':
1415 termStart = state->cp - 1;
1416 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1417 if (state->flags & JSREG_FIND_PAREN_ERROR)
1418 return FALSE;
1419 if (num == OVERFLOW_VALUE) {
1420 /* Give a strict mode warning. */
1421 WARN("back-reference exceeds number of capturing parentheses\n");
1422
1423 /*
1424 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1425 * error here. However, for compatibility with IE, we treat the
1426 * whole backref as flat if the first character in it is not a
1427 * valid octal character, and as an octal escape otherwise.
1428 */
1429 state->cp = termStart;
1430 if (c >= '8') {
1431 /* Treat this as flat. termStart - 1 is the \. */
1432 c = '\\';
1433 goto asFlat;
1434 }
1435
1436 /* Treat this as an octal escape. */
1437 goto doOctal;
1438 }
1439 assert(1 <= num && num <= 0x10000);
1440 state->result = NewRENode(state, REOP_BACKREF);
1441 if (!state->result)
1442 return FALSE;
1443 state->result->u.parenIndex = num - 1;
1444 state->progLength
1445 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1446 break;
1447 /* Control escape */
1448 case 'f':
1449 c = 0xC;
1450 goto doFlat;
1451 case 'n':
1452 c = 0xA;
1453 goto doFlat;
1454 case 'r':
1455 c = 0xD;
1456 goto doFlat;
1457 case 't':
1458 c = 0x9;
1459 goto doFlat;
1460 case 'v':
1461 c = 0xB;
1462 goto doFlat;
1463 /* Control letter */
1464 case 'c':
1465 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1466 c = (WCHAR) (*state->cp++ & 0x1F);
1467 } else {
1468 /* back off to accepting the original '\' as a literal */
1469 --state->cp;
1470 c = '\\';
1471 }
1472 goto doFlat;
1473 /* HexEscapeSequence */
1474 case 'x':
1475 nDigits = 2;
1476 goto lexHex;
1477 /* UnicodeEscapeSequence */
1478 case 'u':
1479 nDigits = 4;
1480lexHex:
1481 n = 0;
1482 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1483 UINT digit;
1484 c = *state->cp++;
1485 if (!isASCIIHexDigit(c, &digit)) {
1486 /*
1487 * Back off to accepting the original 'u' or 'x' as a
1488 * literal.
1489 */
1490 state->cp -= i + 2;
1491 n = *state->cp++;
1492 break;
1493 }
1494 n = (n << 4) | digit;
1495 }
1496 c = (WCHAR) n;
1497 goto doFlat;
1498 /* Character class escapes */
1499 case 'd':
1500 state->result = NewRENode(state, REOP_DIGIT);
1501doSimple:
1502 if (!state->result)
1503 return FALSE;
1504 state->progLength++;
1505 break;
1506 case 'D':
1507 state->result = NewRENode(state, REOP_NONDIGIT);
1508 goto doSimple;
1509 case 's':
1510 state->result = NewRENode(state, REOP_SPACE);
1511 goto doSimple;
1512 case 'S':
1513 state->result = NewRENode(state, REOP_NONSPACE);
1514 goto doSimple;
1515 case 'w':
1516 state->result = NewRENode(state, REOP_ALNUM);
1517 goto doSimple;
1518 case 'W':
1519 state->result = NewRENode(state, REOP_NONALNUM);
1520 goto doSimple;
1521 /* IdentityEscape */
1522 default:
1523 state->result = NewRENode(state, REOP_FLAT);
1524 if (!state->result)
1525 return FALSE;
1526 state->result->u.flat.chr = c;
1527 state->result->u.flat.length = 1;
1528 state->result->kid = (void *) (state->cp - 1);
1529 state->progLength += 3;
1530 break;
1531 }
1532 break;
1533 case '[':
1534 state->result = NewRENode(state, REOP_CLASS);
1535 if (!state->result)
1536 return FALSE;
1537 termStart = state->cp;
1538 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1539 for (;;) {
1540 if (state->cp == state->cpend) {
1541 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1542 JSMSG_UNTERM_CLASS, termStart);
1543
1544 return FALSE;
1545 }
1546 if (*state->cp == '\\') {
1547 state->cp++;
1548 if (state->cp != state->cpend)
1549 state->cp++;
1550 continue;
1551 }
1552 if (*state->cp == ']') {
1553 state->result->u.ucclass.kidlen = state->cp - termStart;
1554 break;
1555 }
1556 state->cp++;
1557 }
1558 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1559 if (!state->classCache[i].start) {
1560 state->classCache[i].start = termStart;
1561 state->classCache[i].length = state->result->u.ucclass.kidlen;
1562 state->classCache[i].index = state->classCount;
1563 break;
1564 }
1565 if (state->classCache[i].length ==
1566 state->result->u.ucclass.kidlen) {
1567 for (n = 0; ; n++) {
1568 if (n == state->classCache[i].length) {
1569 state->result->u.ucclass.index
1570 = state->classCache[i].index;
1571 goto claim;
1572 }
1573 if (state->classCache[i].start[n] != termStart[n])
1574 break;
1575 }
1576 }
1577 }
1578 state->result->u.ucclass.index = state->classCount++;
1579
1580 claim:
1581 /*
1582 * Call CalculateBitmapSize now as we want any errors it finds
1583 * to be reported during the parse phase, not at execution.
1584 */
1585 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1586 return FALSE;
1587 /*
1588 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1589 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1590 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1591 */
1592 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1593 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1594 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1595 return FALSE;
1596 }
1597 state->classBitmapsMem += n;
1598 /* CLASS, <index> */
1599 state->progLength
1600 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1601 break;
1602
1603 case '.':
1604 state->result = NewRENode(state, REOP_DOT);
1605 goto doSimple;
1606
1607 case '{':
1608 {
1609 const WCHAR *errp = state->cp--;
1610 INT err;
1611
1613 state->cp = errp;
1614
1615 if (err < 0)
1616 goto asFlat;
1617
1618 /* FALL THROUGH */
1619 }
1620 case '*':
1621 case '+':
1622 case '?':
1624 return FALSE;
1625 default:
1626asFlat:
1627 state->result = NewRENode(state, REOP_FLAT);
1628 if (!state->result)
1629 return FALSE;
1630 state->result->u.flat.chr = c;
1631 state->result->u.flat.length = 1;
1632 state->result->kid = (void *) (state->cp - 1);
1633 state->progLength += 3;
1634 break;
1635 }
1636 return ParseQuantifier(state);
1637}
#define WARN(fmt,...)
Definition: precomp.h:61
GLuint GLuint num
Definition: glext.h:9618
static UINT FindParenCount(CompilerState *state)
Definition: regexp.c:904
static BOOL CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src, const WCHAR *end)
Definition: regexp.c:967
#define CLASS_BITMAPS_MEM_LIMIT
Definition: regexp.c:351
#define SCRIPTLANGUAGEVERSION_ES5
Definition: jscript.h:53
#define JS_E_REGEXP_SYNTAX
Definition: jscript.h:564
#define JS_E_UNEXPECTED_QUANTIFIER
Definition: jscript.h:565

Referenced by ParseRegExp().

◆ ProcessCharSet()

static BOOL ProcessCharSet ( REGlobalData gData,
RECharSet charSet 
)
static

Definition at line 2084 of file regexp.c.

2085{
2086 const WCHAR *src, *end;
2087 BOOL inRange = FALSE;
2088 WCHAR rangeStart = 0;
2089 UINT byteLength, n;
2090 WCHAR c, thisCh;
2091 INT nDigits, i;
2092
2093 assert(!charSet->converted);
2094 /*
2095 * Assert that startIndex and length points to chars inside [] inside
2096 * source string.
2097 */
2098 assert(1 <= charSet->u.src.startIndex);
2099 assert(charSet->u.src.startIndex < gData->regexp->source_len);
2100 assert(charSet->u.src.length <= gData->regexp->source_len
2101 - 1 - charSet->u.src.startIndex);
2102
2103 charSet->converted = TRUE;
2104 src = gData->regexp->source + charSet->u.src.startIndex;
2105
2106 end = src + charSet->u.src.length;
2107
2108 assert(src[-1] == '[' && end[0] == ']');
2109
2110 byteLength = (charSet->length >> 3) + 1;
2111 charSet->u.bits = malloc(byteLength);
2112 if (!charSet->u.bits) {
2113 throw_error(gData->cx, E_OUTOFMEMORY, L"");
2114 gData->ok = FALSE;
2115 return FALSE;
2116 }
2117 memset(charSet->u.bits, 0, byteLength);
2118
2119 if (src == end)
2120 return TRUE;
2121
2122 if (*src == '^') {
2123 assert(charSet->sense == FALSE);
2124 ++src;
2125 } else {
2126 assert(charSet->sense == TRUE);
2127 }
2128
2129 while (src != end) {
2130 switch (*src) {
2131 case '\\':
2132 ++src;
2133 c = *src++;
2134 switch (c) {
2135 case 'b':
2136 thisCh = 0x8;
2137 break;
2138 case 'f':
2139 thisCh = 0xC;
2140 break;
2141 case 'n':
2142 thisCh = 0xA;
2143 break;
2144 case 'r':
2145 thisCh = 0xD;
2146 break;
2147 case 't':
2148 thisCh = 0x9;
2149 break;
2150 case 'v':
2151 thisCh = 0xB;
2152 break;
2153 case 'c':
2154 if (src < end && JS_ISWORD(*src)) {
2155 thisCh = (WCHAR)(*src++ & 0x1F);
2156 } else {
2157 --src;
2158 thisCh = '\\';
2159 }
2160 break;
2161 case 'x':
2162 nDigits = 2;
2163 goto lexHex;
2164 case 'u':
2165 nDigits = 4;
2166 lexHex:
2167 n = 0;
2168 for (i = 0; (i < nDigits) && (src < end); i++) {
2169 UINT digit;
2170 c = *src++;
2171 if (!isASCIIHexDigit(c, &digit)) {
2172 /*
2173 * Back off to accepting the original '\'
2174 * as a literal
2175 */
2176 src -= i + 1;
2177 n = '\\';
2178 break;
2179 }
2180 n = (n << 4) | digit;
2181 }
2182 thisCh = (WCHAR)n;
2183 break;
2184 case '0':
2185 case '1':
2186 case '2':
2187 case '3':
2188 case '4':
2189 case '5':
2190 case '6':
2191 case '7':
2192 /*
2193 * This is a non-ECMA extension - decimal escapes (in this
2194 * case, octal!) are supposed to be an error inside class
2195 * ranges, but supported here for backwards compatibility.
2196 */
2197 n = JS7_UNDEC(c);
2198 c = *src;
2199 if ('0' <= c && c <= '7') {
2200 src++;
2201 n = 8 * n + JS7_UNDEC(c);
2202 c = *src;
2203 if ('0' <= c && c <= '7') {
2204 src++;
2205 i = 8 * n + JS7_UNDEC(c);
2206 if (i <= 0377)
2207 n = i;
2208 else
2209 src--;
2210 }
2211 }
2212 thisCh = (WCHAR)n;
2213 break;
2214
2215 case 'd':
2216 AddCharacterRangeToCharSet(charSet, '0', '9');
2217 continue; /* don't need range processing */
2218 case 'D':
2219 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2221 (WCHAR)('9' + 1),
2222 (WCHAR)charSet->length);
2223 continue;
2224 case 's':
2225 for (i = (INT)charSet->length; i >= 0; i--)
2226 if (iswspace(i))
2227 AddCharacterToCharSet(charSet, (WCHAR)i);
2228 continue;
2229 case 'S':
2230 for (i = (INT)charSet->length; i >= 0; i--)
2231 if (!iswspace(i))
2232 AddCharacterToCharSet(charSet, (WCHAR)i);
2233 continue;
2234 case 'w':
2235 for (i = (INT)charSet->length; i >= 0; i--)
2236 if (JS_ISWORD(i))
2237 AddCharacterToCharSet(charSet, (WCHAR)i);
2238 continue;
2239 case 'W':
2240 for (i = (INT)charSet->length; i >= 0; i--)
2241 if (!JS_ISWORD(i))
2242 AddCharacterToCharSet(charSet, (WCHAR)i);
2243 continue;
2244 default:
2245 thisCh = c;
2246 break;
2247
2248 }
2249 break;
2250
2251 default:
2252 thisCh = *src++;
2253 break;
2254
2255 }
2256 if (inRange) {
2257 if (gData->regexp->flags & REG_FOLD) {
2258 assert(rangeStart <= thisCh);
2259 for (i = rangeStart; i <= thisCh; i++) {
2260 WCHAR uch, dch;
2261
2262 AddCharacterToCharSet(charSet, i);
2263 uch = towupper(i);
2264 dch = towlower(i);
2265 if (i != uch)
2266 AddCharacterToCharSet(charSet, uch);
2267 if (i != dch)
2268 AddCharacterToCharSet(charSet, dch);
2269 }
2270 } else {
2271 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2272 }
2273 inRange = FALSE;
2274 } else {
2275 if (gData->regexp->flags & REG_FOLD) {
2276 AddCharacterToCharSet(charSet, towupper(thisCh));
2277 AddCharacterToCharSet(charSet, towlower(thisCh));
2278 } else {
2279 AddCharacterToCharSet(charSet, thisCh);
2280 }
2281 if (src < end - 1) {
2282 if (*src == '-') {
2283 ++src;
2284 inRange = TRUE;
2285 rangeStart = thisCh;
2286 }
2287 }
2288 }
2289 }
2290 return TRUE;
2291}
static void AddCharacterToCharSet(RECharSet *cs, WCHAR c)
Definition: regexp.c:2050
#define JS_ISWORD(c)
Definition: regexp.c:87
static void AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
Definition: regexp.c:2060
#define iswspace(_c)
Definition: ctype.h:669
#define memset(x, y, z)
Definition: compat.h:39
const WCHAR * source
Definition: regexp.h:61
DWORD source_len
Definition: regexp.h:62

Referenced by ExecuteREBytecode(), and InitMatch().

◆ ProcessOp()

static BOOL ProcessOp ( CompilerState state,
REOpData opData,
RENode **  operandStack,
INT  operandSP 
)
static

Definition at line 803 of file regexp.c.

805{
806 RENode *result;
807
808 switch (opData->op) {
809 case REOP_ALT:
811 if (!result)
812 return FALSE;
813 result->kid = operandStack[operandSP - 2];
814 result->u.kid2 = operandStack[operandSP - 1];
815 operandStack[operandSP - 2] = result;
816
817 if (state->treeDepth == TREE_DEPTH_MAX) {
818 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
819 return FALSE;
820 }
821 ++state->treeDepth;
822
823 /*
824 * Look at both alternates to see if there's a FLAT or a CLASS at
825 * the start of each. If so, use a prerequisite match.
826 */
827 if (((RENode *) result->kid)->op == REOP_FLAT &&
828 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
829 (state->flags & REG_FOLD) == 0) {
831 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
832 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
833 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
834 JUMP, <end> ... ENDALT */
835 state->progLength += 13;
836 }
837 else
838 if (((RENode *) result->kid)->op == REOP_CLASS &&
839 ((RENode *) result->kid)->u.ucclass.index < 256 &&
840 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
841 (state->flags & REG_FOLD) == 0) {
843 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
844 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
845 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
846 JUMP, <end> ... ENDALT */
847 state->progLength += 13;
848 }
849 else
850 if (((RENode *) result->kid)->op == REOP_FLAT &&
851 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
852 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
853 (state->flags & REG_FOLD) == 0) {
855 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
856 result->u.altprereq.ch2 =
857 ((RENode *) result->u.kid2)->u.ucclass.index;
858 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
859 JUMP, <end> ... ENDALT */
860 state->progLength += 13;
861 }
862 else {
863 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
864 state->progLength += 7;
865 }
866 break;
867
868 case REOP_CONCAT:
869 result = operandStack[operandSP - 2];
870 while (result->next)
871 result = result->next;
872 result->next = operandStack[operandSP - 1];
873 break;
874
875 case REOP_ASSERT:
876 case REOP_ASSERT_NOT:
877 case REOP_LPARENNON:
878 case REOP_LPAREN:
879 /* These should have been processed by a close paren. */
880 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
881 opData->errPos);
882 return FALSE;
883
884 default:;
885 }
886 return TRUE;
887}
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
void * kid2
Definition: regexp.c:263

Referenced by ParseRegExp().

◆ PushBackTrackState()

static REBackTrackData * PushBackTrackState ( REGlobalData gData,
REOp  op,
jsbytecode target,
match_state_t x,
const WCHAR cp,
size_t  parenIndex,
size_t  parenCount 
)
static

Definition at line 1919 of file regexp.c.

1922{
1923 size_t i;
1925 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1926
1927 size_t sz = sizeof(REBackTrackData) +
1928 gData->stateStackTop * sizeof(REProgState) +
1929 parenCount * sizeof(RECapture);
1930
1931 ptrdiff_t btsize = gData->backTrackStackSize;
1932 ptrdiff_t btincr = ((char *)result + sz) -
1933 ((char *)gData->backTrackStack + btsize);
1934
1935 TRACE("\tBT_Push: %Iu,%Iu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1936
1937 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1938 if (btincr > 0) {
1939 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1940
1941 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1942 btincr = ((btincr+btsize-1)/btsize)*btsize;
1943 gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1944 if (!gData->backTrackStack) {
1945 throw_error(gData->cx, E_OUTOFMEMORY, L"");
1946 gData->ok = FALSE;
1947 return NULL;
1948 }
1949 gData->backTrackStackSize = btsize + btincr;
1950 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1951 }
1952 gData->backTrackSP = result;
1953 result->sz = gData->cursz;
1954 gData->cursz = sz;
1955
1956 result->backtrack_op = op;
1957 result->backtrack_pc = target;
1958 result->cp = cp;
1959 result->parenCount = parenCount;
1960 result->parenIndex = parenIndex;
1961
1962 result->saveStateStackTop = gData->stateStackTop;
1963 assert(gData->stateStackTop);
1964 memcpy(result + 1, gData->stateStack,
1965 sizeof(REProgState) * result->saveStateStackTop);
1966
1967 if (parenCount != 0) {
1968 memcpy((char *)(result + 1) +
1969 sizeof(REProgState) * result->saveStateStackTop,
1970 &x->parens[parenIndex],
1971 sizeof(RECapture) * parenCount);
1972 for (i = 0; i != parenCount; i++)
1973 x->parens[parenIndex + i].index = -1;
1974 }
1975
1976 return result;
1977}
GLintptr offset
Definition: glext.h:5920
#define JS_COUNT_OPERATION(a, b)
Definition: regexp.c:48
struct REBackTrackData REBackTrackData
void * heap_pool_grow(heap_pool_t *, void *, DWORD, DWORD)
Definition: jsutils.c:125

Referenced by ExecuteREBytecode().

◆ ReadCompactIndex()

static jsbytecode * ReadCompactIndex ( jsbytecode pc,
size_t result 
)
inlinestatic

Definition at line 383 of file regexp.c.

384{
385 size_t nextByte;
386
387 nextByte = *pc++;
388 if ((nextByte & 0x80) == 0) {
389 /*
390 * Short-circuit the most common case when compact index <= 127.
391 */
392 *result = nextByte;
393 } else {
394 size_t shift = 7;
395 *result = 0x7F & nextByte;
396 do {
397 nextByte = *pc++;
398 *result |= (nextByte & 0x7F) << shift;
399 shift += 7;
400 } while ((nextByte & 0x80) != 0);
401 }
402 return pc;
403}
#define shift
Definition: input.c:3280

Referenced by ExecuteREBytecode(), and SimpleMatch().

◆ ReallocStateStack()

static BOOL ReallocStateStack ( REGlobalData gData)
static

Definition at line 2294 of file regexp.c.

2295{
2296 size_t limit = gData->stateStackLimit;
2297 size_t sz = sizeof(REProgState) * limit;
2298
2299 gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2300 if (!gData->stateStack) {
2301 throw_error(gData->cx, E_OUTOFMEMORY, L"");
2302 gData->ok = FALSE;
2303 return FALSE;
2304 }
2305 gData->stateStackLimit = limit + limit;
2306 return TRUE;
2307}
GLint limit
Definition: glext.h:10326
struct REProgState REProgState

◆ regexp_destroy()

void regexp_destroy ( regexp_t re)

Definition at line 3177 of file regexp.c.

3178{
3179 if (re->classList) {
3180 UINT i;
3181 for (i = 0; i < re->classCount; i++) {
3182 if (re->classList[i].converted)
3183 free(re->classList[i].u.bits);
3184 re->classList[i].u.bits = NULL;
3185 }
3186 free(re->classList);
3187 }
3188 free(re);
3189}

Referenced by RegExp2_put_Pattern(), RegExp2_Release(), RegExp_destructor(), regexp_new(), and regexp_set_flags().

◆ regexp_execute()

HRESULT regexp_execute ( regexp_t regexp,
void cx,
heap_pool_t pool,
const WCHAR str,
DWORD  str_len,
match_state_t result 
)

Definition at line 3136 of file regexp.c.

3138{
3140 REGlobalData gData;
3142 const WCHAR *str_beg = result->cp;
3143 HRESULT hres;
3144
3145 assert(result->cp != NULL);
3146
3147 gData.cpbegin = str;
3148 gData.cpend = str+str_len;
3149 gData.start = result->cp-str;
3150 gData.skipped = 0;
3151 gData.pool = pool;
3152
3153 hres = InitMatch(regexp, cx, pool, &gData);
3154 if(FAILED(hres)) {
3155 WARN("InitMatch failed\n");
3156 heap_pool_clear(mark);
3157 return hres;
3158 }
3159
3160 res = MatchRegExp(&gData, result);
3161 heap_pool_clear(mark);
3162 if(!gData.ok) {
3163 WARN("MatchRegExp failed\n");
3164 return E_FAIL;
3165 }
3166
3167 if(!res) {
3168 result->match_len = 0;
3169 return S_FALSE;
3170 }
3171
3172 result->match_len = (result->cp-str_beg) - gData.skipped;
3173 result->paren_count = regexp->parenCount;
3174 return S_OK;
3175}
GLuint res
Definition: glext.h:9613
#define FAILED(hr)
Definition: intsafe.h:51
static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
Definition: regexp.c:3097
static match_state_t * MatchRegExp(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:3070
void heap_pool_clear(heap_pool_t *)
Definition: jsutils.c:141
heap_pool_t * heap_pool_mark(heap_pool_t *)
Definition: jsutils.c:177
HRESULT hres
Definition: protocol.c:465
const WCHAR * str
size_t start
Definition: regexp.c:236
#define str_len
Definition: treelist.c:89
#define S_FALSE
Definition: winerror.h:3451

Referenced by do_regexp_match_next(), RegExp2_Execute(), RegExp2_Replace(), and RegExp2_Test().

◆ regexp_new()

regexp_t * regexp_new ( void cx,
heap_pool_t pool,
const WCHAR str,
DWORD  str_len,
WORD  flags,
BOOL  flat 
)

Definition at line 3191 of file regexp.c.

3193{
3194 regexp_t *re;
3195 heap_pool_t *mark;
3197 size_t resize;
3198 jsbytecode *endPC;
3199 UINT i;
3200 size_t len;
3201
3202 re = NULL;
3203 mark = heap_pool_mark(pool);
3204 len = str_len;
3205
3206 state.context = cx;
3207 state.pool = pool;
3208 state.cp = str;
3209 if (!state.cp)
3210 goto out;
3211 state.cpbegin = state.cp;
3212 state.cpend = state.cp + len;
3213 state.flags = flags;
3214 state.parenCount = 0;
3215 state.classCount = 0;
3216 state.progLength = 0;
3217 state.treeDepth = 0;
3218 state.classBitmapsMem = 0;
3219 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3220 state.classCache[i].start = NULL;
3221
3222 if (len != 0 && flat) {
3223 state.result = NewRENode(&state, REOP_FLAT);
3224 if (!state.result)
3225 goto out;
3226 state.result->u.flat.chr = *state.cpbegin;
3227 state.result->u.flat.length = len;
3228 state.result->kid = (void *) state.cpbegin;
3229 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3230 state.progLength += 1 + GetCompactIndexWidth(0)
3232 } else {
3233 if (!ParseRegExp(&state))
3234 goto out;
3235 }
3236 resize = offsetof(regexp_t, program) + state.progLength + 1;
3237 re = malloc(resize);
3238 if (!re)
3239 goto out;
3240
3241 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3242 re->classCount = state.classCount;
3243 if (re->classCount) {
3244 re->classList = malloc(re->classCount * sizeof(RECharSet));
3245 if (!re->classList) {
3246 regexp_destroy(re);
3247 re = NULL;
3248 goto out;
3249 }
3250 for (i = 0; i < re->classCount; i++)
3251 re->classList[i].converted = FALSE;
3252 } else {
3253 re->classList = NULL;
3254 }
3255 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3256 if (!endPC) {
3257 regexp_destroy(re);
3258 re = NULL;
3259 goto out;
3260 }
3261 *endPC++ = REOP_END;
3262 /*
3263 * Check whether size was overestimated and shrink using realloc.
3264 * This is safe since no pointers to newly parsed regexp or its parts
3265 * besides re exist here.
3266 */
3267 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3268 regexp_t *tmp;
3269 assert((size_t)(endPC - re->program) < state.progLength + 1);
3270 resize = offsetof(regexp_t, program) + (endPC - re->program);
3271 tmp = realloc(re, resize);
3272 if (tmp)
3273 re = tmp;
3274 }
3275
3276 re->flags = flags;
3277 re->parenCount = state.parenCount;
3278 re->source = str;
3279 re->source_len = str_len;
3280
3281out:
3282 heap_pool_clear(mark);
3283 return re;
3284}
GLuint program
Definition: glext.h:6723
GLbitfield flags
Definition: glext.h:7161
static jsbytecode * EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t)
Definition: regexp.c:473
void regexp_destroy(regexp_t *re)
Definition: regexp.c:3177
#define offsetof(TYPE, MEMBER)

Referenced by create_regexp(), RegExp2_Execute(), RegExp2_Replace(), RegExp2_Test(), and regexp_set_flags().

◆ SetForwardJumpOffset()

static BOOL SetForwardJumpOffset ( jsbytecode jump,
jsbytecode target 
)
static

Definition at line 454 of file regexp.c.

455{
456 ptrdiff_t offset = target - jump;
457
458 /* Check that target really points forward. */
459 assert(offset >= 2);
460 if ((size_t)offset > OFFSET_MAX)
461 return FALSE;
462
463 jump[0] = JUMP_OFFSET_HI(offset);
464 jump[1] = JUMP_OFFSET_LO(offset);
465 return TRUE;
466}

Referenced by EmitREBytecode().

◆ SimpleMatch()

static match_state_t * SimpleMatch ( REGlobalData gData,
match_state_t x,
REOp  op,
jsbytecode **  startpc,
BOOL  updatecp 
)
inlinestatic

gData->cx->regExpStatics.multiline && FIXME !!!

gData->cx->regExpStatics.multiline &&

Definition at line 2325 of file regexp.c.

2327{
2329 WCHAR matchCh;
2330 size_t parenIndex;
2331 size_t offset, length, index;
2332 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2333 const WCHAR *source;
2334 const WCHAR *startcp = x->cp;
2335 WCHAR ch;
2336 RECharSet *charSet;
2337
2338 const char *opname = reop_names[op];
2339 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2340 (int)gData->stateStackTop * 2, "", opname);
2341
2342 switch (op) {
2343 case REOP_EMPTY:
2344 result = x;
2345 break;
2346 case REOP_BOL:
2347 if (x->cp != gData->cpbegin) {
2348 if (
2349 !(gData->regexp->flags & REG_MULTILINE)) {
2350 break;
2351 }
2352 if (!RE_IS_LINE_TERM(x->cp[-1]))
2353 break;
2354 }
2355 result = x;
2356 break;
2357 case REOP_EOL:
2358 if (x->cp != gData->cpend) {
2359 if (
2360 !(gData->regexp->flags & REG_MULTILINE)) {
2361 break;
2362 }
2363 if (!RE_IS_LINE_TERM(*x->cp))
2364 break;
2365 }
2366 result = x;
2367 break;
2368 case REOP_WBDRY:
2369 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2370 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2371 result = x;
2372 }
2373 break;
2374 case REOP_WNONBDRY:
2375 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2376 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2377 result = x;
2378 }
2379 break;
2380 case REOP_DOT:
2381 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2382 result = x;
2383 result->cp++;
2384 }
2385 break;
2386 case REOP_DIGIT:
2387 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2388 result = x;
2389 result->cp++;
2390 }
2391 break;
2392 case REOP_NONDIGIT:
2393 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2394 result = x;
2395 result->cp++;
2396 }
2397 break;
2398 case REOP_ALNUM:
2399 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2400 result = x;
2401 result->cp++;
2402 }
2403 break;
2404 case REOP_NONALNUM:
2405 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2406 result = x;
2407 result->cp++;
2408 }
2409 break;
2410 case REOP_SPACE:
2411 if (x->cp != gData->cpend && iswspace(*x->cp)) {
2412 result = x;
2413 result->cp++;
2414 }
2415 break;
2416 case REOP_NONSPACE:
2417 if (x->cp != gData->cpend && !iswspace(*x->cp)) {
2418 result = x;
2419 result->cp++;
2420 }
2421 break;
2422 case REOP_BACKREF:
2423 pc = ReadCompactIndex(pc, &parenIndex);
2424 assert(parenIndex < gData->regexp->parenCount);
2425 result = BackrefMatcher(gData, x, parenIndex);
2426 break;
2427 case REOP_FLAT:
2428 pc = ReadCompactIndex(pc, &offset);
2429 assert(offset < gData->regexp->source_len);
2430 pc = ReadCompactIndex(pc, &length);
2431 assert(1 <= length);
2432 assert(length <= gData->regexp->source_len - offset);
2433 if (length <= (size_t)(gData->cpend - x->cp)) {
2434 source = gData->regexp->source + offset;
2435 TRACE("%s\n", debugstr_wn(source, length));
2436 for (index = 0; index != length; index++) {
2437 if (source[index] != x->cp[index])
2438 return NULL;
2439 }
2440 x->cp += length;
2441 result = x;
2442 }
2443 break;
2444 case REOP_FLAT1:
2445 matchCh = *pc++;
2446 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2447 if (x->cp != gData->cpend && *x->cp == matchCh) {
2448 result = x;
2449 result->cp++;
2450 }
2451 break;
2452 case REOP_FLATi:
2453 pc = ReadCompactIndex(pc, &offset);
2454 assert(offset < gData->regexp->source_len);
2455 pc = ReadCompactIndex(pc, &length);
2456 assert(1 <= length);
2457 assert(length <= gData->regexp->source_len - offset);
2458 source = gData->regexp->source;
2459 result = FlatNIMatcher(gData, x, source + offset, length);
2460 break;
2461 case REOP_FLAT1i:
2462 matchCh = *pc++;
2463 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2464 result = x;
2465 result->cp++;
2466 }
2467 break;
2468 case REOP_UCFLAT1:
2469 matchCh = GET_ARG(pc);
2470 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2471 pc += ARG_LEN;
2472 if (x->cp != gData->cpend && *x->cp == matchCh) {
2473 result = x;
2474 result->cp++;
2475 }
2476 break;
2477 case REOP_UCFLAT1i:
2478 matchCh = GET_ARG(pc);
2479 pc += ARG_LEN;
2480 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2481 result = x;
2482 result->cp++;
2483 }
2484 break;
2485 case REOP_CLASS:
2486 pc = ReadCompactIndex(pc, &index);
2487 assert(index < gData->regexp->classCount);
2488 if (x->cp != gData->cpend) {
2489 charSet = &gData->regexp->classList[index];
2490 assert(charSet->converted);
2491 ch = *x->cp;
2492 index = ch >> 3;
2493 if (charSet->length != 0 &&
2494 ch <= charSet->length &&
2495 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2496 result = x;
2497 result->cp++;
2498 }
2499 }
2500 break;
2501 case REOP_NCLASS:
2502 pc = ReadCompactIndex(pc, &index);
2503 assert(index < gData->regexp->classCount);
2504 if (x->cp != gData->cpend) {
2505 charSet = &gData->regexp->classList[index];
2506 assert(charSet->converted);
2507 ch = *x->cp;
2508 index = ch >> 3;
2509 if (charSet->length == 0 ||
2510 ch > charSet->length ||
2511 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2512 result = x;
2513 result->cp++;
2514 }
2515 }
2516 break;
2517
2518 default:
2519 assert(FALSE);
2520 }
2521 if (result) {
2522 if (!updatecp)
2523 x->cp = startcp;
2524 *startpc = pc;
2525 TRACE(" *\n");
2526 return result;
2527 }
2528 x->cp = startcp;
2529 return NULL;
2530}
#define index(s, c)
Definition: various.h:29
#define REG_MULTILINE
Definition: regexp.h:38
unsigned char ch[4][2]
Definition: console.c:118
static match_state_t * FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length)
Definition: regexp.c:1980
#define RE_IS_LINE_TERM(c)
Definition: regexp.c:84
static match_state_t * BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
Definition: regexp.c:2019
#define debugstr_wn
Definition: kernel32.h:33

Referenced by ExecuteREBytecode().

◆ WINE_DEFAULT_DEBUG_CHANNEL()

WINE_DEFAULT_DEBUG_CHANNEL ( jscript  )

◆ WriteCompactIndex()

static jsbytecode * WriteCompactIndex ( jsbytecode pc,
size_t  index 
)
inlinestatic

Definition at line 370 of file regexp.c.

371{
372 size_t next;
373
374 while ((next = index >> 7) != 0) {
375 *pc++ = (jsbytecode)(index | 0x80);
376 index = next;
377 }
378 *pc++ = (jsbytecode)index;
379 return pc;
380}
static unsigned __int64 next
Definition: rand_nt.c:6

Referenced by EmitREBytecode().

Variable Documentation

◆ reop_names

const char* reop_names[]
static

Definition at line 147 of file regexp.c.

Referenced by ExecuteREBytecode(), and SimpleMatch().