ReactOS 0.4.16-dev-91-g764881a
regexp.c File Reference
#include <assert.h>
#include "jscript.h"
#include "regexp.h"
#include "wine/debug.h"
Include dependency graph for regexp.c:

Go to the source code of this file.

Classes

struct  RECharSet
 
struct  REProgState
 
struct  REBackTrackData
 
struct  REGlobalData
 
struct  RENode
 
struct  CompilerState
 
struct  EmitStateStackEntry
 
struct  REOpData
 

Macros

#define ReportRegExpError(a, b, c)
 
#define ReportRegExpErrorHelper(a, b, c, d)
 
#define JS_ReportErrorNumber(a, b, c, d)
 
#define JS_ReportErrorFlagsAndNumber(a, b, c, d, e, f)
 
#define js_ReportOutOfScriptQuota(a)
 
#define JS_ReportOutOfMemory(a)
 
#define JS_COUNT_OPERATION(a, b)
 
#define JSMSG_MIN_TOO_BIG   47
 
#define JSMSG_MAX_TOO_BIG   48
 
#define JSMSG_OUT_OF_ORDER   49
 
#define JSMSG_OUT_OF_MEMORY   137
 
#define LINE_SEPARATOR   0x2028
 
#define PARA_SEPARATOR   0x2029
 
#define RE_IS_LETTER(c)
 
#define RE_IS_LINE_TERM(c)
 
#define JS_ISWORD(c)   ((c) < 128 && (isalnum(c) || (c) == '_'))
 
#define JS7_ISDEC(c)   ((((unsigned)(c)) - '0') <= 9)
 
#define JS7_UNDEC(c)   ((c) - '0')
 
#define REOP_IS_SIMPLE(op)   ((op) <= REOP_NCLASS)
 
#define INITIAL_STATESTACK   100
 
#define INITIAL_BACKTRACK   8000
 
#define CLASS_CACHE_SIZE   4
 
#define ARG_LEN   2
 
#define GET_ARG(pc)   ((WORD)(((pc)[0] << 8) | (pc)[1]))
 
#define SET_ARG(pc, arg)
 
#define OFFSET_LEN   ARG_LEN
 
#define OFFSET_MAX   ((1 << (ARG_LEN * 8)) - 1)
 
#define GET_OFFSET(pc)   GET_ARG(pc)
 
#define TREE_DEPTH_MAX   ((1 << 24) / sizeof(EmitStateStackEntry))
 
#define CLASS_BITMAPS_MEM_LIMIT   (1 << 24)
 
#define JUMP_OFFSET_HI(off)   ((jsbytecode)((off) >> 8))
 
#define JUMP_OFFSET_LO(off)   ((jsbytecode)(off))
 
#define JSREG_FIND_PAREN_COUNT   0x8000
 
#define JSREG_FIND_PAREN_ERROR   0x4000
 
#define OVERFLOW_VALUE   ((UINT)-1)
 
#define INITIAL_STACK_SIZE   128
 
#define PUSH_STATE_STACK(data)
 
#define PREPARE_REPEAT()
 

Typedefs

typedef BYTE JSPackedBool
 
typedef struct RECharSet RECharSet
 
typedef enum REOp REOp
 
typedef struct REProgState REProgState
 
typedef struct REBackTrackData REBackTrackData
 
typedef struct REGlobalData REGlobalData
 
typedef struct RENode RENode
 
typedef struct CompilerState CompilerState
 
typedef struct EmitStateStackEntry EmitStateStackEntry
 

Enumerations

enum  REOp {
  REOP_EMPTY , REOP_BOL , REOP_EOL , REOP_WBDRY ,
  REOP_WNONBDRY , REOP_DOT , REOP_DIGIT , REOP_NONDIGIT ,
  REOP_ALNUM , REOP_NONALNUM , REOP_SPACE , REOP_NONSPACE ,
  REOP_BACKREF , REOP_FLAT , REOP_FLAT1 , REOP_FLATi ,
  REOP_FLAT1i , REOP_UCFLAT1 , REOP_UCFLAT1i , REOP_UCFLAT ,
  REOP_UCFLATi , REOP_CLASS , REOP_NCLASS , REOP_ALT ,
  REOP_QUANT , REOP_STAR , REOP_PLUS , REOP_OPT ,
  REOP_LPAREN , REOP_RPAREN , REOP_JUMP , REOP_DOTSTAR ,
  REOP_LPARENNON , REOP_ASSERT , REOP_ASSERT_NOT , REOP_ASSERTTEST ,
  REOP_ASSERTNOTTEST , REOP_MINIMALSTAR , REOP_MINIMALPLUS , REOP_MINIMALOPT ,
  REOP_MINIMALQUANT , REOP_ENDCHILD , REOP_REPEAT , REOP_MINIMALREPEAT ,
  REOP_ALTPREREQ , REOP_ALTPREREQ2 , REOP_ENDALT , REOP_CONCAT ,
  REOP_END , REOP_LIMIT , REOP_EMPTY , REOP_BOL ,
  REOP_EOL , REOP_WBDRY , REOP_WNONBDRY , REOP_DOT ,
  REOP_DIGIT , REOP_NONDIGIT , REOP_ALNUM , REOP_NONALNUM ,
  REOP_SPACE , REOP_NONSPACE , REOP_BACKREF , REOP_FLAT ,
  REOP_FLAT1 , REOP_FLATi , REOP_FLAT1i , REOP_UCFLAT1 ,
  REOP_UCFLAT1i , REOP_UCFLAT , REOP_UCFLATi , REOP_CLASS ,
  REOP_NCLASS , REOP_ALT , REOP_QUANT , REOP_STAR ,
  REOP_PLUS , REOP_OPT , REOP_LPAREN , REOP_RPAREN ,
  REOP_JUMP , REOP_DOTSTAR , REOP_LPARENNON , REOP_ASSERT ,
  REOP_ASSERT_NOT , REOP_ASSERTTEST , REOP_ASSERTNOTTEST , REOP_MINIMALSTAR ,
  REOP_MINIMALPLUS , REOP_MINIMALOPT , REOP_MINIMALQUANT , REOP_ENDCHILD ,
  REOP_REPEAT , REOP_MINIMALREPEAT , REOP_ALTPREREQ , REOP_ALTPREREQ2 ,
  REOP_ENDALT , REOP_CONCAT , REOP_END , REOP_LIMIT
}
 

Functions

 WINE_DEFAULT_DEBUG_CHANNEL (jscript)
 
static BOOL ParseRegExp (CompilerState *)
 
static size_t GetCompactIndexWidth (size_t index)
 
static jsbytecodeWriteCompactIndex (jsbytecode *pc, size_t index)
 
static jsbytecodeReadCompactIndex (jsbytecode *pc, size_t *result)
 
static RENodeNewRENode (CompilerState *state, REOp op)
 
static BOOL isASCIIHexDigit (WCHAR c, UINT *digit)
 
static BOOL SetForwardJumpOffset (jsbytecode *jump, jsbytecode *target)
 
static jsbytecodeEmitREBytecode (CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t)
 
static BOOL ProcessOp (CompilerState *state, REOpData *opData, RENode **operandStack, INT operandSP)
 
static UINT FindParenCount (CompilerState *state)
 
static UINT GetDecimalValue (WCHAR c, UINT max, UINT(*findMax)(CompilerState *state), CompilerState *state)
 
static BOOL CalculateBitmapSize (CompilerState *state, RENode *target, const WCHAR *src, const WCHAR *end)
 
static INT ParseMinMaxQuantifier (CompilerState *state, BOOL ignoreValues)
 
static BOOL ParseQuantifier (CompilerState *state)
 
static BOOL ParseTerm (CompilerState *state)
 
static REBackTrackDataPushBackTrackState (REGlobalData *gData, REOp op, jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount)
 
static match_state_tFlatNIMatcher (REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length)
 
static match_state_tBackrefMatcher (REGlobalData *gData, match_state_t *x, size_t parenIndex)
 
static void AddCharacterToCharSet (RECharSet *cs, WCHAR c)
 
static void AddCharacterRangeToCharSet (RECharSet *cs, UINT c1, UINT c2)
 
static BOOL ProcessCharSet (REGlobalData *gData, RECharSet *charSet)
 
static BOOL ReallocStateStack (REGlobalData *gData)
 
static match_state_tSimpleMatch (REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp)
 
static match_state_tExecuteREBytecode (REGlobalData *gData, match_state_t *x)
 
static match_state_tMatchRegExp (REGlobalData *gData, match_state_t *x)
 
static HRESULT InitMatch (regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
 
HRESULT regexp_execute (regexp_t *regexp, void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, match_state_t *result)
 
void regexp_destroy (regexp_t *re)
 
regexp_tregexp_new (void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, WORD flags, BOOL flat)
 

Variables

static const charreop_names []
 

Macro Definition Documentation

◆ ARG_LEN

#define ARG_LEN   2

Definition at line 332 of file regexp.c.

◆ CLASS_BITMAPS_MEM_LIMIT

#define CLASS_BITMAPS_MEM_LIMIT   (1 << 24)

Definition at line 353 of file regexp.c.

◆ CLASS_CACHE_SIZE

#define CLASS_CACHE_SIZE   4

Definition at line 292 of file regexp.c.

◆ GET_ARG

#define GET_ARG (   pc)    ((WORD)(((pc)[0] << 8) | (pc)[1]))

Definition at line 333 of file regexp.c.

◆ GET_OFFSET

#define GET_OFFSET (   pc)    GET_ARG(pc)

Definition at line 339 of file regexp.c.

◆ INITIAL_BACKTRACK

#define INITIAL_BACKTRACK   8000

Definition at line 232 of file regexp.c.

◆ INITIAL_STACK_SIZE

#define INITIAL_STACK_SIZE   128

Definition at line 1648 of file regexp.c.

◆ INITIAL_STATESTACK

#define INITIAL_STATESTACK   100

Definition at line 231 of file regexp.c.

◆ JS7_ISDEC

#define JS7_ISDEC (   c)    ((((unsigned)(c)) - '0') <= 9)

Definition at line 91 of file regexp.c.

◆ JS7_UNDEC

#define JS7_UNDEC (   c)    ((c) - '0')

Definition at line 92 of file regexp.c.

◆ JS_COUNT_OPERATION

#define JS_COUNT_OPERATION (   a,
  b 
)

Definition at line 50 of file regexp.c.

◆ JS_ISWORD

#define JS_ISWORD (   c)    ((c) < 128 && (isalnum(c) || (c) == '_'))

Definition at line 89 of file regexp.c.

◆ JS_ReportErrorFlagsAndNumber

#define JS_ReportErrorFlagsAndNumber (   a,
  b,
  c,
  d,
  e,
  f 
)

Definition at line 47 of file regexp.c.

◆ JS_ReportErrorNumber

#define JS_ReportErrorNumber (   a,
  b,
  c,
  d 
)

Definition at line 46 of file regexp.c.

◆ JS_ReportOutOfMemory

#define JS_ReportOutOfMemory (   a)

Definition at line 49 of file regexp.c.

◆ js_ReportOutOfScriptQuota

#define js_ReportOutOfScriptQuota (   a)

Definition at line 48 of file regexp.c.

◆ JSMSG_MAX_TOO_BIG

#define JSMSG_MAX_TOO_BIG   48

Definition at line 77 of file regexp.c.

◆ JSMSG_MIN_TOO_BIG

#define JSMSG_MIN_TOO_BIG   47

Definition at line 76 of file regexp.c.

◆ JSMSG_OUT_OF_MEMORY

#define JSMSG_OUT_OF_MEMORY   137

Definition at line 79 of file regexp.c.

◆ JSMSG_OUT_OF_ORDER

#define JSMSG_OUT_OF_ORDER   49

Definition at line 78 of file regexp.c.

◆ JSREG_FIND_PAREN_COUNT

#define JSREG_FIND_PAREN_COUNT   0x8000

Definition at line 895 of file regexp.c.

◆ JSREG_FIND_PAREN_ERROR

#define JSREG_FIND_PAREN_ERROR   0x4000

Definition at line 896 of file regexp.c.

◆ JUMP_OFFSET_HI

#define JUMP_OFFSET_HI (   off)    ((jsbytecode)((off) >> 8))

Definition at line 452 of file regexp.c.

◆ JUMP_OFFSET_LO

#define JUMP_OFFSET_LO (   off)    ((jsbytecode)(off))

Definition at line 453 of file regexp.c.

◆ LINE_SEPARATOR

#define LINE_SEPARATOR   0x2028

Definition at line 81 of file regexp.c.

◆ OFFSET_LEN

#define OFFSET_LEN   ARG_LEN

Definition at line 337 of file regexp.c.

◆ OFFSET_MAX

#define OFFSET_MAX   ((1 << (ARG_LEN * 8)) - 1)

Definition at line 338 of file regexp.c.

◆ OVERFLOW_VALUE

#define OVERFLOW_VALUE   ((UINT)-1)

Definition at line 903 of file regexp.c.

◆ PARA_SEPARATOR

#define PARA_SEPARATOR   0x2029

Definition at line 82 of file regexp.c.

◆ PREPARE_REPEAT

#define PREPARE_REPEAT ( )
Value:
do { \
curState->index = x->cp - gData->cpbegin; \
curState->continue_op = REOP_MINIMALREPEAT; \
curState->continue_pc = pc; \
pc += ARG_LEN; \
for (k = curState->parenSoFar; k < parenSoFar; k++) \
x->parens[k].index = -1; \
PUSH_STATE_STACK(gData); \
op = (REOp) *pc++; \
assert(op < REOP_LIMIT); \
}while(0)
UINT op
Definition: effect.c:236
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
REOp
Definition: regexp.c:94
@ REOP_MINIMALREPEAT
Definition: regexp.c:138
@ REOP_LIMIT
Definition: regexp.c:144
#define ARG_LEN
Definition: regexp.c:332
int k
Definition: mpi.c:3369

◆ PUSH_STATE_STACK

#define PUSH_STATE_STACK (   data)
Value:
do { \
++(data)->stateStackTop; \
if ((data)->stateStackTop == (data)->stateStackLimit && \
return NULL; \
} \
}while(0)
#define NULL
Definition: types.h:112
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
Definition: gl.h:1950
static BOOL ReallocStateStack(REGlobalData *gData)
Definition: regexp.c:2297

Definition at line 2312 of file regexp.c.

◆ RE_IS_LETTER

#define RE_IS_LETTER (   c)
Value:
(((c >= 'A') && (c <= 'Z')) || \
((c >= 'a') && (c <= 'z')) )
const GLubyte * c
Definition: glext.h:8905

Definition at line 84 of file regexp.c.

◆ RE_IS_LINE_TERM

#define RE_IS_LINE_TERM (   c)
Value:
((c == '\n') || (c == '\r') || \
#define PARA_SEPARATOR
Definition: regexp.c:82
#define LINE_SEPARATOR
Definition: regexp.c:81

Definition at line 86 of file regexp.c.

◆ REOP_IS_SIMPLE

#define REOP_IS_SIMPLE (   op)    ((op) <= REOP_NCLASS)

Definition at line 147 of file regexp.c.

◆ ReportRegExpError

#define ReportRegExpError (   a,
  b,
  c 
)

Definition at line 44 of file regexp.c.

◆ ReportRegExpErrorHelper

#define ReportRegExpErrorHelper (   a,
  b,
  c,
  d 
)

Definition at line 45 of file regexp.c.

◆ SET_ARG

#define SET_ARG (   pc,
  arg 
)
Value:
((pc)[0] = (jsbytecode) ((arg) >> 8), \
(pc)[1] = (jsbytecode) (arg))
BYTE jsbytecode
Definition: regexp.h:54
void * arg
Definition: msvc.h:10

Definition at line 334 of file regexp.c.

◆ TREE_DEPTH_MAX

#define TREE_DEPTH_MAX   ((1 << 24) / sizeof(EmitStateStackEntry))

Definition at line 347 of file regexp.c.

Typedef Documentation

◆ CompilerState

◆ EmitStateStackEntry

◆ JSPackedBool

typedef BYTE JSPackedBool

Definition at line 53 of file regexp.c.

◆ REBackTrackData

◆ RECharSet

◆ REGlobalData

◆ RENode

typedef struct RENode RENode

Definition at line 259 of file regexp.c.

◆ REOp

typedef enum REOp REOp

◆ REProgState

Enumeration Type Documentation

◆ REOp

Enumerator
REOP_EMPTY 
REOP_BOL 
REOP_EOL 
REOP_WBDRY 
REOP_WNONBDRY 
REOP_DOT 
REOP_DIGIT 
REOP_NONDIGIT 
REOP_ALNUM 
REOP_NONALNUM 
REOP_SPACE 
REOP_NONSPACE 
REOP_BACKREF 
REOP_FLAT 
REOP_FLAT1 
REOP_FLATi 
REOP_FLAT1i 
REOP_UCFLAT1 
REOP_UCFLAT1i 
REOP_UCFLAT 
REOP_UCFLATi 
REOP_CLASS 
REOP_NCLASS 
REOP_ALT 
REOP_QUANT 
REOP_STAR 
REOP_PLUS 
REOP_OPT 
REOP_LPAREN 
REOP_RPAREN 
REOP_JUMP 
REOP_DOTSTAR 
REOP_LPARENNON 
REOP_ASSERT 
REOP_ASSERT_NOT 
REOP_ASSERTTEST 
REOP_ASSERTNOTTEST 
REOP_MINIMALSTAR 
REOP_MINIMALPLUS 
REOP_MINIMALOPT 
REOP_MINIMALQUANT 
REOP_ENDCHILD 
REOP_REPEAT 
REOP_MINIMALREPEAT 
REOP_ALTPREREQ 
REOP_ALTPREREQ2 
REOP_ENDALT 
REOP_CONCAT 
REOP_END 
REOP_LIMIT 
REOP_EMPTY 
REOP_BOL 
REOP_EOL 
REOP_WBDRY 
REOP_WNONBDRY 
REOP_DOT 
REOP_DIGIT 
REOP_NONDIGIT 
REOP_ALNUM 
REOP_NONALNUM 
REOP_SPACE 
REOP_NONSPACE 
REOP_BACKREF 
REOP_FLAT 
REOP_FLAT1 
REOP_FLATi 
REOP_FLAT1i 
REOP_UCFLAT1 
REOP_UCFLAT1i 
REOP_UCFLAT 
REOP_UCFLATi 
REOP_CLASS 
REOP_NCLASS 
REOP_ALT 
REOP_QUANT 
REOP_STAR 
REOP_PLUS 
REOP_OPT 
REOP_LPAREN 
REOP_RPAREN 
REOP_JUMP 
REOP_DOTSTAR 
REOP_LPARENNON 
REOP_ASSERT 
REOP_ASSERT_NOT 
REOP_ASSERTTEST 
REOP_ASSERTNOTTEST 
REOP_MINIMALSTAR 
REOP_MINIMALPLUS 
REOP_MINIMALOPT 
REOP_MINIMALQUANT 
REOP_ENDCHILD 
REOP_REPEAT 
REOP_MINIMALREPEAT 
REOP_ALTPREREQ 
REOP_ALTPREREQ2 
REOP_ENDALT 
REOP_CONCAT 
REOP_END 
REOP_LIMIT 

Definition at line 94 of file regexp.c.

94 {
100 REOP_DOT,
108 REOP_FLAT,
118 REOP_ALT,
120 REOP_STAR,
121 REOP_PLUS,
122 REOP_OPT,
125 REOP_JUMP,
143 REOP_END,
144 REOP_LIMIT /* META: no operator >= to this */
145} REOp;
@ REOP_LPAREN
Definition: regexp.c:123
@ REOP_SPACE
Definition: regexp.c:105
@ REOP_WBDRY
Definition: regexp.c:98
@ REOP_UCFLAT
Definition: regexp.c:114
@ REOP_ALTPREREQ
Definition: regexp.c:139
@ REOP_REPEAT
Definition: regexp.c:137
@ REOP_QUANT
Definition: regexp.c:119
@ REOP_UCFLAT1i
Definition: regexp.c:113
@ REOP_ASSERTTEST
Definition: regexp.c:130
@ REOP_EMPTY
Definition: regexp.c:95
@ REOP_OPT
Definition: regexp.c:122
@ REOP_DOTSTAR
Definition: regexp.c:126
@ REOP_FLAT1i
Definition: regexp.c:111
@ REOP_MINIMALPLUS
Definition: regexp.c:133
@ REOP_JUMP
Definition: regexp.c:125
@ REOP_ALT
Definition: regexp.c:118
@ REOP_NONSPACE
Definition: regexp.c:106
@ REOP_DOT
Definition: regexp.c:100
@ REOP_BACKREF
Definition: regexp.c:107
@ REOP_BOL
Definition: regexp.c:96
@ REOP_ENDCHILD
Definition: regexp.c:136
@ REOP_NCLASS
Definition: regexp.c:117
@ REOP_ASSERT
Definition: regexp.c:128
@ REOP_ALNUM
Definition: regexp.c:103
@ REOP_WNONBDRY
Definition: regexp.c:99
@ REOP_MINIMALOPT
Definition: regexp.c:134
@ REOP_END
Definition: regexp.c:143
@ REOP_UCFLATi
Definition: regexp.c:115
@ REOP_CLASS
Definition: regexp.c:116
@ REOP_MINIMALQUANT
Definition: regexp.c:135
@ REOP_RPAREN
Definition: regexp.c:124
@ REOP_DIGIT
Definition: regexp.c:101
@ REOP_NONDIGIT
Definition: regexp.c:102
@ REOP_ENDALT
Definition: regexp.c:141
@ REOP_ALTPREREQ2
Definition: regexp.c:140
@ REOP_FLAT1
Definition: regexp.c:109
@ REOP_ASSERTNOTTEST
Definition: regexp.c:131
@ REOP_EOL
Definition: regexp.c:97
@ REOP_ASSERT_NOT
Definition: regexp.c:129
@ REOP_FLATi
Definition: regexp.c:110
@ REOP_PLUS
Definition: regexp.c:121
@ REOP_CONCAT
Definition: regexp.c:142
@ REOP_FLAT
Definition: regexp.c:108
@ REOP_MINIMALSTAR
Definition: regexp.c:132
@ REOP_STAR
Definition: regexp.c:120
@ REOP_NONALNUM
Definition: regexp.c:104
@ REOP_LPARENNON
Definition: regexp.c:127
@ REOP_UCFLAT1
Definition: regexp.c:112

Function Documentation

◆ AddCharacterRangeToCharSet()

static void AddCharacterRangeToCharSet ( RECharSet cs,
UINT  c1,
UINT  c2 
)
static

Definition at line 2063 of file regexp.c.

2064{
2065 UINT i;
2066
2067 UINT byteIndex1 = c1 >> 3;
2068 UINT byteIndex2 = c2 >> 3;
2069
2070 assert(c2 <= cs->length && c1 <= c2);
2071
2072 c1 &= 0x7;
2073 c2 &= 0x7;
2074
2075 if (byteIndex1 == byteIndex2) {
2076 cs->u.bits[byteIndex1] |= ((BYTE)0xFF >> (7 - (c2 - c1))) << c1;
2077 } else {
2078 cs->u.bits[byteIndex1] |= 0xFF << c1;
2079 for (i = byteIndex1 + 1; i < byteIndex2; i++)
2080 cs->u.bits[i] = 0xFF;
2081 cs->u.bits[byteIndex2] |= (BYTE)0xFF >> (7 - c2);
2082 }
2083}
#define assert(x)
Definition: debug.h:53
GLuint GLsizei GLsizei * length
Definition: glext.h:6040
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define cs
Definition: i386-dis.c:442
unsigned int UINT
Definition: ndis.h:50
unsigned char BYTE
Definition: xxhash.c:193

Referenced by ProcessCharSet().

◆ AddCharacterToCharSet()

static void AddCharacterToCharSet ( RECharSet cs,
WCHAR  c 
)
static

Definition at line 2053 of file regexp.c.

2054{
2055 UINT byteIndex = (UINT)(c >> 3);
2056 assert(c <= cs->length);
2057 cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2058}

Referenced by ProcessCharSet().

◆ BackrefMatcher()

static match_state_t * BackrefMatcher ( REGlobalData gData,
match_state_t x,
size_t  parenIndex 
)
static

Definition at line 2022 of file regexp.c.

2023{
2024 size_t len, i;
2025 const WCHAR *parenContent;
2026 RECapture *cap = &x->parens[parenIndex];
2027
2028 if (cap->index == -1)
2029 return x;
2030
2031 len = cap->length;
2032 if (x->cp + len > gData->cpend)
2033 return NULL;
2034
2035 parenContent = &gData->cpbegin[cap->index];
2036 if (gData->regexp->flags & REG_FOLD) {
2037 for (i = 0; i < len; i++) {
2038 if (towupper(parenContent[i]) != towupper(x->cp[i]))
2039 return NULL;
2040 }
2041 } else {
2042 for (i = 0; i < len; i++) {
2043 if (parenContent[i] != x->cp[i])
2044 return NULL;
2045 }
2046 }
2047 x->cp += len;
2048 return x;
2049}
GLenum GLsizei len
Definition: glext.h:6722
GLenum cap
Definition: glext.h:9639
#define REG_FOLD
Definition: regexp.h:36
const WCHAR * cpbegin
Definition: regexp.c:240
const WCHAR * cpend
Definition: regexp.c:241
regexp_t * regexp
Definition: regexp.c:236
WORD flags
Definition: regexp.h:57
#define towupper(c)
Definition: wctype.h:99
__wchar_t WCHAR
Definition: xmlstorage.h:180

Referenced by SimpleMatch().

◆ CalculateBitmapSize()

static BOOL CalculateBitmapSize ( CompilerState state,
RENode target,
const WCHAR src,
const WCHAR end 
)
static

Definition at line 969 of file regexp.c.

971{
972 UINT max = 0;
973 BOOL inRange = FALSE;
974 WCHAR c, rangeStart = 0;
975 UINT n, digit, nDigits, i;
976
977 target->u.ucclass.bmsize = 0;
978 target->u.ucclass.sense = TRUE;
979
980 if (src == end)
981 return TRUE;
982
983 if (*src == '^') {
984 ++src;
985 target->u.ucclass.sense = FALSE;
986 }
987
988 while (src != end) {
989 BOOL canStartRange = TRUE;
990 UINT localMax = 0;
991
992 switch (*src) {
993 case '\\':
994 ++src;
995 c = *src++;
996 switch (c) {
997 case 'b':
998 localMax = 0x8;
999 break;
1000 case 'f':
1001 localMax = 0xC;
1002 break;
1003 case 'n':
1004 localMax = 0xA;
1005 break;
1006 case 'r':
1007 localMax = 0xD;
1008 break;
1009 case 't':
1010 localMax = 0x9;
1011 break;
1012 case 'v':
1013 localMax = 0xB;
1014 break;
1015 case 'c':
1016 if (src < end && RE_IS_LETTER(*src)) {
1017 localMax = (UINT) (*src++) & 0x1F;
1018 } else {
1019 --src;
1020 localMax = '\\';
1021 }
1022 break;
1023 case 'x':
1024 nDigits = 2;
1025 goto lexHex;
1026 case 'u':
1027 nDigits = 4;
1028lexHex:
1029 n = 0;
1030 for (i = 0; (i < nDigits) && (src < end); i++) {
1031 c = *src++;
1032 if (!isASCIIHexDigit(c, &digit)) {
1033 /*
1034 * Back off to accepting the original
1035 *'\' as a literal.
1036 */
1037 src -= i + 1;
1038 n = '\\';
1039 break;
1040 }
1041 n = (n << 4) | digit;
1042 }
1043 localMax = n;
1044 break;
1045 case 'd':
1046 canStartRange = FALSE;
1047 if (inRange) {
1048 JS_ReportErrorNumber(state->context,
1049 js_GetErrorMessage, NULL,
1050 JSMSG_BAD_CLASS_RANGE);
1051 return FALSE;
1052 }
1053 localMax = '9';
1054 break;
1055 case 'D':
1056 case 's':
1057 case 'S':
1058 case 'w':
1059 case 'W':
1060 canStartRange = FALSE;
1061 if (inRange) {
1062 JS_ReportErrorNumber(state->context,
1063 js_GetErrorMessage, NULL,
1064 JSMSG_BAD_CLASS_RANGE);
1065 return FALSE;
1066 }
1067 max = 65535;
1068
1069 /*
1070 * If this is the start of a range, ensure that it's less than
1071 * the end.
1072 */
1073 localMax = 0;
1074 break;
1075 case '0':
1076 case '1':
1077 case '2':
1078 case '3':
1079 case '4':
1080 case '5':
1081 case '6':
1082 case '7':
1083 /*
1084 * This is a non-ECMA extension - decimal escapes (in this
1085 * case, octal!) are supposed to be an error inside class
1086 * ranges, but supported here for backwards compatibility.
1087 *
1088 */
1089 n = JS7_UNDEC(c);
1090 c = *src;
1091 if ('0' <= c && c <= '7') {
1092 src++;
1093 n = 8 * n + JS7_UNDEC(c);
1094 c = *src;
1095 if ('0' <= c && c <= '7') {
1096 src++;
1097 i = 8 * n + JS7_UNDEC(c);
1098 if (i <= 0377)
1099 n = i;
1100 else
1101 src--;
1102 }
1103 }
1104 localMax = n;
1105 break;
1106
1107 default:
1108 localMax = c;
1109 break;
1110 }
1111 break;
1112 default:
1113 localMax = *src++;
1114 break;
1115 }
1116
1117 if (inRange) {
1118 /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1119 if (rangeStart > localMax) {
1120 JS_ReportErrorNumber(state->context,
1121 js_GetErrorMessage, NULL,
1122 JSMSG_BAD_CLASS_RANGE);
1123 return FALSE;
1124 }
1125 inRange = FALSE;
1126 } else {
1127 if (canStartRange && src < end - 1) {
1128 if (*src == '-') {
1129 ++src;
1130 inRange = TRUE;
1131 rangeStart = (WCHAR)localMax;
1132 continue;
1133 }
1134 }
1135 if (state->flags & REG_FOLD)
1136 rangeStart = localMax; /* one run of the uc/dc loop below */
1137 }
1138
1139 if (state->flags & REG_FOLD) {
1140 WCHAR maxch = localMax;
1141
1142 for (i = rangeStart; i <= localMax; i++) {
1143 WCHAR uch, dch;
1144
1145 uch = towupper(i);
1146 dch = towlower(i);
1147 if(maxch < uch)
1148 maxch = uch;
1149 if(maxch < dch)
1150 maxch = dch;
1151 }
1152 localMax = maxch;
1153 }
1154
1155 if (localMax > max)
1156 max = localMax;
1157 }
1158 target->u.ucclass.bmsize = max;
1159 return TRUE;
1160}
static int state
Definition: maze.c:121
#define TRUE
Definition: types.h:120
#define FALSE
Definition: types.h:117
unsigned char uch
Definition: zlib.h:47
unsigned int BOOL
Definition: ntddk_ex.h:94
GLuint GLuint end
Definition: gl.h:1545
GLdouble n
Definition: glext.h:7729
GLenum src
Definition: glext.h:6340
GLenum target
Definition: glext.h:7315
#define JS7_UNDEC(c)
Definition: regexp.c:92
#define RE_IS_LETTER(c)
Definition: regexp.c:84
static BOOL isASCIIHexDigit(WCHAR c, UINT *digit)
Definition: regexp.c:428
#define JS_ReportErrorNumber(a, b, c, d)
Definition: regexp.c:46
#define c
Definition: ke_i.h:80
#define max(a, b)
Definition: svc.c:63
#define towlower(c)
Definition: wctype.h:97

Referenced by ParseTerm().

◆ EmitREBytecode()

static jsbytecode * EmitREBytecode ( CompilerState state,
regexp_t re,
size_t  treeDepth,
jsbytecode pc,
RENode t 
)
static

Definition at line 475 of file regexp.c.

477{
478 EmitStateStackEntry *emitStateSP, *emitStateStack;
479 RECharSet *charSet;
480 REOp op;
481
482 if (treeDepth == 0) {
483 emitStateStack = NULL;
484 } else {
485 emitStateStack = heap_alloc(sizeof(EmitStateStackEntry) * treeDepth);
486 if (!emitStateStack)
487 return NULL;
488 }
489 emitStateSP = emitStateStack;
490 op = t->op;
492
493 for (;;) {
494 *pc++ = op;
495 switch (op) {
496 case REOP_EMPTY:
497 --pc;
498 break;
499
500 case REOP_ALTPREREQ2:
501 case REOP_ALTPREREQ:
502 assert(emitStateSP);
503 emitStateSP->altHead = pc - 1;
504 emitStateSP->endTermFixup = pc;
505 pc += OFFSET_LEN;
506 SET_ARG(pc, t->u.altprereq.ch1);
507 pc += ARG_LEN;
508 SET_ARG(pc, t->u.altprereq.ch2);
509 pc += ARG_LEN;
510
511 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
512 pc += OFFSET_LEN;
513
514 emitStateSP->continueNode = t;
515 emitStateSP->continueOp = REOP_JUMP;
516 emitStateSP->jumpToJumpFlag = FALSE;
517 ++emitStateSP;
518 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
519 t = t->kid;
520 op = t->op;
522 continue;
523
524 case REOP_JUMP:
525 emitStateSP->nextTermFixup = pc; /* offset to following term */
526 pc += OFFSET_LEN;
527 if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
528 goto jump_too_big;
529 emitStateSP->continueOp = REOP_ENDALT;
530 ++emitStateSP;
531 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
532 t = t->u.kid2;
533 op = t->op;
535 continue;
536
537 case REOP_ENDALT:
538 /*
539 * If we already patched emitStateSP->nextTermFixup to jump to
540 * a nearer jump, to avoid 16-bit immediate offset overflow, we
541 * are done here.
542 */
543 if (emitStateSP->jumpToJumpFlag)
544 break;
545
546 /*
547 * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
548 * REOP_ENDALT is executed only on successful match of the last
549 * alternate in a group.
550 */
551 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
552 goto jump_too_big;
553 if (t->op != REOP_ALT) {
554 if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
555 goto jump_too_big;
556 }
557
558 /*
559 * If the program is bigger than the REOP_JUMP offset range, then
560 * we must check for alternates before this one that are part of
561 * the same group, and fix up their jump offsets to target jumps
562 * close enough to fit in a 16-bit unsigned offset immediate.
563 */
564 if ((size_t)(pc - re->program) > OFFSET_MAX &&
565 emitStateSP > emitStateStack) {
566 EmitStateStackEntry *esp, *esp2;
567 jsbytecode *alt, *jump;
569
570 esp2 = emitStateSP;
571 alt = esp2->altHead;
572 for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
573 if (esp->continueOp == REOP_ENDALT &&
574 !esp->jumpToJumpFlag &&
575 esp->nextTermFixup + OFFSET_LEN == alt &&
576 (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
577 ? esp->endTermFixup
578 : esp->nextTermFixup)) > OFFSET_MAX) {
579 alt = esp->altHead;
580 jump = esp->nextTermFixup;
581
582 /*
583 * The span must be 1 less than the distance from
584 * jump offset to jump offset, so we actually jump
585 * to a REOP_JUMP bytecode, not to its offset!
586 */
587 for (;;) {
588 assert(jump < esp2->nextTermFixup);
589 span = esp2->nextTermFixup - jump - 1;
590 if ((size_t)span <= OFFSET_MAX)
591 break;
592 do {
593 if (--esp2 == esp)
594 goto jump_too_big;
595 } while (esp2->continueOp != REOP_ENDALT);
596 }
597
598 jump[0] = JUMP_OFFSET_HI(span);
599 jump[1] = JUMP_OFFSET_LO(span);
600
601 if (esp->continueNode->op != REOP_ALT) {
602 /*
603 * We must patch the offset at esp->endTermFixup
604 * as well, for the REOP_ALTPREREQ{,2} opcodes.
605 * If we're unlucky and endTermFixup is more than
606 * OFFSET_MAX bytes from its target, we cheat by
607 * jumping 6 bytes to the jump whose offset is at
608 * esp->nextTermFixup, which has the same target.
609 */
610 jump = esp->endTermFixup;
611 header = esp->nextTermFixup - jump;
612 span += header;
613 if ((size_t)span > OFFSET_MAX)
614 span = header;
615
616 jump[0] = JUMP_OFFSET_HI(span);
617 jump[1] = JUMP_OFFSET_LO(span);
618 }
619
620 esp->jumpToJumpFlag = TRUE;
621 }
622 }
623 }
624 break;
625
626 case REOP_ALT:
627 assert(emitStateSP);
628 emitStateSP->altHead = pc - 1;
629 emitStateSP->nextAltFixup = pc; /* offset to next alternate */
630 pc += OFFSET_LEN;
631 emitStateSP->continueNode = t;
632 emitStateSP->continueOp = REOP_JUMP;
633 emitStateSP->jumpToJumpFlag = FALSE;
634 ++emitStateSP;
635 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
636 t = t->kid;
637 op = t->op;
639 continue;
640
641 case REOP_FLAT:
642 /*
643 * Coalesce FLATs if possible and if it would not increase bytecode
644 * beyond preallocated limit. The latter happens only when bytecode
645 * size for coalesced string with offset p and length 2 exceeds 6
646 * bytes preallocated for 2 single char nodes, i.e. when
647 * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
648 * GetCompactIndexWidth(p) > 4.
649 * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
650 * nodes strictly decreases bytecode size, the check has to be
651 * done only for the first coalescing.
652 */
653 if (t->kid &&
654 GetCompactIndexWidth((WCHAR*)t->kid - state->cpbegin) <= 4)
655 {
656 while (t->next &&
657 t->next->op == REOP_FLAT &&
658 (WCHAR*)t->kid + t->u.flat.length ==
659 t->next->kid) {
660 t->u.flat.length += t->next->u.flat.length;
661 t->next = t->next->next;
662 }
663 }
664 if (t->kid && t->u.flat.length > 1) {
665 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLATi : REOP_FLAT;
666 pc = WriteCompactIndex(pc, (WCHAR*)t->kid - state->cpbegin);
667 pc = WriteCompactIndex(pc, t->u.flat.length);
668 } else if (t->u.flat.chr < 256) {
669 pc[-1] = (state->flags & REG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
670 *pc++ = (jsbytecode) t->u.flat.chr;
671 } else {
672 pc[-1] = (state->flags & REG_FOLD)
674 : REOP_UCFLAT1;
675 SET_ARG(pc, t->u.flat.chr);
676 pc += ARG_LEN;
677 }
678 break;
679
680 case REOP_LPAREN:
681 assert(emitStateSP);
682 pc = WriteCompactIndex(pc, t->u.parenIndex);
683 emitStateSP->continueNode = t;
684 emitStateSP->continueOp = REOP_RPAREN;
685 ++emitStateSP;
686 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
687 t = t->kid;
688 op = t->op;
689 continue;
690
691 case REOP_RPAREN:
692 pc = WriteCompactIndex(pc, t->u.parenIndex);
693 break;
694
695 case REOP_BACKREF:
696 pc = WriteCompactIndex(pc, t->u.parenIndex);
697 break;
698
699 case REOP_ASSERT:
700 assert(emitStateSP);
701 emitStateSP->nextTermFixup = pc;
702 pc += OFFSET_LEN;
703 emitStateSP->continueNode = t;
704 emitStateSP->continueOp = REOP_ASSERTTEST;
705 ++emitStateSP;
706 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
707 t = t->kid;
708 op = t->op;
709 continue;
710
711 case REOP_ASSERTTEST:
713 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
714 goto jump_too_big;
715 break;
716
717 case REOP_ASSERT_NOT:
718 assert(emitStateSP);
719 emitStateSP->nextTermFixup = pc;
720 pc += OFFSET_LEN;
721 emitStateSP->continueNode = t;
722 emitStateSP->continueOp = REOP_ASSERTNOTTEST;
723 ++emitStateSP;
724 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
725 t = t->kid;
726 op = t->op;
727 continue;
728
729 case REOP_QUANT:
730 assert(emitStateSP);
731 if (t->u.range.min == 0 && t->u.range.max == (UINT)-1) {
732 pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
733 } else if (t->u.range.min == 0 && t->u.range.max == 1) {
734 pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
735 } else if (t->u.range.min == 1 && t->u.range.max == (UINT) -1) {
736 pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
737 } else {
738 if (!t->u.range.greedy)
739 pc[-1] = REOP_MINIMALQUANT;
740 pc = WriteCompactIndex(pc, t->u.range.min);
741 /*
742 * Write max + 1 to avoid using size_t(max) + 1 bytes
743 * for (UINT)-1 sentinel.
744 */
745 pc = WriteCompactIndex(pc, t->u.range.max + 1);
746 }
747 emitStateSP->nextTermFixup = pc;
748 pc += OFFSET_LEN;
749 emitStateSP->continueNode = t;
750 emitStateSP->continueOp = REOP_ENDCHILD;
751 ++emitStateSP;
752 assert((size_t)(emitStateSP - emitStateStack) <= treeDepth);
753 t = t->kid;
754 op = t->op;
755 continue;
756
757 case REOP_ENDCHILD:
758 if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
759 goto jump_too_big;
760 break;
761
762 case REOP_CLASS:
763 if (!t->u.ucclass.sense)
764 pc[-1] = REOP_NCLASS;
765 pc = WriteCompactIndex(pc, t->u.ucclass.index);
766 charSet = &re->classList[t->u.ucclass.index];
767 charSet->converted = FALSE;
768 charSet->length = t->u.ucclass.bmsize;
769 charSet->u.src.startIndex = t->u.ucclass.startIndex;
770 charSet->u.src.length = t->u.ucclass.kidlen;
771 charSet->sense = t->u.ucclass.sense;
772 break;
773
774 default:
775 break;
776 }
777
778 t = t->next;
779 if (t) {
780 op = t->op;
781 } else {
782 if (emitStateSP == emitStateStack)
783 break;
784 --emitStateSP;
785 t = emitStateSP->continueNode;
786 op = (REOp) emitStateSP->continueOp;
787 }
788 }
789
790 cleanup:
791 heap_free(emitStateStack);
792 return pc;
793
794 jump_too_big:
795 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
796 pc = NULL;
797 goto cleanup;
798}
static void * heap_alloc(size_t len)
Definition: appwiz.h:66
static BOOL heap_free(void *mem)
Definition: appwiz.h:76
static void cleanup(void)
Definition: main.c:1335
__kernel_ptrdiff_t ptrdiff_t
Definition: linux.h:247
GLdouble GLdouble t
Definition: gl.h:2047
GLenum GLenum GLvoid GLvoid GLvoid * span
Definition: glext.h:5664
static size_t GetCompactIndexWidth(size_t index)
Definition: regexp.c:363
#define ReportRegExpError(a, b, c)
Definition: regexp.c:44
#define JUMP_OFFSET_LO(off)
Definition: regexp.c:453
#define OFFSET_LEN
Definition: regexp.c:337
#define JUMP_OFFSET_HI(off)
Definition: regexp.c:452
#define OFFSET_MAX
Definition: regexp.c:338
#define SET_ARG(pc, arg)
Definition: regexp.c:334
static BOOL SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
Definition: regexp.c:456
static jsbytecode * WriteCompactIndex(jsbytecode *pc, size_t index)
Definition: regexp.c:372
RENode * continueNode
Definition: regexp.c:321
jsbytecode * endTermFixup
Definition: regexp.c:320
jsbytecode continueOp
Definition: regexp.c:322
jsbytecode * nextAltFixup
Definition: regexp.c:318
jsbytecode * altHead
Definition: regexp.c:317
jsbytecode * nextTermFixup
Definition: regexp.c:319
JSPackedBool jumpToJumpFlag
Definition: regexp.c:323
WORD length
Definition: regexp.c:66
struct RECharSet::@447::@448 src
JSPackedBool converted
Definition: regexp.c:64
union RECharSet::@447 u
JSPackedBool sense
Definition: regexp.c:65
REOp op
Definition: regexp.c:261
jsbytecode program[1]
Definition: regexp.h:63
struct RECharSet * classList
Definition: regexp.h:60

Referenced by regexp_new().

◆ ExecuteREBytecode()

static match_state_t * ExecuteREBytecode ( REGlobalData gData,
match_state_t x 
)
inlinestatic

Definition at line 2536 of file regexp.c.

2537{
2539 REBackTrackData *backTrackData;
2540 jsbytecode *nextpc, *testpc;
2541 REOp nextop;
2542 RECapture *cap;
2543 REProgState *curState;
2544 const WCHAR *startcp;
2545 size_t parenIndex, k;
2546 size_t parenSoFar = 0;
2547
2548 WCHAR matchCh1, matchCh2;
2549 RECharSet *charSet;
2550
2551 BOOL anchor;
2552 jsbytecode *pc = gData->regexp->program;
2553 REOp op = (REOp) *pc++;
2554
2555 /*
2556 * If the first node is a simple match, step the index into the string
2557 * until that match is made, or fail if it can't be found at all.
2558 */
2559 if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & REG_STICKY)) {
2560 anchor = FALSE;
2561 while (x->cp <= gData->cpend) {
2562 nextpc = pc; /* reset back to start each time */
2563 result = SimpleMatch(gData, x, op, &nextpc, TRUE);
2564 if (result) {
2565 anchor = TRUE;
2566 x = result;
2567 pc = nextpc; /* accept skip to next opcode */
2568 op = (REOp) *pc++;
2569 assert(op < REOP_LIMIT);
2570 break;
2571 }
2572 gData->skipped++;
2573 x->cp++;
2574 }
2575 if (!anchor)
2576 goto bad;
2577 }
2578
2579 for (;;) {
2580 const char *opname = reop_names[op];
2581 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2582 (int)gData->stateStackTop * 2, "", opname);
2583
2584 if (REOP_IS_SIMPLE(op)) {
2585 result = SimpleMatch(gData, x, op, &pc, TRUE);
2586 } else {
2587 curState = &gData->stateStack[gData->stateStackTop];
2588 switch (op) {
2589 case REOP_END:
2590 goto good;
2591 case REOP_ALTPREREQ2:
2592 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2593 pc += ARG_LEN;
2594 matchCh2 = GET_ARG(pc);
2595 pc += ARG_LEN;
2596 k = GET_ARG(pc);
2597 pc += ARG_LEN;
2598
2599 if (x->cp != gData->cpend) {
2600 if (*x->cp == matchCh2)
2601 goto doAlt;
2602
2603 charSet = &gData->regexp->classList[k];
2604 if (!charSet->converted && !ProcessCharSet(gData, charSet))
2605 goto bad;
2606 matchCh1 = *x->cp;
2607 k = matchCh1 >> 3;
2608 if ((charSet->length == 0 ||
2609 matchCh1 > charSet->length ||
2610 !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2611 charSet->sense) {
2612 goto doAlt;
2613 }
2614 }
2615 result = NULL;
2616 break;
2617
2618 case REOP_ALTPREREQ:
2619 nextpc = pc + GET_OFFSET(pc); /* start of next op */
2620 pc += ARG_LEN;
2621 matchCh1 = GET_ARG(pc);
2622 pc += ARG_LEN;
2623 matchCh2 = GET_ARG(pc);
2624 pc += ARG_LEN;
2625 if (x->cp == gData->cpend ||
2626 (*x->cp != matchCh1 && *x->cp != matchCh2)) {
2627 result = NULL;
2628 break;
2629 }
2630 /* else fall through... */
2631
2632 case REOP_ALT:
2633 doAlt:
2634 nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2635 pc += ARG_LEN; /* start of this alternate */
2636 curState->parenSoFar = parenSoFar;
2637 PUSH_STATE_STACK(gData);
2638 op = (REOp) *pc++;
2639 startcp = x->cp;
2640 if (REOP_IS_SIMPLE(op)) {
2641 if (!SimpleMatch(gData, x, op, &pc, TRUE)) {
2642 op = (REOp) *nextpc++;
2643 pc = nextpc;
2644 continue;
2645 }
2646 result = x;
2647 op = (REOp) *pc++;
2648 }
2649 nextop = (REOp) *nextpc++;
2650 if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2651 goto bad;
2652 continue;
2653
2654 /*
2655 * Occurs at (successful) end of REOP_ALT,
2656 */
2657 case REOP_JUMP:
2658 /*
2659 * If we have not gotten a result here, it is because of an
2660 * empty match. Do the same thing REOP_EMPTY would do.
2661 */
2662 if (!result)
2663 result = x;
2664
2665 --gData->stateStackTop;
2666 pc += GET_OFFSET(pc);
2667 op = (REOp) *pc++;
2668 continue;
2669
2670 /*
2671 * Occurs at last (successful) end of REOP_ALT,
2672 */
2673 case REOP_ENDALT:
2674 /*
2675 * If we have not gotten a result here, it is because of an
2676 * empty match. Do the same thing REOP_EMPTY would do.
2677 */
2678 if (!result)
2679 result = x;
2680
2681 --gData->stateStackTop;
2682 op = (REOp) *pc++;
2683 continue;
2684
2685 case REOP_LPAREN:
2686 pc = ReadCompactIndex(pc, &parenIndex);
2687 TRACE("[ %lu ]\n", (ULONG_PTR)parenIndex);
2688 assert(parenIndex < gData->regexp->parenCount);
2689 if (parenIndex + 1 > parenSoFar)
2690 parenSoFar = parenIndex + 1;
2691 x->parens[parenIndex].index = x->cp - gData->cpbegin;
2692 x->parens[parenIndex].length = 0;
2693 op = (REOp) *pc++;
2694 continue;
2695
2696 case REOP_RPAREN:
2697 {
2698 ptrdiff_t delta;
2699
2700 pc = ReadCompactIndex(pc, &parenIndex);
2701 assert(parenIndex < gData->regexp->parenCount);
2702 cap = &x->parens[parenIndex];
2703 delta = x->cp - (gData->cpbegin + cap->index);
2704 cap->length = (delta < 0) ? 0 : (size_t) delta;
2705 op = (REOp) *pc++;
2706
2707 if (!result)
2708 result = x;
2709 continue;
2710 }
2711 case REOP_ASSERT:
2712 nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2713 pc += ARG_LEN; /* start of ASSERT child */
2714 op = (REOp) *pc++;
2715 testpc = pc;
2716 if (REOP_IS_SIMPLE(op) &&
2717 !SimpleMatch(gData, x, op, &testpc, FALSE)) {
2718 result = NULL;
2719 break;
2720 }
2721 curState->u.assertion.top =
2722 (char *)gData->backTrackSP - (char *)gData->backTrackStack;
2723 curState->u.assertion.sz = gData->cursz;
2724 curState->index = x->cp - gData->cpbegin;
2725 curState->parenSoFar = parenSoFar;
2726 PUSH_STATE_STACK(gData);
2728 nextpc, x, x->cp, 0, 0)) {
2729 goto bad;
2730 }
2731 continue;
2732
2733 case REOP_ASSERT_NOT:
2734 nextpc = pc + GET_OFFSET(pc);
2735 pc += ARG_LEN;
2736 op = (REOp) *pc++;
2737 testpc = pc;
2738 if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2739 SimpleMatch(gData, x, op, &testpc, FALSE) &&
2740 *testpc == REOP_ASSERTNOTTEST) {
2741 result = NULL;
2742 break;
2743 }
2744 curState->u.assertion.top
2745 = (char *)gData->backTrackSP -
2746 (char *)gData->backTrackStack;
2747 curState->u.assertion.sz = gData->cursz;
2748 curState->index = x->cp - gData->cpbegin;
2749 curState->parenSoFar = parenSoFar;
2750 PUSH_STATE_STACK(gData);
2752 nextpc, x, x->cp, 0, 0)) {
2753 goto bad;
2754 }
2755 continue;
2756
2757 case REOP_ASSERTTEST:
2758 --gData->stateStackTop;
2759 --curState;
2760 x->cp = gData->cpbegin + curState->index;
2761 gData->backTrackSP =
2762 (REBackTrackData *) ((char *)gData->backTrackStack +
2763 curState->u.assertion.top);
2764 gData->cursz = curState->u.assertion.sz;
2765 if (result)
2766 result = x;
2767 break;
2768
2769 case REOP_ASSERTNOTTEST:
2770 --gData->stateStackTop;
2771 --curState;
2772 x->cp = gData->cpbegin + curState->index;
2773 gData->backTrackSP =
2774 (REBackTrackData *) ((char *)gData->backTrackStack +
2775 curState->u.assertion.top);
2776 gData->cursz = curState->u.assertion.sz;
2777 result = (!result) ? x : NULL;
2778 break;
2779 case REOP_STAR:
2780 curState->u.quantifier.min = 0;
2781 curState->u.quantifier.max = (UINT)-1;
2782 goto quantcommon;
2783 case REOP_PLUS:
2784 curState->u.quantifier.min = 1;
2785 curState->u.quantifier.max = (UINT)-1;
2786 goto quantcommon;
2787 case REOP_OPT:
2788 curState->u.quantifier.min = 0;
2789 curState->u.quantifier.max = 1;
2790 goto quantcommon;
2791 case REOP_QUANT:
2792 pc = ReadCompactIndex(pc, &k);
2793 curState->u.quantifier.min = k;
2794 pc = ReadCompactIndex(pc, &k);
2795 /* max is k - 1 to use one byte for (UINT)-1 sentinel. */
2796 curState->u.quantifier.max = k - 1;
2797 assert(curState->u.quantifier.min <= curState->u.quantifier.max);
2798 quantcommon:
2799 if (curState->u.quantifier.max == 0) {
2800 pc = pc + GET_OFFSET(pc);
2801 op = (REOp) *pc++;
2802 result = x;
2803 continue;
2804 }
2805 /* Step over <next> */
2806 nextpc = pc + ARG_LEN;
2807 op = (REOp) *nextpc++;
2808 startcp = x->cp;
2809 if (REOP_IS_SIMPLE(op)) {
2810 if (!SimpleMatch(gData, x, op, &nextpc, TRUE)) {
2811 if (curState->u.quantifier.min == 0)
2812 result = x;
2813 else
2814 result = NULL;
2815 pc = pc + GET_OFFSET(pc);
2816 break;
2817 }
2818 op = (REOp) *nextpc++;
2819 result = x;
2820 }
2821 curState->index = startcp - gData->cpbegin;
2822 curState->continue_op = REOP_REPEAT;
2823 curState->continue_pc = pc;
2824 curState->parenSoFar = parenSoFar;
2825 PUSH_STATE_STACK(gData);
2826 if (curState->u.quantifier.min == 0 &&
2827 !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2828 0, 0)) {
2829 goto bad;
2830 }
2831 pc = nextpc;
2832 continue;
2833
2834 case REOP_ENDCHILD: /* marks the end of a quantifier child */
2835 pc = curState[-1].continue_pc;
2836 op = (REOp) curState[-1].continue_op;
2837
2838 if (!result)
2839 result = x;
2840 continue;
2841
2842 case REOP_REPEAT:
2843 --curState;
2844 do {
2845 --gData->stateStackTop;
2846 if (!result) {
2847 /* Failed, see if we have enough children. */
2848 if (curState->u.quantifier.min == 0)
2849 goto repeatDone;
2850 goto break_switch;
2851 }
2852 if (curState->u.quantifier.min == 0 &&
2853 x->cp == gData->cpbegin + curState->index) {
2854 /* matched an empty string, that'll get us nowhere */
2855 result = NULL;
2856 goto break_switch;
2857 }
2858 if (curState->u.quantifier.min != 0)
2859 curState->u.quantifier.min--;
2860 if (curState->u.quantifier.max != (UINT) -1)
2861 curState->u.quantifier.max--;
2862 if (curState->u.quantifier.max == 0)
2863 goto repeatDone;
2864 nextpc = pc + ARG_LEN;
2865 nextop = (REOp) *nextpc;
2866 startcp = x->cp;
2867 if (REOP_IS_SIMPLE(nextop)) {
2868 nextpc++;
2869 if (!SimpleMatch(gData, x, nextop, &nextpc, TRUE)) {
2870 if (curState->u.quantifier.min == 0)
2871 goto repeatDone;
2872 result = NULL;
2873 goto break_switch;
2874 }
2875 result = x;
2876 }
2877 curState->index = startcp - gData->cpbegin;
2878 PUSH_STATE_STACK(gData);
2879 if (curState->u.quantifier.min == 0 &&
2881 pc, x, startcp,
2882 curState->parenSoFar,
2883 parenSoFar -
2884 curState->parenSoFar)) {
2885 goto bad;
2886 }
2887 } while (*nextpc == REOP_ENDCHILD);
2888 pc = nextpc;
2889 op = (REOp) *pc++;
2890 parenSoFar = curState->parenSoFar;
2891 continue;
2892
2893 repeatDone:
2894 result = x;
2895 pc += GET_OFFSET(pc);
2896 goto break_switch;
2897
2898 case REOP_MINIMALSTAR:
2899 curState->u.quantifier.min = 0;
2900 curState->u.quantifier.max = (UINT)-1;
2901 goto minimalquantcommon;
2902 case REOP_MINIMALPLUS:
2903 curState->u.quantifier.min = 1;
2904 curState->u.quantifier.max = (UINT)-1;
2905 goto minimalquantcommon;
2906 case REOP_MINIMALOPT:
2907 curState->u.quantifier.min = 0;
2908 curState->u.quantifier.max = 1;
2909 goto minimalquantcommon;
2910 case REOP_MINIMALQUANT:
2911 pc = ReadCompactIndex(pc, &k);
2912 curState->u.quantifier.min = k;
2913 pc = ReadCompactIndex(pc, &k);
2914 /* See REOP_QUANT comments about k - 1. */
2915 curState->u.quantifier.max = k - 1;
2916 assert(curState->u.quantifier.min
2917 <= curState->u.quantifier.max);
2918 minimalquantcommon:
2919 curState->index = x->cp - gData->cpbegin;
2920 curState->parenSoFar = parenSoFar;
2921 PUSH_STATE_STACK(gData);
2922 if (curState->u.quantifier.min != 0) {
2923 curState->continue_op = REOP_MINIMALREPEAT;
2924 curState->continue_pc = pc;
2925 /* step over <next> */
2926 pc += OFFSET_LEN;
2927 op = (REOp) *pc++;
2928 } else {
2930 pc, x, x->cp, 0, 0)) {
2931 goto bad;
2932 }
2933 --gData->stateStackTop;
2934 pc = pc + GET_OFFSET(pc);
2935 op = (REOp) *pc++;
2936 }
2937 continue;
2938
2939 case REOP_MINIMALREPEAT:
2940 --gData->stateStackTop;
2941 --curState;
2942
2943 TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
2944#define PREPARE_REPEAT() \
2945 do { \
2946 curState->index = x->cp - gData->cpbegin; \
2947 curState->continue_op = REOP_MINIMALREPEAT; \
2948 curState->continue_pc = pc; \
2949 pc += ARG_LEN; \
2950 for (k = curState->parenSoFar; k < parenSoFar; k++) \
2951 x->parens[k].index = -1; \
2952 PUSH_STATE_STACK(gData); \
2953 op = (REOp) *pc++; \
2954 assert(op < REOP_LIMIT); \
2955 }while(0)
2956
2957 if (!result) {
2958 TRACE(" -\n");
2959 /*
2960 * Non-greedy failure - try to consume another child.
2961 */
2962 if (curState->u.quantifier.max == (UINT) -1 ||
2963 curState->u.quantifier.max > 0) {
2965 continue;
2966 }
2967 /* Don't need to adjust pc since we're going to pop. */
2968 break;
2969 }
2970 if (curState->u.quantifier.min == 0 &&
2971 x->cp == gData->cpbegin + curState->index) {
2972 /* Matched an empty string, that'll get us nowhere. */
2973 result = NULL;
2974 break;
2975 }
2976 if (curState->u.quantifier.min != 0)
2977 curState->u.quantifier.min--;
2978 if (curState->u.quantifier.max != (UINT) -1)
2979 curState->u.quantifier.max--;
2980 if (curState->u.quantifier.min != 0) {
2982 continue;
2983 }
2984 curState->index = x->cp - gData->cpbegin;
2985 curState->parenSoFar = parenSoFar;
2986 PUSH_STATE_STACK(gData);
2988 pc, x, x->cp,
2989 curState->parenSoFar,
2990 parenSoFar - curState->parenSoFar)) {
2991 goto bad;
2992 }
2993 --gData->stateStackTop;
2994 pc = pc + GET_OFFSET(pc);
2995 op = (REOp) *pc++;
2996 assert(op < REOP_LIMIT);
2997 continue;
2998 default:
2999 assert(FALSE);
3000 result = NULL;
3001 }
3002 break_switch:;
3003 }
3004
3005 /*
3006 * If the match failed and there's a backtrack option, take it.
3007 * Otherwise this is a complete and utter failure.
3008 */
3009 if (!result) {
3010 if (gData->cursz == 0)
3011 return NULL;
3012
3013 /* Potentially detect explosive regex here. */
3014 gData->backTrackCount++;
3015 if (gData->backTrackLimit &&
3016 gData->backTrackCount >= gData->backTrackLimit) {
3017 JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL,
3018 JSMSG_REGEXP_TOO_COMPLEX);
3019 gData->ok = FALSE;
3020 return NULL;
3021 }
3022
3023 backTrackData = gData->backTrackSP;
3024 gData->cursz = backTrackData->sz;
3025 gData->backTrackSP =
3026 (REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3027 x->cp = backTrackData->cp;
3028 pc = backTrackData->backtrack_pc;
3029 op = (REOp) backTrackData->backtrack_op;
3030 assert(op < REOP_LIMIT);
3031 gData->stateStackTop = backTrackData->saveStateStackTop;
3032 assert(gData->stateStackTop);
3033
3034 memcpy(gData->stateStack, backTrackData + 1,
3035 sizeof(REProgState) * backTrackData->saveStateStackTop);
3036 curState = &gData->stateStack[gData->stateStackTop - 1];
3037
3038 if (backTrackData->parenCount) {
3039 memcpy(&x->parens[backTrackData->parenIndex],
3040 (char *)(backTrackData + 1) +
3041 sizeof(REProgState) * backTrackData->saveStateStackTop,
3042 sizeof(RECapture) * backTrackData->parenCount);
3043 parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3044 } else {
3045 for (k = curState->parenSoFar; k < parenSoFar; k++)
3046 x->parens[k].index = -1;
3047 parenSoFar = curState->parenSoFar;
3048 }
3049
3050 TRACE("\tBT_Pop: %ld,%ld\n",
3051 (ULONG_PTR)backTrackData->parenIndex,
3052 (ULONG_PTR)backTrackData->parenCount);
3053 continue;
3054 }
3055 x = result;
3056
3057 /*
3058 * Continue with the expression.
3059 */
3060 op = (REOp)*pc++;
3061 assert(op < REOP_LIMIT);
3062 }
3063
3064bad:
3065 TRACE("\n");
3066 return NULL;
3067
3068good:
3069 TRACE("\n");
3070 return x;
3071}
GLuint64EXT * result
Definition: glext.h:11304
static REBackTrackData * PushBackTrackState(REGlobalData *gData, REOp op, jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount)
Definition: regexp.c:1922
static jsbytecode * ReadCompactIndex(jsbytecode *pc, size_t *result)
Definition: regexp.c:385
static BOOL ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
Definition: regexp.c:2087
#define REOP_IS_SIMPLE(op)
Definition: regexp.c:147
#define GET_ARG(pc)
Definition: regexp.c:333
#define GET_OFFSET(pc)
Definition: regexp.c:339
#define PREPARE_REPEAT()
#define PUSH_STATE_STACK(data)
Definition: regexp.c:2312
static match_state_t * SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp)
Definition: regexp.c:2328
static const char * reop_names[]
Definition: regexp.c:149
#define REG_STICKY
Definition: regexp.h:39
if(dx< 0)
Definition: linetemp.h:194
#define memcpy(s1, s2, n)
Definition: mkisofs.h:878
#define cap
Definition: glfuncs.h:226
#define TRACE(s)
Definition: solgame.cpp:4
jsbytecode * backtrack_pc
Definition: regexp.c:221
size_t parenCount
Definition: regexp.c:225
size_t sz
Definition: regexp.c:220
const WCHAR * cp
Definition: regexp.c:223
size_t parenIndex
Definition: regexp.c:224
jsbytecode backtrack_op
Definition: regexp.c:222
size_t saveStateStackTop
Definition: regexp.c:226
BYTE * bits
Definition: regexp.c:68
size_t backTrackLimit
Definition: regexp.c:252
REProgState * stateStack
Definition: regexp.c:243
REBackTrackData * backTrackSP
Definition: regexp.c:248
size_t cursz
Definition: regexp.c:250
REBackTrackData * backTrackStack
Definition: regexp.c:247
size_t backTrackCount
Definition: regexp.c:251
BOOL ok
Definition: regexp.c:237
size_t stateStackTop
Definition: regexp.c:244
ptrdiff_t skipped
Definition: regexp.c:239
void * cx
Definition: regexp.c:235
jsbytecode * continue_pc
Definition: regexp.c:203
struct REProgState::@449::@451 assertion
size_t parenSoFar
Definition: regexp.c:206
jsbytecode continue_op
Definition: regexp.c:204
struct REProgState::@449::@450 quantifier
ptrdiff_t index
Definition: regexp.c:205
union REProgState::@449 u
uint32_t ULONG_PTR
Definition: typedefs.h:65

Referenced by MatchRegExp().

◆ FindParenCount()

static UINT FindParenCount ( CompilerState state)
static

Definition at line 906 of file regexp.c.

907{
909 int i;
910
911 if (state->flags & JSREG_FIND_PAREN_COUNT)
912 return OVERFLOW_VALUE;
913
914 /*
915 * Copy state into temp, flag it so we never report an invalid backref,
916 * and reset its members to parse the entire regexp. This is obviously
917 * suboptimal, but GetDecimalValue calls us only if a backref appears to
918 * refer to a forward parenthetical, which is rare.
919 */
920 temp = *state;
922 temp.cp = temp.cpbegin;
923 temp.parenCount = 0;
924 temp.classCount = 0;
925 temp.progLength = 0;
926 temp.treeDepth = 0;
927 temp.classBitmapsMem = 0;
928 for (i = 0; i < CLASS_CACHE_SIZE; i++)
929 temp.classCache[i].start = NULL;
930
931 if (!ParseRegExp(&temp)) {
933 return OVERFLOW_VALUE;
934 }
935 return temp.parenCount;
936}
#define JSREG_FIND_PAREN_COUNT
Definition: regexp.c:895
static BOOL ParseRegExp(CompilerState *)
Definition: regexp.c:1651
#define OVERFLOW_VALUE
Definition: regexp.c:903
#define CLASS_CACHE_SIZE
Definition: regexp.c:292
#define JSREG_FIND_PAREN_ERROR
Definition: regexp.c:896
static calc_node_t temp
Definition: rpn_ieee.c:38

Referenced by ParseTerm().

◆ FlatNIMatcher()

static match_state_t * FlatNIMatcher ( REGlobalData gData,
match_state_t x,
const WCHAR matchChars,
size_t  length 
)
inlinestatic

Definition at line 1983 of file regexp.c.

1985{
1986 size_t i;
1987 assert(gData->cpend >= x->cp);
1988 if (length > (size_t)(gData->cpend - x->cp))
1989 return NULL;
1990 for (i = 0; i != length; i++) {
1991 if (towupper(matchChars[i]) != towupper(x->cp[i]))
1992 return NULL;
1993 }
1994 x->cp += length;
1995 return x;
1996}

Referenced by SimpleMatch().

◆ GetCompactIndexWidth()

static size_t GetCompactIndexWidth ( size_t  index)
static

Definition at line 363 of file regexp.c.

364{
365 size_t width;
366
367 for (width = 1; (index >>= 7) != 0; ++width) { }
368 return width;
369}
GLint GLint GLsizei width
Definition: gl.h:1546
GLuint index
Definition: glext.h:6031

Referenced by EmitREBytecode(), ParseMinMaxQuantifier(), ParseRegExp(), ParseTerm(), and regexp_new().

◆ GetDecimalValue()

static UINT GetDecimalValue ( WCHAR  c,
UINT  max,
UINT(*)(CompilerState *state findMax,
CompilerState state 
)
static

Definition at line 945 of file regexp.c.

947{
949 BOOL overflow = (value > max && (!findMax || value > findMax(state)));
950
951 /* The following restriction allows simpler overflow checks. */
952 assert(max <= ((UINT)-1 - 9) / 10);
953 while (state->cp < state->cpend) {
954 c = *state->cp;
955 if (!JS7_ISDEC(c))
956 break;
957 value = 10 * value + JS7_UNDEC(c);
958 if (!overflow && value > max && (!findMax || value > findMax(state)))
959 overflow = TRUE;
960 ++state->cp;
961 }
962 return overflow ? OVERFLOW_VALUE : value;
963}
#define JS7_ISDEC(c)
Definition: regexp.c:91
Definition: pdh_main.c:94

Referenced by ParseMinMaxQuantifier(), and ParseTerm().

◆ InitMatch()

static HRESULT InitMatch ( regexp_t re,
void cx,
heap_pool_t pool,
REGlobalData gData 
)
static

Definition at line 3100 of file regexp.c.

3101{
3102 UINT i;
3103
3106 if (!gData->backTrackStack)
3107 goto bad;
3108
3109 gData->backTrackSP = gData->backTrackStack;
3110 gData->cursz = 0;
3111 gData->backTrackCount = 0;
3112 gData->backTrackLimit = 0;
3113
3115 gData->stateStack = heap_pool_alloc(gData->pool, sizeof(REProgState) * INITIAL_STATESTACK);
3116 if (!gData->stateStack)
3117 goto bad;
3118
3119 gData->stateStackTop = 0;
3120 gData->cx = cx;
3121 gData->pool = pool;
3122 gData->regexp = re;
3123 gData->ok = TRUE;
3124
3125 for (i = 0; i < re->classCount; i++) {
3126 if (!re->classList[i].converted &&
3127 !ProcessCharSet(gData, &re->classList[i])) {
3128 return E_FAIL;
3129 }
3130 }
3131
3132 return S_OK;
3133
3134bad:
3136 gData->ok = FALSE;
3137 return E_OUTOFMEMORY;
3138}
#define E_OUTOFMEMORY
Definition: ddrawi.h:100
#define E_FAIL
Definition: ddrawi.h:102
#define S_OK
Definition: intsafe.h:52
#define js_ReportOutOfScriptQuota(a)
Definition: regexp.c:48
#define INITIAL_BACKTRACK
Definition: regexp.c:232
#define INITIAL_STATESTACK
Definition: regexp.c:231
void * heap_pool_alloc(heap_pool_t *, DWORD) __WINE_ALLOC_SIZE(2) DECLSPEC_HIDDEN
Definition: jsutils.c:77
_Out_opt_ int * cx
Definition: commctrl.h:585
size_t backTrackStackSize
Definition: regexp.c:249
heap_pool_t * pool
Definition: regexp.c:254
size_t stateStackLimit
Definition: regexp.c:245
size_t classCount
Definition: regexp.h:59

Referenced by regexp_execute().

◆ isASCIIHexDigit()

static BOOL isASCIIHexDigit ( WCHAR  c,
UINT digit 
)
static

Definition at line 428 of file regexp.c.

429{
430 UINT cv = c;
431
432 if (cv < '0')
433 return FALSE;
434 if (cv <= '9') {
435 *digit = cv - '0';
436 return TRUE;
437 }
438 cv |= 0x20;
439 if (cv >= 'a' && cv <= 'f') {
440 *digit = cv - 'a' + 10;
441 return TRUE;
442 }
443 return FALSE;
444}

Referenced by CalculateBitmapSize(), ParseTerm(), and ProcessCharSet().

◆ MatchRegExp()

static match_state_t * MatchRegExp ( REGlobalData gData,
match_state_t x 
)
static

Definition at line 3073 of file regexp.c.

3074{
3076 const WCHAR *cp = x->cp;
3077 const WCHAR *cp2;
3078 UINT j;
3079
3080 /*
3081 * Have to include the position beyond the last character
3082 * in order to detect end-of-input/line condition.
3083 */
3084 for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3085 gData->skipped = cp2 - cp;
3086 x->cp = cp2;
3087 for (j = 0; j < gData->regexp->parenCount; j++)
3088 x->parens[j].index = -1;
3089 result = ExecuteREBytecode(gData, x);
3090 if (!gData->ok || result || (gData->regexp->flags & REG_STICKY))
3091 return result;
3092 gData->backTrackSP = gData->backTrackStack;
3093 gData->cursz = 0;
3094 gData->stateStackTop = 0;
3095 cp2 = cp + gData->skipped;
3096 }
3097 return NULL;
3098}
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
static match_state_t * ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:2536
POINT cp
Definition: magnifier.c:59
size_t parenCount
Definition: regexp.h:58

Referenced by regexp_execute().

◆ NewRENode()

static RENode * NewRENode ( CompilerState state,
REOp  op 
)
static

Definition at line 409 of file regexp.c.

410{
411 RENode *ren;
412
413 ren = heap_pool_alloc(state->pool, sizeof(*ren));
414 if (!ren) {
415 /* js_ReportOutOfScriptQuota(cx); */
416 return NULL;
417 }
418 ren->op = op;
419 ren->next = NULL;
420 ren->kid = NULL;
421 return ren;
422}
Definition: regexp.c:260
RENode * next
Definition: regexp.c:262
void * kid
Definition: regexp.c:263

Referenced by ParseMinMaxQuantifier(), ParseQuantifier(), ParseRegExp(), ParseTerm(), ProcessOp(), and regexp_new().

◆ ParseMinMaxQuantifier()

static INT ParseMinMaxQuantifier ( CompilerState state,
BOOL  ignoreValues 
)
static

Definition at line 1163 of file regexp.c.

1164{
1165 UINT min, max;
1166 WCHAR c;
1167 const WCHAR *errp = state->cp++;
1168
1169 c = *state->cp;
1170 if (JS7_ISDEC(c)) {
1171 ++state->cp;
1172 min = GetDecimalValue(c, 0xFFFF, NULL, state);
1173 c = *state->cp;
1174
1175 if (!ignoreValues && min == OVERFLOW_VALUE)
1176 return JSMSG_MIN_TOO_BIG;
1177
1178 if (c == ',') {
1179 c = *++state->cp;
1180 if (JS7_ISDEC(c)) {
1181 ++state->cp;
1182 max = GetDecimalValue(c, 0xFFFF, NULL, state);
1183 c = *state->cp;
1184 if (!ignoreValues && max == OVERFLOW_VALUE)
1185 return JSMSG_MAX_TOO_BIG;
1186 if (!ignoreValues && min > max)
1187 return JSMSG_OUT_OF_ORDER;
1188 } else {
1189 max = (UINT)-1;
1190 }
1191 } else {
1192 max = min;
1193 }
1194 if (c == '}') {
1195 state->result = NewRENode(state, REOP_QUANT);
1196 if (!state->result)
1197 return JSMSG_OUT_OF_MEMORY;
1198 state->result->u.range.min = min;
1199 state->result->u.range.max = max;
1200 /*
1201 * QUANT, <min>, <max>, <next> ... <ENDCHILD>
1202 * where <max> is written as compact(max+1) to make
1203 * (UINT)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1204 */
1205 state->progLength += (1 + GetCompactIndexWidth(min)
1207 +3);
1208 return 0;
1209 }
1210 }
1211
1212 state->cp = errp;
1213 return -1;
1214}
#define JSMSG_OUT_OF_MEMORY
Definition: regexp.c:79
#define JSMSG_MIN_TOO_BIG
Definition: regexp.c:76
static UINT GetDecimalValue(WCHAR c, UINT max, UINT(*findMax)(CompilerState *state), CompilerState *state)
Definition: regexp.c:945
#define JSMSG_MAX_TOO_BIG
Definition: regexp.c:77
#define JSMSG_OUT_OF_ORDER
Definition: regexp.c:78
static RENode * NewRENode(CompilerState *state, REOp op)
Definition: regexp.c:409
#define min(a, b)
Definition: monoChain.cc:55

Referenced by ParseQuantifier(), ParseRegExp(), and ParseTerm().

◆ ParseQuantifier()

static BOOL ParseQuantifier ( CompilerState state)
static

Definition at line 1217 of file regexp.c.

1218{
1219 RENode *term;
1220 term = state->result;
1221 if (state->cp < state->cpend) {
1222 switch (*state->cp) {
1223 case '+':
1224 state->result = NewRENode(state, REOP_QUANT);
1225 if (!state->result)
1226 return FALSE;
1227 state->result->u.range.min = 1;
1228 state->result->u.range.max = (UINT)-1;
1229 /* <PLUS>, <next> ... <ENDCHILD> */
1230 state->progLength += 4;
1231 goto quantifier;
1232 case '*':
1233 state->result = NewRENode(state, REOP_QUANT);
1234 if (!state->result)
1235 return FALSE;
1236 state->result->u.range.min = 0;
1237 state->result->u.range.max = (UINT)-1;
1238 /* <STAR>, <next> ... <ENDCHILD> */
1239 state->progLength += 4;
1240 goto quantifier;
1241 case '?':
1242 state->result = NewRENode(state, REOP_QUANT);
1243 if (!state->result)
1244 return FALSE;
1245 state->result->u.range.min = 0;
1246 state->result->u.range.max = 1;
1247 /* <OPT>, <next> ... <ENDCHILD> */
1248 state->progLength += 4;
1249 goto quantifier;
1250 case '{': /* balance '}' */
1251 {
1252 INT err;
1253
1255 if (err == 0)
1256 goto quantifier;
1257 if (err == -1)
1258 return TRUE;
1259
1260 ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
1261 return FALSE;
1262 }
1263 default:;
1264 }
1265 }
1266 return TRUE;
1267
1268quantifier:
1269 if (state->treeDepth == TREE_DEPTH_MAX) {
1270 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1271 return FALSE;
1272 }
1273
1274 ++state->treeDepth;
1275 ++state->cp;
1276 state->result->kid = term;
1277 if (state->cp < state->cpend && *state->cp == '?') {
1278 ++state->cp;
1279 state->result->u.range.greedy = FALSE;
1280 } else {
1281 state->result->u.range.greedy = TRUE;
1282 }
1283 return TRUE;
1284}
static INT ParseMinMaxQuantifier(CompilerState *state, BOOL ignoreValues)
Definition: regexp.c:1163
#define TREE_DEPTH_MAX
Definition: regexp.c:347
#define ReportRegExpErrorHelper(a, b, c, d)
Definition: regexp.c:45
#define err(...)
int32_t INT
Definition: typedefs.h:58

Referenced by ParseRegExp(), and ParseTerm().

◆ ParseRegExp()

static BOOL ParseRegExp ( CompilerState state)
static

Definition at line 1651 of file regexp.c.

1652{
1653 size_t parenIndex;
1654 RENode *operand;
1655 REOpData *operatorStack;
1656 RENode **operandStack;
1657 REOp op;
1658 INT i;
1659 BOOL result = FALSE;
1660
1661 INT operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
1662 INT operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
1663
1664 /* Watch out for empty regexp */
1665 if (state->cp == state->cpend) {
1666 state->result = NewRENode(state, REOP_EMPTY);
1667 return (state->result != NULL);
1668 }
1669
1670 operatorStack = heap_alloc(sizeof(REOpData) * operatorStackSize);
1671 if (!operatorStack)
1672 return FALSE;
1673
1674 operandStack = heap_alloc(sizeof(RENode *) * operandStackSize);
1675 if (!operandStack)
1676 goto out;
1677
1678 for (;;) {
1679 parenIndex = state->parenCount;
1680 if (state->cp == state->cpend) {
1681 /*
1682 * If we are at the end of the regexp and we're short one or more
1683 * operands, the regexp must have the form /x|/ or some such, with
1684 * left parentheses making us short more than one operand.
1685 */
1686 if (operatorSP >= operandSP) {
1687 operand = NewRENode(state, REOP_EMPTY);
1688 if (!operand)
1689 goto out;
1690 goto pushOperand;
1691 }
1692 } else {
1693 switch (*state->cp) {
1694 case '(':
1695 ++state->cp;
1696 if (state->cp + 1 < state->cpend &&
1697 *state->cp == '?' &&
1698 (state->cp[1] == '=' ||
1699 state->cp[1] == '!' ||
1700 state->cp[1] == ':')) {
1701 switch (state->cp[1]) {
1702 case '=':
1703 op = REOP_ASSERT;
1704 /* ASSERT, <next>, ... ASSERTTEST */
1705 state->progLength += 4;
1706 break;
1707 case '!':
1709 /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
1710 state->progLength += 4;
1711 break;
1712 default:
1714 break;
1715 }
1716 state->cp += 2;
1717 } else {
1718 op = REOP_LPAREN;
1719 /* LPAREN, <index>, ... RPAREN, <index> */
1720 state->progLength
1721 += 2 * (1 + GetCompactIndexWidth(parenIndex));
1722 state->parenCount++;
1723 if (state->parenCount == 65535) {
1724 ReportRegExpError(state, JSREPORT_ERROR,
1725 JSMSG_TOO_MANY_PARENS);
1726 goto out;
1727 }
1728 }
1729 goto pushOperator;
1730
1731 case ')':
1732 /*
1733 * If there's no stacked open parenthesis, throw syntax error.
1734 */
1735 for (i = operatorSP - 1; ; i--) {
1736 if (i < 0) {
1737 ReportRegExpError(state, JSREPORT_ERROR,
1738 JSMSG_UNMATCHED_RIGHT_PAREN);
1739 goto out;
1740 }
1741 if (operatorStack[i].op == REOP_ASSERT ||
1742 operatorStack[i].op == REOP_ASSERT_NOT ||
1743 operatorStack[i].op == REOP_LPARENNON ||
1744 operatorStack[i].op == REOP_LPAREN) {
1745 break;
1746 }
1747 }
1748 /* FALL THROUGH */
1749
1750 case '|':
1751 /* Expected an operand before these, so make an empty one */
1752 operand = NewRENode(state, REOP_EMPTY);
1753 if (!operand)
1754 goto out;
1755 goto pushOperand;
1756
1757 default:
1758 if (!ParseTerm(state))
1759 goto out;
1760 operand = state->result;
1761pushOperand:
1762 if (operandSP == operandStackSize) {
1763 RENode **tmp;
1764 operandStackSize += operandStackSize;
1765 tmp = heap_realloc(operandStack, sizeof(RENode *) * operandStackSize);
1766 if (!tmp)
1767 goto out;
1768 operandStack = tmp;
1769 }
1770 operandStack[operandSP++] = operand;
1771 break;
1772 }
1773 }
1774
1775 /* At the end; process remaining operators. */
1776restartOperator:
1777 if (state->cp == state->cpend) {
1778 while (operatorSP) {
1779 --operatorSP;
1780 if (!ProcessOp(state, &operatorStack[operatorSP],
1781 operandStack, operandSP))
1782 goto out;
1783 --operandSP;
1784 }
1785 assert(operandSP == 1);
1786 state->result = operandStack[0];
1787 result = TRUE;
1788 goto out;
1789 }
1790
1791 switch (*state->cp) {
1792 case '|':
1793 /* Process any stacked 'concat' operators */
1794 ++state->cp;
1795 while (operatorSP &&
1796 operatorStack[operatorSP - 1].op == REOP_CONCAT) {
1797 --operatorSP;
1798 if (!ProcessOp(state, &operatorStack[operatorSP],
1799 operandStack, operandSP)) {
1800 goto out;
1801 }
1802 --operandSP;
1803 }
1804 op = REOP_ALT;
1805 goto pushOperator;
1806
1807 case ')':
1808 /*
1809 * If there's no stacked open parenthesis, throw syntax error.
1810 */
1811 for (i = operatorSP - 1; ; i--) {
1812 if (i < 0) {
1813 ReportRegExpError(state, JSREPORT_ERROR,
1814 JSMSG_UNMATCHED_RIGHT_PAREN);
1815 goto out;
1816 }
1817 if (operatorStack[i].op == REOP_ASSERT ||
1818 operatorStack[i].op == REOP_ASSERT_NOT ||
1819 operatorStack[i].op == REOP_LPARENNON ||
1820 operatorStack[i].op == REOP_LPAREN) {
1821 break;
1822 }
1823 }
1824 ++state->cp;
1825
1826 /* Process everything on the stack until the open parenthesis. */
1827 for (;;) {
1828 assert(operatorSP);
1829 --operatorSP;
1830 switch (operatorStack[operatorSP].op) {
1831 case REOP_ASSERT:
1832 case REOP_ASSERT_NOT:
1833 case REOP_LPAREN:
1834 operand = NewRENode(state, operatorStack[operatorSP].op);
1835 if (!operand)
1836 goto out;
1837 operand->u.parenIndex =
1838 operatorStack[operatorSP].parenIndex;
1839 assert(operandSP);
1840 operand->kid = operandStack[operandSP - 1];
1841 operandStack[operandSP - 1] = operand;
1842 if (state->treeDepth == TREE_DEPTH_MAX) {
1843 ReportRegExpError(state, JSREPORT_ERROR,
1844 JSMSG_REGEXP_TOO_COMPLEX);
1845 goto out;
1846 }
1847 ++state->treeDepth;
1848 /* FALL THROUGH */
1849
1850 case REOP_LPARENNON:
1851 state->result = operandStack[operandSP - 1];
1852 if (!ParseQuantifier(state))
1853 goto out;
1854 operandStack[operandSP - 1] = state->result;
1855 goto restartOperator;
1856 default:
1857 if (!ProcessOp(state, &operatorStack[operatorSP],
1858 operandStack, operandSP))
1859 goto out;
1860 --operandSP;
1861 break;
1862 }
1863 }
1864 break;
1865
1866 case '{':
1867 {
1868 const WCHAR *errp = state->cp;
1869
1870 if (ParseMinMaxQuantifier(state, TRUE) < 0) {
1871 /*
1872 * This didn't even scan correctly as a quantifier, so we should
1873 * treat it as flat.
1874 */
1875 op = REOP_CONCAT;
1876 goto pushOperator;
1877 }
1878
1879 state->cp = errp;
1880 /* FALL THROUGH */
1881 }
1882
1883 case '+':
1884 case '*':
1885 case '?':
1886 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
1887 state->cp);
1888 result = FALSE;
1889 goto out;
1890
1891 default:
1892 /* Anything else is the start of the next term. */
1893 op = REOP_CONCAT;
1894pushOperator:
1895 if (operatorSP == operatorStackSize) {
1896 REOpData *tmp;
1897 operatorStackSize += operatorStackSize;
1898 tmp = heap_realloc(operatorStack, sizeof(REOpData) * operatorStackSize);
1899 if (!tmp)
1900 goto out;
1901 operatorStack = tmp;
1902 }
1903 operatorStack[operatorSP].op = op;
1904 operatorStack[operatorSP].errPos = state->cp;
1905 operatorStack[operatorSP++].parenIndex = parenIndex;
1906 break;
1907 }
1908 }
1909out:
1910 heap_free(operatorStack);
1911 heap_free(operandStack);
1912 return result;
1913}
static void * heap_realloc(void *mem, size_t len)
Definition: appwiz.h:71
static BOOL ParseTerm(CompilerState *state)
Definition: regexp.c:1341
#define INITIAL_STACK_SIZE
Definition: regexp.c:1648
static BOOL ParseQuantifier(CompilerState *state)
Definition: regexp.c:1217
static BOOL ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, INT operandSP)
Definition: regexp.c:805
static FILE * out
Definition: regtests2xml.c:44
union RENode::@452 u
size_t parenIndex
Definition: regexp.c:267
const WCHAR * errPos
Definition: regexp.c:448
REOp op
Definition: regexp.c:447
size_t parenIndex
Definition: regexp.c:449

Referenced by FindParenCount(), and regexp_new().

◆ ParseTerm()

static BOOL ParseTerm ( CompilerState state)
static

Definition at line 1341 of file regexp.c.

1342{
1343 WCHAR c = *state->cp++;
1344 UINT nDigits;
1345 UINT num, tmp, n, i;
1346 const WCHAR *termStart;
1347
1348 switch (c) {
1349 /* assertions and atoms */
1350 case '^':
1351 state->result = NewRENode(state, REOP_BOL);
1352 if (!state->result)
1353 return FALSE;
1354 state->progLength++;
1355 return TRUE;
1356 case '$':
1357 state->result = NewRENode(state, REOP_EOL);
1358 if (!state->result)
1359 return FALSE;
1360 state->progLength++;
1361 return TRUE;
1362 case '\\':
1363 if (state->cp >= state->cpend) {
1364 /* a trailing '\' is an error */
1365 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1366 return FALSE;
1367 }
1368 c = *state->cp++;
1369 switch (c) {
1370 /* assertion escapes */
1371 case 'b' :
1372 state->result = NewRENode(state, REOP_WBDRY);
1373 if (!state->result)
1374 return FALSE;
1375 state->progLength++;
1376 return TRUE;
1377 case 'B':
1378 state->result = NewRENode(state, REOP_WNONBDRY);
1379 if (!state->result)
1380 return FALSE;
1381 state->progLength++;
1382 return TRUE;
1383 /* Decimal escape */
1384 case '0':
1385 /* Give a strict warning. See also the note below. */
1386 WARN("non-octal digit in an escape sequence that doesn't match a back-reference\n");
1387 doOctal:
1388 num = 0;
1389 while (state->cp < state->cpend) {
1390 c = *state->cp;
1391 if (c < '0' || '7' < c)
1392 break;
1393 state->cp++;
1394 tmp = 8 * num + (UINT)JS7_UNDEC(c);
1395 if (tmp > 0377)
1396 break;
1397 num = tmp;
1398 }
1399 c = (WCHAR)num;
1400 doFlat:
1401 state->result = NewRENode(state, REOP_FLAT);
1402 if (!state->result)
1403 return FALSE;
1404 state->result->u.flat.chr = c;
1405 state->result->u.flat.length = 1;
1406 state->progLength += 3;
1407 break;
1408 case '1':
1409 case '2':
1410 case '3':
1411 case '4':
1412 case '5':
1413 case '6':
1414 case '7':
1415 case '8':
1416 case '9':
1417 termStart = state->cp - 1;
1418 num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1419 if (state->flags & JSREG_FIND_PAREN_ERROR)
1420 return FALSE;
1421 if (num == OVERFLOW_VALUE) {
1422 /* Give a strict mode warning. */
1423 WARN("back-reference exceeds number of capturing parentheses\n");
1424
1425 /*
1426 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1427 * error here. However, for compatibility with IE, we treat the
1428 * whole backref as flat if the first character in it is not a
1429 * valid octal character, and as an octal escape otherwise.
1430 */
1431 state->cp = termStart;
1432 if (c >= '8') {
1433 /* Treat this as flat. termStart - 1 is the \. */
1434 c = '\\';
1435 goto asFlat;
1436 }
1437
1438 /* Treat this as an octal escape. */
1439 goto doOctal;
1440 }
1441 assert(1 <= num && num <= 0x10000);
1442 state->result = NewRENode(state, REOP_BACKREF);
1443 if (!state->result)
1444 return FALSE;
1445 state->result->u.parenIndex = num - 1;
1446 state->progLength
1447 += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1448 break;
1449 /* Control escape */
1450 case 'f':
1451 c = 0xC;
1452 goto doFlat;
1453 case 'n':
1454 c = 0xA;
1455 goto doFlat;
1456 case 'r':
1457 c = 0xD;
1458 goto doFlat;
1459 case 't':
1460 c = 0x9;
1461 goto doFlat;
1462 case 'v':
1463 c = 0xB;
1464 goto doFlat;
1465 /* Control letter */
1466 case 'c':
1467 if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1468 c = (WCHAR) (*state->cp++ & 0x1F);
1469 } else {
1470 /* back off to accepting the original '\' as a literal */
1471 --state->cp;
1472 c = '\\';
1473 }
1474 goto doFlat;
1475 /* HexEscapeSequence */
1476 case 'x':
1477 nDigits = 2;
1478 goto lexHex;
1479 /* UnicodeEscapeSequence */
1480 case 'u':
1481 nDigits = 4;
1482lexHex:
1483 n = 0;
1484 for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1485 UINT digit;
1486 c = *state->cp++;
1487 if (!isASCIIHexDigit(c, &digit)) {
1488 /*
1489 * Back off to accepting the original 'u' or 'x' as a
1490 * literal.
1491 */
1492 state->cp -= i + 2;
1493 n = *state->cp++;
1494 break;
1495 }
1496 n = (n << 4) | digit;
1497 }
1498 c = (WCHAR) n;
1499 goto doFlat;
1500 /* Character class escapes */
1501 case 'd':
1502 state->result = NewRENode(state, REOP_DIGIT);
1503doSimple:
1504 if (!state->result)
1505 return FALSE;
1506 state->progLength++;
1507 break;
1508 case 'D':
1509 state->result = NewRENode(state, REOP_NONDIGIT);
1510 goto doSimple;
1511 case 's':
1512 state->result = NewRENode(state, REOP_SPACE);
1513 goto doSimple;
1514 case 'S':
1515 state->result = NewRENode(state, REOP_NONSPACE);
1516 goto doSimple;
1517 case 'w':
1518 state->result = NewRENode(state, REOP_ALNUM);
1519 goto doSimple;
1520 case 'W':
1521 state->result = NewRENode(state, REOP_NONALNUM);
1522 goto doSimple;
1523 /* IdentityEscape */
1524 default:
1525 state->result = NewRENode(state, REOP_FLAT);
1526 if (!state->result)
1527 return FALSE;
1528 state->result->u.flat.chr = c;
1529 state->result->u.flat.length = 1;
1530 state->result->kid = (void *) (state->cp - 1);
1531 state->progLength += 3;
1532 break;
1533 }
1534 break;
1535 case '[':
1536 state->result = NewRENode(state, REOP_CLASS);
1537 if (!state->result)
1538 return FALSE;
1539 termStart = state->cp;
1540 state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1541 for (;;) {
1542 if (state->cp == state->cpend) {
1543 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1544 JSMSG_UNTERM_CLASS, termStart);
1545
1546 return FALSE;
1547 }
1548 if (*state->cp == '\\') {
1549 state->cp++;
1550 if (state->cp != state->cpend)
1551 state->cp++;
1552 continue;
1553 }
1554 if (*state->cp == ']') {
1555 state->result->u.ucclass.kidlen = state->cp - termStart;
1556 break;
1557 }
1558 state->cp++;
1559 }
1560 for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1561 if (!state->classCache[i].start) {
1562 state->classCache[i].start = termStart;
1563 state->classCache[i].length = state->result->u.ucclass.kidlen;
1564 state->classCache[i].index = state->classCount;
1565 break;
1566 }
1567 if (state->classCache[i].length ==
1568 state->result->u.ucclass.kidlen) {
1569 for (n = 0; ; n++) {
1570 if (n == state->classCache[i].length) {
1571 state->result->u.ucclass.index
1572 = state->classCache[i].index;
1573 goto claim;
1574 }
1575 if (state->classCache[i].start[n] != termStart[n])
1576 break;
1577 }
1578 }
1579 }
1580 state->result->u.ucclass.index = state->classCount++;
1581
1582 claim:
1583 /*
1584 * Call CalculateBitmapSize now as we want any errors it finds
1585 * to be reported during the parse phase, not at execution.
1586 */
1587 if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1588 return FALSE;
1589 /*
1590 * Update classBitmapsMem with number of bytes to hold bmsize bits,
1591 * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1592 * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1593 */
1594 n = (state->result->u.ucclass.bmsize >> 3) + 1;
1595 if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1596 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1597 return FALSE;
1598 }
1599 state->classBitmapsMem += n;
1600 /* CLASS, <index> */
1601 state->progLength
1602 += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1603 break;
1604
1605 case '.':
1606 state->result = NewRENode(state, REOP_DOT);
1607 goto doSimple;
1608
1609 case '{':
1610 {
1611 const WCHAR *errp = state->cp--;
1612 INT err;
1613
1615 state->cp = errp;
1616
1617 if (err < 0)
1618 goto asFlat;
1619
1620 /* FALL THROUGH */
1621 }
1622 case '*':
1623 case '+':
1624 case '?':
1625 ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1626 JSMSG_BAD_QUANTIFIER, state->cp - 1);
1627 return FALSE;
1628 default:
1629asFlat:
1630 state->result = NewRENode(state, REOP_FLAT);
1631 if (!state->result)
1632 return FALSE;
1633 state->result->u.flat.chr = c;
1634 state->result->u.flat.length = 1;
1635 state->result->kid = (void *) (state->cp - 1);
1636 state->progLength += 3;
1637 break;
1638 }
1639 return ParseQuantifier(state);
1640}
#define WARN(fmt,...)
Definition: precomp.h:61
GLuint GLuint num
Definition: glext.h:9618
static UINT FindParenCount(CompilerState *state)
Definition: regexp.c:906
static BOOL CalculateBitmapSize(CompilerState *state, RENode *target, const WCHAR *src, const WCHAR *end)
Definition: regexp.c:969
#define CLASS_BITMAPS_MEM_LIMIT
Definition: regexp.c:353

Referenced by ParseRegExp().

◆ ProcessCharSet()

static BOOL ProcessCharSet ( REGlobalData gData,
RECharSet charSet 
)
static

Definition at line 2087 of file regexp.c.

2088{
2089 const WCHAR *src, *end;
2090 BOOL inRange = FALSE;
2091 WCHAR rangeStart = 0;
2092 UINT byteLength, n;
2093 WCHAR c, thisCh;
2094 INT nDigits, i;
2095
2096 assert(!charSet->converted);
2097 /*
2098 * Assert that startIndex and length points to chars inside [] inside
2099 * source string.
2100 */
2101 assert(1 <= charSet->u.src.startIndex);
2102 assert(charSet->u.src.startIndex < gData->regexp->source_len);
2103 assert(charSet->u.src.length <= gData->regexp->source_len
2104 - 1 - charSet->u.src.startIndex);
2105
2106 charSet->converted = TRUE;
2107 src = gData->regexp->source + charSet->u.src.startIndex;
2108
2109 end = src + charSet->u.src.length;
2110
2111 assert(src[-1] == '[' && end[0] == ']');
2112
2113 byteLength = (charSet->length >> 3) + 1;
2114 charSet->u.bits = heap_alloc(byteLength);
2115 if (!charSet->u.bits) {
2116 JS_ReportOutOfMemory(gData->cx);
2117 gData->ok = FALSE;
2118 return FALSE;
2119 }
2120 memset(charSet->u.bits, 0, byteLength);
2121
2122 if (src == end)
2123 return TRUE;
2124
2125 if (*src == '^') {
2126 assert(charSet->sense == FALSE);
2127 ++src;
2128 } else {
2129 assert(charSet->sense == TRUE);
2130 }
2131
2132 while (src != end) {
2133 switch (*src) {
2134 case '\\':
2135 ++src;
2136 c = *src++;
2137 switch (c) {
2138 case 'b':
2139 thisCh = 0x8;
2140 break;
2141 case 'f':
2142 thisCh = 0xC;
2143 break;
2144 case 'n':
2145 thisCh = 0xA;
2146 break;
2147 case 'r':
2148 thisCh = 0xD;
2149 break;
2150 case 't':
2151 thisCh = 0x9;
2152 break;
2153 case 'v':
2154 thisCh = 0xB;
2155 break;
2156 case 'c':
2157 if (src < end && JS_ISWORD(*src)) {
2158 thisCh = (WCHAR)(*src++ & 0x1F);
2159 } else {
2160 --src;
2161 thisCh = '\\';
2162 }
2163 break;
2164 case 'x':
2165 nDigits = 2;
2166 goto lexHex;
2167 case 'u':
2168 nDigits = 4;
2169 lexHex:
2170 n = 0;
2171 for (i = 0; (i < nDigits) && (src < end); i++) {
2172 UINT digit;
2173 c = *src++;
2174 if (!isASCIIHexDigit(c, &digit)) {
2175 /*
2176 * Back off to accepting the original '\'
2177 * as a literal
2178 */
2179 src -= i + 1;
2180 n = '\\';
2181 break;
2182 }
2183 n = (n << 4) | digit;
2184 }
2185 thisCh = (WCHAR)n;
2186 break;
2187 case '0':
2188 case '1':
2189 case '2':
2190 case '3':
2191 case '4':
2192 case '5':
2193 case '6':
2194 case '7':
2195 /*
2196 * This is a non-ECMA extension - decimal escapes (in this
2197 * case, octal!) are supposed to be an error inside class
2198 * ranges, but supported here for backwards compatibility.
2199 */
2200 n = JS7_UNDEC(c);
2201 c = *src;
2202 if ('0' <= c && c <= '7') {
2203 src++;
2204 n = 8 * n + JS7_UNDEC(c);
2205 c = *src;
2206 if ('0' <= c && c <= '7') {
2207 src++;
2208 i = 8 * n + JS7_UNDEC(c);
2209 if (i <= 0377)
2210 n = i;
2211 else
2212 src--;
2213 }
2214 }
2215 thisCh = (WCHAR)n;
2216 break;
2217
2218 case 'd':
2219 AddCharacterRangeToCharSet(charSet, '0', '9');
2220 continue; /* don't need range processing */
2221 case 'D':
2222 AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2224 (WCHAR)('9' + 1),
2225 (WCHAR)charSet->length);
2226 continue;
2227 case 's':
2228 for (i = (INT)charSet->length; i >= 0; i--)
2229 if (iswspace(i))
2230 AddCharacterToCharSet(charSet, (WCHAR)i);
2231 continue;
2232 case 'S':
2233 for (i = (INT)charSet->length; i >= 0; i--)
2234 if (!iswspace(i))
2235 AddCharacterToCharSet(charSet, (WCHAR)i);
2236 continue;
2237 case 'w':
2238 for (i = (INT)charSet->length; i >= 0; i--)
2239 if (JS_ISWORD(i))
2240 AddCharacterToCharSet(charSet, (WCHAR)i);
2241 continue;
2242 case 'W':
2243 for (i = (INT)charSet->length; i >= 0; i--)
2244 if (!JS_ISWORD(i))
2245 AddCharacterToCharSet(charSet, (WCHAR)i);
2246 continue;
2247 default:
2248 thisCh = c;
2249 break;
2250
2251 }
2252 break;
2253
2254 default:
2255 thisCh = *src++;
2256 break;
2257
2258 }
2259 if (inRange) {
2260 if (gData->regexp->flags & REG_FOLD) {
2261 assert(rangeStart <= thisCh);
2262 for (i = rangeStart; i <= thisCh; i++) {
2263 WCHAR uch, dch;
2264
2265 AddCharacterToCharSet(charSet, i);
2266 uch = towupper(i);
2267 dch = towlower(i);
2268 if (i != uch)
2269 AddCharacterToCharSet(charSet, uch);
2270 if (i != dch)
2271 AddCharacterToCharSet(charSet, dch);
2272 }
2273 } else {
2274 AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2275 }
2276 inRange = FALSE;
2277 } else {
2278 if (gData->regexp->flags & REG_FOLD) {
2279 AddCharacterToCharSet(charSet, towupper(thisCh));
2280 AddCharacterToCharSet(charSet, towlower(thisCh));
2281 } else {
2282 AddCharacterToCharSet(charSet, thisCh);
2283 }
2284 if (src < end - 1) {
2285 if (*src == '-') {
2286 ++src;
2287 inRange = TRUE;
2288 rangeStart = thisCh;
2289 }
2290 }
2291 }
2292 }
2293 return TRUE;
2294}
#define iswspace(_c)
Definition: ctype.h:669
#define JS_ReportOutOfMemory(a)
Definition: regexp.c:49
static void AddCharacterToCharSet(RECharSet *cs, WCHAR c)
Definition: regexp.c:2053
#define JS_ISWORD(c)
Definition: regexp.c:89
static void AddCharacterRangeToCharSet(RECharSet *cs, UINT c1, UINT c2)
Definition: regexp.c:2063
#define memset(x, y, z)
Definition: compat.h:39
const WCHAR * source
Definition: regexp.h:61
DWORD source_len
Definition: regexp.h:62

Referenced by ExecuteREBytecode(), and InitMatch().

◆ ProcessOp()

static BOOL ProcessOp ( CompilerState state,
REOpData opData,
RENode **  operandStack,
INT  operandSP 
)
static

Definition at line 805 of file regexp.c.

807{
808 RENode *result;
809
810 switch (opData->op) {
811 case REOP_ALT:
813 if (!result)
814 return FALSE;
815 result->kid = operandStack[operandSP - 2];
816 result->u.kid2 = operandStack[operandSP - 1];
817 operandStack[operandSP - 2] = result;
818
819 if (state->treeDepth == TREE_DEPTH_MAX) {
820 ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
821 return FALSE;
822 }
823 ++state->treeDepth;
824
825 /*
826 * Look at both alternates to see if there's a FLAT or a CLASS at
827 * the start of each. If so, use a prerequisite match.
828 */
829 if (((RENode *) result->kid)->op == REOP_FLAT &&
830 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
831 (state->flags & REG_FOLD) == 0) {
833 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
834 result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
835 /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
836 JUMP, <end> ... ENDALT */
837 state->progLength += 13;
838 }
839 else
840 if (((RENode *) result->kid)->op == REOP_CLASS &&
841 ((RENode *) result->kid)->u.ucclass.index < 256 &&
842 ((RENode *) result->u.kid2)->op == REOP_FLAT &&
843 (state->flags & REG_FOLD) == 0) {
845 result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
846 result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
847 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
848 JUMP, <end> ... ENDALT */
849 state->progLength += 13;
850 }
851 else
852 if (((RENode *) result->kid)->op == REOP_FLAT &&
853 ((RENode *) result->u.kid2)->op == REOP_CLASS &&
854 ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
855 (state->flags & REG_FOLD) == 0) {
857 result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
858 result->u.altprereq.ch2 =
859 ((RENode *) result->u.kid2)->u.ucclass.index;
860 /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
861 JUMP, <end> ... ENDALT */
862 state->progLength += 13;
863 }
864 else {
865 /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
866 state->progLength += 7;
867 }
868 break;
869
870 case REOP_CONCAT:
871 result = operandStack[operandSP - 2];
872 while (result->next)
873 result = result->next;
874 result->next = operandStack[operandSP - 1];
875 break;
876
877 case REOP_ASSERT:
878 case REOP_ASSERT_NOT:
879 case REOP_LPARENNON:
880 case REOP_LPAREN:
881 /* These should have been processed by a close paren. */
882 ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
883 opData->errPos);
884 return FALSE;
885
886 default:;
887 }
888 return TRUE;
889}
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
void * kid2
Definition: regexp.c:265

Referenced by ParseRegExp().

◆ PushBackTrackState()

static REBackTrackData * PushBackTrackState ( REGlobalData gData,
REOp  op,
jsbytecode target,
match_state_t x,
const WCHAR cp,
size_t  parenIndex,
size_t  parenCount 
)
static

Definition at line 1922 of file regexp.c.

1925{
1926 size_t i;
1928 (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
1929
1930 size_t sz = sizeof(REBackTrackData) +
1931 gData->stateStackTop * sizeof(REProgState) +
1932 parenCount * sizeof(RECapture);
1933
1934 ptrdiff_t btsize = gData->backTrackStackSize;
1935 ptrdiff_t btincr = ((char *)result + sz) -
1936 ((char *)gData->backTrackStack + btsize);
1937
1938 TRACE("\tBT_Push: %lu,%lu\n", (ULONG_PTR)parenIndex, (ULONG_PTR)parenCount);
1939
1940 JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount));
1941 if (btincr > 0) {
1942 ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
1943
1944 JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION);
1945 btincr = ((btincr+btsize-1)/btsize)*btsize;
1946 gData->backTrackStack = heap_pool_grow(gData->pool, gData->backTrackStack, btsize, btincr);
1947 if (!gData->backTrackStack) {
1949 gData->ok = FALSE;
1950 return NULL;
1951 }
1952 gData->backTrackStackSize = btsize + btincr;
1953 result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
1954 }
1955 gData->backTrackSP = result;
1956 result->sz = gData->cursz;
1957 gData->cursz = sz;
1958
1959 result->backtrack_op = op;
1960 result->backtrack_pc = target;
1961 result->cp = cp;
1962 result->parenCount = parenCount;
1963 result->parenIndex = parenIndex;
1964
1965 result->saveStateStackTop = gData->stateStackTop;
1966 assert(gData->stateStackTop);
1967 memcpy(result + 1, gData->stateStack,
1968 sizeof(REProgState) * result->saveStateStackTop);
1969
1970 if (parenCount != 0) {
1971 memcpy((char *)(result + 1) +
1972 sizeof(REProgState) * result->saveStateStackTop,
1973 &x->parens[parenIndex],
1974 sizeof(RECapture) * parenCount);
1975 for (i = 0; i != parenCount; i++)
1976 x->parens[parenIndex + i].index = -1;
1977 }
1978
1979 return result;
1980}
GLintptr offset
Definition: glext.h:5920
#define JS_COUNT_OPERATION(a, b)
Definition: regexp.c:50
struct REBackTrackData REBackTrackData
void * heap_pool_grow(heap_pool_t *, void *, DWORD, DWORD) DECLSPEC_HIDDEN
Definition: jsutils.c:130

Referenced by ExecuteREBytecode().

◆ ReadCompactIndex()

static jsbytecode * ReadCompactIndex ( jsbytecode pc,
size_t result 
)
inlinestatic

Definition at line 385 of file regexp.c.

386{
387 size_t nextByte;
388
389 nextByte = *pc++;
390 if ((nextByte & 0x80) == 0) {
391 /*
392 * Short-circuit the most common case when compact index <= 127.
393 */
394 *result = nextByte;
395 } else {
396 size_t shift = 7;
397 *result = 0x7F & nextByte;
398 do {
399 nextByte = *pc++;
400 *result |= (nextByte & 0x7F) << shift;
401 shift += 7;
402 } while ((nextByte & 0x80) != 0);
403 }
404 return pc;
405}
#define shift
Definition: input.c:1755

Referenced by ExecuteREBytecode(), and SimpleMatch().

◆ ReallocStateStack()

static BOOL ReallocStateStack ( REGlobalData gData)
static

Definition at line 2297 of file regexp.c.

2298{
2299 size_t limit = gData->stateStackLimit;
2300 size_t sz = sizeof(REProgState) * limit;
2301
2302 gData->stateStack = heap_pool_grow(gData->pool, gData->stateStack, sz, sz);
2303 if (!gData->stateStack) {
2305 gData->ok = FALSE;
2306 return FALSE;
2307 }
2308 gData->stateStackLimit = limit + limit;
2309 return TRUE;
2310}
GLint limit
Definition: glext.h:10326
struct REProgState REProgState

◆ regexp_destroy()

void regexp_destroy ( regexp_t re)

Definition at line 3181 of file regexp.c.

3182{
3183 if (re->classList) {
3184 UINT i;
3185 for (i = 0; i < re->classCount; i++) {
3186 if (re->classList[i].converted)
3187 heap_free(re->classList[i].u.bits);
3188 re->classList[i].u.bits = NULL;
3189 }
3190 heap_free(re->classList);
3191 }
3192 heap_free(re);
3193}

Referenced by RegExp2_put_Pattern(), RegExp2_Release(), RegExp_destructor(), regexp_new(), and regexp_set_flags().

◆ regexp_execute()

HRESULT regexp_execute ( regexp_t regexp,
void cx,
heap_pool_t pool,
const WCHAR str,
DWORD  str_len,
match_state_t result 
)

Definition at line 3140 of file regexp.c.

3142{
3144 REGlobalData gData;
3146 const WCHAR *str_beg = result->cp;
3147 HRESULT hres;
3148
3149 assert(result->cp != NULL);
3150
3151 gData.cpbegin = str;
3152 gData.cpend = str+str_len;
3153 gData.start = result->cp-str;
3154 gData.skipped = 0;
3155 gData.pool = pool;
3156
3157 hres = InitMatch(regexp, cx, pool, &gData);
3158 if(FAILED(hres)) {
3159 WARN("InitMatch failed\n");
3160 heap_pool_clear(mark);
3161 return hres;
3162 }
3163
3164 res = MatchRegExp(&gData, result);
3165 heap_pool_clear(mark);
3166 if(!gData.ok) {
3167 WARN("MatchRegExp failed\n");
3168 return E_FAIL;
3169 }
3170
3171 if(!res) {
3172 result->match_len = 0;
3173 return S_FALSE;
3174 }
3175
3176 result->match_len = (result->cp-str_beg) - gData.skipped;
3177 result->paren_count = regexp->parenCount;
3178 return S_OK;
3179}
GLuint res
Definition: glext.h:9613
#define FAILED(hr)
Definition: intsafe.h:51
static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
Definition: regexp.c:3100
static match_state_t * MatchRegExp(REGlobalData *gData, match_state_t *x)
Definition: regexp.c:3073
void heap_pool_clear(heap_pool_t *) DECLSPEC_HIDDEN
Definition: jsutils.c:146
heap_pool_t * heap_pool_mark(heap_pool_t *) DECLSPEC_HIDDEN
Definition: jsutils.c:182
HRESULT hres
Definition: protocol.c:465
const WCHAR * str
size_t start
Definition: regexp.c:238
#define str_len
Definition: treelist.c:89
#define S_FALSE
Definition: winerror.h:2357

Referenced by do_regexp_match_next(), RegExp2_Execute(), and RegExp2_Test().

◆ regexp_new()

regexp_t * regexp_new ( void cx,
heap_pool_t pool,
const WCHAR str,
DWORD  str_len,
WORD  flags,
BOOL  flat 
)

Definition at line 3195 of file regexp.c.

3197{
3198 regexp_t *re;
3199 heap_pool_t *mark;
3201 size_t resize;
3202 jsbytecode *endPC;
3203 UINT i;
3204 size_t len;
3205
3206 re = NULL;
3207 mark = heap_pool_mark(pool);
3208 len = str_len;
3209
3210 state.context = cx;
3211 state.pool = pool;
3212 state.cp = str;
3213 if (!state.cp)
3214 goto out;
3215 state.cpbegin = state.cp;
3216 state.cpend = state.cp + len;
3217 state.flags = flags;
3218 state.parenCount = 0;
3219 state.classCount = 0;
3220 state.progLength = 0;
3221 state.treeDepth = 0;
3222 state.classBitmapsMem = 0;
3223 for (i = 0; i < CLASS_CACHE_SIZE; i++)
3224 state.classCache[i].start = NULL;
3225
3226 if (len != 0 && flat) {
3227 state.result = NewRENode(&state, REOP_FLAT);
3228 if (!state.result)
3229 goto out;
3230 state.result->u.flat.chr = *state.cpbegin;
3231 state.result->u.flat.length = len;
3232 state.result->kid = (void *) state.cpbegin;
3233 /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
3234 state.progLength += 1 + GetCompactIndexWidth(0)
3236 } else {
3237 if (!ParseRegExp(&state))
3238 goto out;
3239 }
3240 resize = offsetof(regexp_t, program) + state.progLength + 1;
3241 re = heap_alloc(resize);
3242 if (!re)
3243 goto out;
3244
3245 assert(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
3246 re->classCount = state.classCount;
3247 if (re->classCount) {
3248 re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
3249 if (!re->classList) {
3250 regexp_destroy(re);
3251 re = NULL;
3252 goto out;
3253 }
3254 for (i = 0; i < re->classCount; i++)
3255 re->classList[i].converted = FALSE;
3256 } else {
3257 re->classList = NULL;
3258 }
3259 endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
3260 if (!endPC) {
3261 regexp_destroy(re);
3262 re = NULL;
3263 goto out;
3264 }
3265 *endPC++ = REOP_END;
3266 /*
3267 * Check whether size was overestimated and shrink using realloc.
3268 * This is safe since no pointers to newly parsed regexp or its parts
3269 * besides re exist here.
3270 */
3271 if ((size_t)(endPC - re->program) != state.progLength + 1) {
3272 regexp_t *tmp;
3273 assert((size_t)(endPC - re->program) < state.progLength + 1);
3274 resize = offsetof(regexp_t, program) + (endPC - re->program);
3275 tmp = heap_realloc(re, resize);
3276 if (tmp)
3277 re = tmp;
3278 }
3279
3280 re->flags = flags;
3281 re->parenCount = state.parenCount;
3282 re->source = str;
3283 re->source_len = str_len;
3284
3285out:
3286 heap_pool_clear(mark);
3287 return re;
3288}
GLuint program
Definition: glext.h:6723
GLbitfield flags
Definition: glext.h:7161
static jsbytecode * EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t)
Definition: regexp.c:475
void regexp_destroy(regexp_t *re)
Definition: regexp.c:3181
#define offsetof(TYPE, MEMBER)

Referenced by create_regexp(), RegExp2_Execute(), RegExp2_Test(), and regexp_set_flags().

◆ SetForwardJumpOffset()

static BOOL SetForwardJumpOffset ( jsbytecode jump,
jsbytecode target 
)
static

Definition at line 456 of file regexp.c.

457{
458 ptrdiff_t offset = target - jump;
459
460 /* Check that target really points forward. */
461 assert(offset >= 2);
462 if ((size_t)offset > OFFSET_MAX)
463 return FALSE;
464
465 jump[0] = JUMP_OFFSET_HI(offset);
466 jump[1] = JUMP_OFFSET_LO(offset);
467 return TRUE;
468}

Referenced by EmitREBytecode().

◆ SimpleMatch()

static match_state_t * SimpleMatch ( REGlobalData gData,
match_state_t x,
REOp  op,
jsbytecode **  startpc,
BOOL  updatecp 
)
inlinestatic

gData->cx->regExpStatics.multiline && FIXME !!!

gData->cx->regExpStatics.multiline &&

Definition at line 2328 of file regexp.c.

2330{
2332 WCHAR matchCh;
2333 size_t parenIndex;
2334 size_t offset, length, index;
2335 jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2336 const WCHAR *source;
2337 const WCHAR *startcp = x->cp;
2338 WCHAR ch;
2339 RECharSet *charSet;
2340
2341 const char *opname = reop_names[op];
2342 TRACE("\n%06d: %*s%s\n", (int)(pc - gData->regexp->program),
2343 (int)gData->stateStackTop * 2, "", opname);
2344
2345 switch (op) {
2346 case REOP_EMPTY:
2347 result = x;
2348 break;
2349 case REOP_BOL:
2350 if (x->cp != gData->cpbegin) {
2351 if (
2352 !(gData->regexp->flags & REG_MULTILINE)) {
2353 break;
2354 }
2355 if (!RE_IS_LINE_TERM(x->cp[-1]))
2356 break;
2357 }
2358 result = x;
2359 break;
2360 case REOP_EOL:
2361 if (x->cp != gData->cpend) {
2362 if (
2363 !(gData->regexp->flags & REG_MULTILINE)) {
2364 break;
2365 }
2366 if (!RE_IS_LINE_TERM(*x->cp))
2367 break;
2368 }
2369 result = x;
2370 break;
2371 case REOP_WBDRY:
2372 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2373 !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2374 result = x;
2375 }
2376 break;
2377 case REOP_WNONBDRY:
2378 if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2379 (x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2380 result = x;
2381 }
2382 break;
2383 case REOP_DOT:
2384 if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2385 result = x;
2386 result->cp++;
2387 }
2388 break;
2389 case REOP_DIGIT:
2390 if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) {
2391 result = x;
2392 result->cp++;
2393 }
2394 break;
2395 case REOP_NONDIGIT:
2396 if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) {
2397 result = x;
2398 result->cp++;
2399 }
2400 break;
2401 case REOP_ALNUM:
2402 if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2403 result = x;
2404 result->cp++;
2405 }
2406 break;
2407 case REOP_NONALNUM:
2408 if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2409 result = x;
2410 result->cp++;
2411 }
2412 break;
2413 case REOP_SPACE:
2414 if (x->cp != gData->cpend && iswspace(*x->cp)) {
2415 result = x;
2416 result->cp++;
2417 }
2418 break;
2419 case REOP_NONSPACE:
2420 if (x->cp != gData->cpend && !iswspace(*x->cp)) {
2421 result = x;
2422 result->cp++;
2423 }
2424 break;
2425 case REOP_BACKREF:
2426 pc = ReadCompactIndex(pc, &parenIndex);
2427 assert(parenIndex < gData->regexp->parenCount);
2428 result = BackrefMatcher(gData, x, parenIndex);
2429 break;
2430 case REOP_FLAT:
2431 pc = ReadCompactIndex(pc, &offset);
2432 assert(offset < gData->regexp->source_len);
2433 pc = ReadCompactIndex(pc, &length);
2434 assert(1 <= length);
2435 assert(length <= gData->regexp->source_len - offset);
2436 if (length <= (size_t)(gData->cpend - x->cp)) {
2437 source = gData->regexp->source + offset;
2438 TRACE("%s\n", debugstr_wn(source, length));
2439 for (index = 0; index != length; index++) {
2440 if (source[index] != x->cp[index])
2441 return NULL;
2442 }
2443 x->cp += length;
2444 result = x;
2445 }
2446 break;
2447 case REOP_FLAT1:
2448 matchCh = *pc++;
2449 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2450 if (x->cp != gData->cpend && *x->cp == matchCh) {
2451 result = x;
2452 result->cp++;
2453 }
2454 break;
2455 case REOP_FLATi:
2456 pc = ReadCompactIndex(pc, &offset);
2457 assert(offset < gData->regexp->source_len);
2458 pc = ReadCompactIndex(pc, &length);
2459 assert(1 <= length);
2460 assert(length <= gData->regexp->source_len - offset);
2461 source = gData->regexp->source;
2462 result = FlatNIMatcher(gData, x, source + offset, length);
2463 break;
2464 case REOP_FLAT1i:
2465 matchCh = *pc++;
2466 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2467 result = x;
2468 result->cp++;
2469 }
2470 break;
2471 case REOP_UCFLAT1:
2472 matchCh = GET_ARG(pc);
2473 TRACE(" '%c' == '%c'\n", (char)matchCh, (char)*x->cp);
2474 pc += ARG_LEN;
2475 if (x->cp != gData->cpend && *x->cp == matchCh) {
2476 result = x;
2477 result->cp++;
2478 }
2479 break;
2480 case REOP_UCFLAT1i:
2481 matchCh = GET_ARG(pc);
2482 pc += ARG_LEN;
2483 if (x->cp != gData->cpend && towupper(*x->cp) == towupper(matchCh)) {
2484 result = x;
2485 result->cp++;
2486 }
2487 break;
2488 case REOP_CLASS:
2489 pc = ReadCompactIndex(pc, &index);
2490 assert(index < gData->regexp->classCount);
2491 if (x->cp != gData->cpend) {
2492 charSet = &gData->regexp->classList[index];
2493 assert(charSet->converted);
2494 ch = *x->cp;
2495 index = ch >> 3;
2496 if (charSet->length != 0 &&
2497 ch <= charSet->length &&
2498 (charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2499 result = x;
2500 result->cp++;
2501 }
2502 }
2503 break;
2504 case REOP_NCLASS:
2505 pc = ReadCompactIndex(pc, &index);
2506 assert(index < gData->regexp->classCount);
2507 if (x->cp != gData->cpend) {
2508 charSet = &gData->regexp->classList[index];
2509 assert(charSet->converted);
2510 ch = *x->cp;
2511 index = ch >> 3;
2512 if (charSet->length == 0 ||
2513 ch > charSet->length ||
2514 !(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2515 result = x;
2516 result->cp++;
2517 }
2518 }
2519 break;
2520
2521 default:
2522 assert(FALSE);
2523 }
2524 if (result) {
2525 if (!updatecp)
2526 x->cp = startcp;
2527 *startpc = pc;
2528 TRACE(" *\n");
2529 return result;
2530 }
2531 x->cp = startcp;
2532 return NULL;
2533}
#define index(s, c)
Definition: various.h:29
static match_state_t * FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length)
Definition: regexp.c:1983
#define RE_IS_LINE_TERM(c)
Definition: regexp.c:86
static match_state_t * BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
Definition: regexp.c:2022
#define REG_MULTILINE
Definition: regexp.h:38
#define debugstr_wn
Definition: kernel32.h:33

Referenced by ExecuteREBytecode().

◆ WINE_DEFAULT_DEBUG_CHANNEL()

WINE_DEFAULT_DEBUG_CHANNEL ( jscript  )

◆ WriteCompactIndex()

static jsbytecode * WriteCompactIndex ( jsbytecode pc,
size_t  index 
)
inlinestatic

Definition at line 372 of file regexp.c.

373{
374 size_t next;
375
376 while ((next = index >> 7) != 0) {
377 *pc++ = (jsbytecode)(index | 0x80);
378 index = next;
379 }
380 *pc++ = (jsbytecode)index;
381 return pc;
382}
static unsigned __int64 next
Definition: rand_nt.c:6

Referenced by EmitREBytecode().

Variable Documentation

◆ reop_names

const char* reop_names[]
static

Definition at line 149 of file regexp.c.

Referenced by ExecuteREBytecode(), and SimpleMatch().