ReactOS 0.4.15-dev-7953-g1f49173
Tokenizer Struct Reference

#include <tokenizer.hpp>

Classes

struct  TOKEN_REF
 

Public Types

typedef int myint
 

Public Member Functions

 Tokenizer (std::vector< TOKEN_DEF > &tokendefs)
 
TOKEN_REF match (std::smatch &matches, const std::string &str) const
 
TOKEN_REF match (std::smatch &matches, const std::string &str, size_t startpos) const
 

Static Public Member Functions

static unsigned int count_captures (const std::string &exp)
 
static std::regex CompileMultiRegex (const std::vector< TOKEN_DEF > &tokendefs)
 

Public Attributes

const std::vector< TOKEN_DEF > & m_tokendefs
 
const std::regex m_re
 

Detailed Description

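Tokenizer combines the regular expressions of a list of TOKEN_DEF entries into a single std::regex (m_re) and matches that regex against input strings, reporting each result as a TOKEN_REF. It stores a reference to the token definition vector passed to the constructor (m_tokendefs), not a copy.

Definition at line 61 of file tokenizer.hpp.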

Member Typedef Documentation

◆ myint

typedef int Tokenizer::myint

Definition at line 66 of file tokenizer.hpp.

Constructor & Destructor Documentation

◆ Tokenizer()

Tokenizer::Tokenizer ( std::vector< TOKEN_DEF > &  tokendefs)
inline

Definition at line 156 of file tokenizer.hpp.

157 : m_tokendefs(tokendefs),
158 m_re(CompileMultiRegex(tokendefs))
159 {
160 }
const std::regex m_re
Definition: tokenizer.hpp:64
const std::vector< TOKEN_DEF > & m_tokendefs
Definition: tokenizer.hpp:63
static std::regex CompileMultiRegex(const std::vector< TOKEN_DEF > &tokendefs)
Definition: tokenizer.hpp:115

Member Function Documentation

◆ CompileMultiRegex()

static std::regex Tokenizer::CompileMultiRegex ( const std::vector< TOKEN_DEF > &  tokendefs)
inline static

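Builds one regular expression from all token definitions. Returns a default-constructed std::regex when tokendefs is empty. Otherwise it checks with count_captures() that every RegExString contains exactly one capture group (throwing the string literal "invalid count!\n" if not), wraps each expression in a non-capturing group "(?:...)", joins them with "|", and compiles the result with std::regex_constants::icase.

Definition at line 115 of file tokenizer.hpp.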

116 {
117 std::string combinedString;
118
119 if (tokendefs.size() == 0)
120 {
121 return std::regex();
122 }
123
124 // Validate all token definitions
125 for (auto def : tokendefs)
126 {
127 size_t found = -1;
128
129 // Count capture groups
130 unsigned int count = count_captures(def.RegExString);
131 if (count != 1)
132 {
133 throw "invalid count!\n";
134 }
135 }
136
137 // Combine all expressions into one (one capture group for each)
138 combinedString = "(?:" + tokendefs[0].RegExString + ")";
139 for (size_t i = 1; i < tokendefs.size(); i++)
140 {
141 combinedString += "|(?:" + tokendefs[i].RegExString + ")";
142 }
143
144 return std::regex(combinedString, std::regex_constants::icase);
145 }
GLuint GLuint GLsizei count
Definition: gl.h:1545
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
static unsigned int count_captures(const std::string &exp)
Definition: tokenizer.hpp:70

◆ count_captures()

static unsigned int Tokenizer::count_captures ( const std::string &  exp)
inline static

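Counts the capture groups in the expression exp: every '(' that is not immediately followed by '?' is counted. A backslash and the character following it are skipped, and everything between '[' and the next unescaped ']' is ignored.

Definition at line 70 of file tokenizer.hpp.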

71 {
72 bool in_char_group = false;
73 unsigned int count = 0;
74
75 for (size_t i = 0; i < exp.size(); i++)
76 {
77 char c = exp[i];
78
79 // Skip escaped characters
80 if (c == '\\')
81 {
82 i++;
83 continue;
84 }
85
86 if (in_char_group)
87 {
88 if (c == ']')
89 {
90 in_char_group = false;
91 }
92 continue;
93 }
94
95 if (c == '[')
96 {
97 in_char_group = true;
98 continue;
99 }
100
101 if (c == '(')
102 {
103 if (exp[i + 1] != '?')
104 {
105 count++;
106 }
107 }
108 }
109
110 return count;
111 }
const GLubyte * c
Definition: glext.h:8905
DWORD exp
Definition: msg.c:16058

Referenced by CompileMultiRegex().

◆ match() [1/2]

TOKEN_REF Tokenizer::match ( std::smatch &  matches,
const std::string &  str 
) const
inline

Convenience overload: forwards to match(matches, str, 0), i.e. matches from the beginning of str.

Definition at line 162 of file tokenizer.hpp.

163 {
164 return match(matches, str, 0);
165 }
#define matches(FN)
Definition: match.h:70
const WCHAR * str
Definition: match.c:28

Referenced by TokenList::TokenList().

◆ match() [2/2]

TOKEN_REF Tokenizer::match ( std::smatch &  matches,
const std::string &  str,
size_t  startpos 
) const
inline

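Searches str, starting at offset startpos, with m_re and stores the result in matches. If startpos is at the end of str, it returns TOKEN_REF{ startpos, 0, -1 } without searching. It throws the string literal "Failed to match\n" when the search finds nothing, and "Failed to match at current position!\n" when the match has a non-empty prefix (it does not begin at startpos). On success it returns a TOKEN_REF built from startpos, the length of the first matched capture group i, and m_tokendefs[i - 1].Type. The wall-clock time spent in the search is added to the global search_time.

Definition at line 167 of file tokenizer.hpp.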

168 {
169 const std::string::const_iterator first = str.cbegin() + startpos;
170 const std::string::const_iterator last = str.cend();
171
172 // If we reached the end, there is nothing more to do
173 if (first == last)
174 {
175 return TOKEN_REF{ static_cast<unsigned int>(startpos), 0, -1 };
176 }
177
178 time_t start_time = time(NULL);
179
180 // Try to find a match
181 if (!std::regex_search(first, last, matches, m_re))
182 {
183 throw "Failed to match\n";
184 }
185
186 search_time += time(NULL) - start_time;
187
188 // Validate that it's at the start of the string
189 if (matches.prefix().matched)
190 {
191 throw "Failed to match at current position!\n";
192 }
193
194 // We have a match, check which one it is
195 for (size_t i = 1; i < matches.size(); i++)
196 {
197 if (matches[i].matched)
198 {
199 unsigned int len = static_cast<unsigned int>(matches.length(i));
200 int type = m_tokendefs[i - 1].Type;
201 return TOKEN_REF{ static_cast<unsigned int>(startpos), len, type};
202 }
203 }
204
205 // We should never get here
206 throw "Something went wrong!\n";
207 }
#define NULL
Definition: types.h:112
__kernel_time_t time_t
Definition: linux.h:252
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
const GLint * first
Definition: glext.h:5794
GLenum GLsizei len
Definition: glext.h:6722
__u16 time
Definition: mkdosfs.c:8
static UINT UINT last
Definition: font.c:45
time_t search_time
Definition: asmpp.cpp:26

Member Data Documentation

◆ m_re

const std::regex Tokenizer::m_re

Definition at line 64 of file tokenizer.hpp.

Referenced by match().

◆ m_tokendefs

const std::vector<TOKEN_DEF>& Tokenizer::m_tokendefs

Definition at line 63 of file tokenizer.hpp.

Referenced by match().


The documentation for this struct was generated from the following file: