ReactOS 0.4.15-dev-8191-gbc6c731
tokenize.cpp
Go to the documentation of this file.
1// tokenize.cpp
2
3#ifdef _MSC_VER
4#pragma warning ( disable : 4786 )
5#endif//_MSC_VER
6
7#include <string>
8#include <vector>
9#include <conio.h>
10
11#include "assert.h"
12#include "tokenize.h"
13#include "skip_ws.h"
14
15using std::string;
16using std::vector;
17
18void tokenize ( const string& text, vector<string>& tokens )
19{
20 tokens.resize ( 0 );
21 string s ( text );
22 char* p = &s[0];
23 while ( *p )
24 {
25 // skip whitespace
26 p = skip_ws ( p );
27 // check for literal string
28 if ( *p == '\"' )
29 {
30 // skip initial quote
31 char* end = p + 1;
32 for ( ;; )
33 {
34 if ( *end == '\\' )
35 {
36 end++;
37 switch ( *end )
38 {
39 case 'x':
40 case 'X':
41 ASSERT(0); // come back to this....
42 break;
43 case '0':
44 ASSERT(0);
45 break;
46 default:
47 end++;
48 break;
49 }
50 }
51 else if ( *end == '\"' )
52 {
53 end++;
54 break;
55 }
56 else
57 end++;
58 }
59 tokens.push_back ( string ( p, end-p ) );
60 p = end;
61 }
62 else if ( __iscsymf(*p) )
63 {
64 char* end = p + 1;
65 while ( __iscsym ( *end ) )
66 end++;
67 tokens.push_back ( string ( p, end-p ) );
68 p = end;
69 }
70 else if ( isdigit(*p) || *p == '.' )
71 {
72 char* end = p;
73 while ( isdigit(*end) )
74 end++;
75 bool f = false;
76 if ( *end == '.' )
77 {
78 end++;
79 while ( isdigit(*end) )
80 end++;
81 f = true;
82 }
83 if ( *end == 'f' || *end == 'F' )
84 end++;
85 else if ( !f && ( *end == 'l' || *end == 'L' ) )
86 end++;
87 tokens.push_back ( string ( p, end-p ) );
88 p = end;
89 }
90 else switch ( *p )
91 {
92 case '.':
93 tokens.push_back ( "." );
94 p++;
95 break;
96 case ',':
97 tokens.push_back ( "," );
98 p++;
99 break;
100 case '(':
101 tokens.push_back ( "(" );
102 p++;
103 break;
104 case ')':
105 tokens.push_back ( ")" );
106 p++;
107 break;
108 case '{':
109 tokens.push_back ( "{" );
110 p++;
111 break;
112 case '}':
113 tokens.push_back ( "}" );
114 p++;
115 break;
116 case '[':
117 tokens.push_back ( "[" );
118 p++;
119 break;
120 case ']':
121 tokens.push_back ( "]" );
122 p++;
123 break;
124 case ';':
125 tokens.push_back ( ";" );
126 p++;
127 break;
128 case '\\':
129 switch ( p[1] )
130 {
131 case '\n':
132 tokens.push_back ( string ( p, 2 ) );
133 p += 2;
134 break;
135 default:
136 ASSERT(0); // shouldn't hit here, I think
137 tokens.push_back ( "\\" );
138 p++;
139 break;
140 }
141 break;
142 case '|':
143 switch ( p[1] )
144 {
145 case '|':
146 tokens.push_back ( string ( p, 2 ) );
147 p += 2;
148 break;
149 default:
150 tokens.push_back ( "|" );
151 p++;
152 break;
153 }
154 break;
155 case '&':
156 switch ( p[1] )
157 {
158 case '&':
159 tokens.push_back ( string ( p, 2 ) );
160 p += 2;
161 break;
162 default:
163 tokens.push_back ( "&" );
164 p++;
165 break;
166 }
167 break;
168 case '<':
169 switch ( p[1] )
170 {
171 case '<':
172 if ( p[2] == '=' )
173 tokens.push_back ( string ( p, 3 ) ), p += 3;
174 else
175 tokens.push_back ( string ( p, 2 ) ), p += 2;
176 break;
177 case '=':
178 tokens.push_back ( string ( p, 2 ) );
179 p += 2;
180 break;
181 default:
182 tokens.push_back ( "<" );
183 p++;
184 break;
185 }
186 break;
187 case '>':
188 switch ( p[1] )
189 {
190 case '>':
191 if ( p[2] == '=' )
192 tokens.push_back ( string ( p, 3 ) ), p += 3;
193 else
194 tokens.push_back ( string ( p, 2 ) ), p += 2;
195 break;
196 case '=':
197 tokens.push_back ( string ( p, 2 ) );
198 p += 2;
199 break;
200 default:
201 tokens.push_back ( ">" );
202 p++;
203 break;
204 }
205 break;
206 case '!':
207 switch ( p[1] )
208 {
209 case '=':
210 tokens.push_back ( string ( p, 2 ) );
211 p += 2;
212 break;
213 default:
214 tokens.push_back ( "!" );
215 p++;
216 break;
217 }
218 break;
219 case '=':
220 switch ( p[1] )
221 {
222 case '=':
223 tokens.push_back ( string ( p, 2 ) );
224 p += 2;
225 break;
226 default:
227 tokens.push_back ( "=" );
228 p++;
229 break;
230 }
231 break;
232 case ':':
233 switch ( p[1] )
234 {
235 case ':':
236 tokens.push_back ( string ( p, 2 ) );
237 p += 2;
238 break;
239 default:
240 tokens.push_back ( ":" );
241 p++;
242 break;
243 }
244 break;
245 case '*':
246 switch ( p[1] )
247 {
248 case '=':
249 tokens.push_back ( string ( p, 2 ) );
250 p += 2;
251 break;
252 default:
253 tokens.push_back ( "*" );
254 p++;
255 break;
256 }
257 break;
258 case '/':
259 switch ( p[1] )
260 {
261 case '=':
262 tokens.push_back ( string ( p, 2 ) );
263 p += 2;
264 break;
265 default:
266 tokens.push_back ( "/" );
267 p++;
268 break;
269 }
270 break;
271 case '+':
272 switch ( p[1] )
273 {
274 case '+':
275 case '=':
276 tokens.push_back ( string ( p, 2 ) );
277 p += 2;
278 break;
279 default:
280 tokens.push_back ( "+" );
281 p++;
282 break;
283 }
284 break;
285 case '-':
286 switch ( p[1] )
287 {
288 case '-':
289 case '=':
290 tokens.push_back ( string ( p, 2 ) );
291 p += 2;
292 break;
293 default:
294 tokens.push_back ( "-" );
295 p++;
296 break;
297 }
298 break;
299 case '#':
300 while ( *p && *p != '\n' )
301 p++;
302 break;
303 case 0:
304 break;
305 default:
306 printf ( "choked on '%c' in tokenize() - press any key to continue\n", *p );
307 getch();
308 p++;
309 break;
310 }
311 }
312}
#define isdigit(c)
Definition: acclib.h:68
const WCHAR * text
Definition: package.c:1799
#define printf
Definition: freeldr.h:97
GLdouble s
Definition: gl.h:2039
GLuint GLuint end
Definition: gl.h:1545
GLfloat f
Definition: glext.h:7540
GLfloat GLfloat p
Definition: glext.h:8902
#define __iscsym(_c)
Definition: ctype.h:691
#define __iscsymf(_c)
Definition: ctype.h:690
#define ASSERT(a)
Definition: mode.c:44
_Check_return_ _CRTIMP int __cdecl getch(void)
static LPCTSTR skip_ws(LPCTSTR p)
Definition: set.c:48
void push_back(const _Tp &__x=_STLP_DEFAULT_CONSTRUCTED(_Tp))
Definition: _vector.h:379
void resize(size_type __new_size, const _Tp &__x=_STLP_DEFAULT_CONSTRUCTED(_Tp))
Definition: _vector.h:639
void tokenize(const string &text, vector< string > &tokens)
Definition: tokenize.cpp:18