ReactOS  0.4.14-dev-608-gd495a4f
tokenize.c
Go to the documentation of this file.
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** A tokenizer for SQL
13 **
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
17 */
18 
19 #include <ctype.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 
23 #include "windef.h"
24 #include "winbase.h"
25 #include "wine/unicode.h"
26 #include "query.h"
27 #include "sql.tab.h"
28 
/*
** All the keywords of the SQL language are stored in a sorted
** table composed of instances of the following structure.
*/
33 typedef struct Keyword Keyword;
34 struct Keyword {
35  const WCHAR *name; /* The keyword name */
36  unsigned int len;
37  int tokenType; /* The token value for this keyword */
38 };
39 
/*
** Length, in WCHARs, of the longest keyword spelled out below
** ("LOCALIZABLE").  sqliteKeywordCode() returns TK_ID for any
** identifier longer than this.
*/
#define MAX_TOKEN_LEN 11

/*
** Upper-case spellings of the SQL keywords.  The initializers carry no
** terminating NUL, so the length of each keyword is taken from the array
** itself (ARRAY_SIZE) when the keyword table is built.
**
** NOTE(review): more arrays are defined here than are referenced by the
** aKeywordTable entries visible in this listing -- confirm against the
** complete file.
*/
static const WCHAR addW[] = {'A','D','D'};
static const WCHAR alterW[] = {'A','L','T','E','R'};
static const WCHAR andW[] = {'A','N','D'};
static const WCHAR byW[] = {'B','Y'};
static const WCHAR charW[] = {'C','H','A','R'};
static const WCHAR characterW[] = {'C','H','A','R','A','C','T','E','R'};
static const WCHAR createW[] = {'C','R','E','A','T','E'};
static const WCHAR deleteW[] = {'D','E','L','E','T','E'};
static const WCHAR distinctW[] = {'D','I','S','T','I','N','C','T'};
static const WCHAR dropW[] = {'D','R','O','P'};
static const WCHAR freeW[] = {'F','R','E','E'};
static const WCHAR fromW[] = {'F','R','O','M'};
static const WCHAR holdW[] = {'H','O','L','D'};
static const WCHAR insertW[] = {'I','N','S','E','R','T'};
static const WCHAR intW[] = {'I','N','T'};
static const WCHAR integerW[] = {'I','N','T','E','G','E','R'};
static const WCHAR intoW[] = {'I','N','T','O'};
static const WCHAR isW[] = {'I','S'};
static const WCHAR keyW[] = {'K','E','Y'};
static const WCHAR likeW[] = {'L','I','K','E'};
static const WCHAR localizableW[] = {'L','O','C','A','L','I','Z','A','B','L','E'};
static const WCHAR longW[] = {'L','O','N','G'};
static const WCHAR longcharW[] = {'L','O','N','G','C','H','A','R'};
static const WCHAR notW[] = {'N','O','T'};
static const WCHAR nullW[] = {'N','U','L','L'};
static const WCHAR objectW[] = {'O','B','J','E','C','T'};
static const WCHAR orW[] = {'O','R'};
static const WCHAR orderW[] = {'O','R','D','E','R'};
static const WCHAR primaryW[] = {'P','R','I','M','A','R','Y'};
static const WCHAR selectW[] = {'S','E','L','E','C','T'};
static const WCHAR setW[] = {'S','E','T'};
static const WCHAR shortW[] = {'S','H','O','R','T'};
static const WCHAR tableW[] = {'T','A','B','L','E'};
static const WCHAR temporaryW[] = {'T','E','M','P','O','R','A','R','Y'};
static const WCHAR updateW[] = {'U','P','D','A','T','E'};
static const WCHAR valuesW[] = {'V','A','L','U','E','S'};
static const WCHAR whereW[] = {'W','H','E','R','E'};
79 
80 /*
81 ** These are the keywords
82 ** They MUST be in alphabetical order
83 */
84 static const Keyword aKeywordTable[] = {
85  { addW, ARRAY_SIZE(addW), TK_ADD },
87  { andW, ARRAY_SIZE(andW), TK_AND },
88  { byW, ARRAY_SIZE(byW), TK_BY },
99  { intW, ARRAY_SIZE(intW), TK_INT },
101  { intoW, ARRAY_SIZE(intoW), TK_INTO },
102  { isW, ARRAY_SIZE(isW), TK_IS },
103  { keyW, ARRAY_SIZE(keyW), TK_KEY },
104  { likeW, ARRAY_SIZE(likeW), TK_LIKE },
106  { longW, ARRAY_SIZE(longW), TK_LONG },
108  { notW, ARRAY_SIZE(notW), TK_NOT },
109  { nullW, ARRAY_SIZE(nullW), TK_NULL },
111  { orW, ARRAY_SIZE(orW), TK_OR },
115  { setW, ARRAY_SIZE(setW), TK_SET },
122 };
123 
124 /*
125 ** Comparison function for binary search.
126 */
127 static int compKeyword(const void *m1, const void *m2){
128  const Keyword *k1 = m1, *k2 = m2;
129  int ret, len = min( k1->len, k2->len );
130 
131  if ((ret = memicmpW( k1->name, k2->name, len ))) return ret;
132  if (k1->len < k2->len) return -1;
133  else if (k1->len > k2->len) return 1;
134  return 0;
135 }
136 
137 /*
138 ** This function looks up an identifier to determine if it is a
139 ** keyword. If it is a keyword, the token code of that keyword is
140 ** returned. If the input is not a keyword, TK_ID is returned.
141 */
142 static int sqliteKeywordCode(const WCHAR *z, int n){
143  Keyword key, *r;
144 
145  if( n>MAX_TOKEN_LEN )
146  return TK_ID;
147 
148  key.tokenType = 0;
149  key.name = z;
150  key.len = n;
152  if( r )
153  return r->tokenType;
154  return TK_ID;
155 }
156 
157 
/*
** If X is a character that can be used in an identifier then
** isIdChar[X] will be 1.  Otherwise isIdChar[X] will be 0.
**
** In this implementation, an identifier can be a string of
** alphabetic characters, digits, and "_" plus any character
** with the high-order bit set.  The table additionally marks
** "-" (0x2D) and the control character 0x05 as identifier
** characters.  The high-order-bit rule means that any sequence
** of UTF-8 characters or characters taken from an extended
** ISO8859 character set can form an identifier.
*/
/* 256-entry lookup table: row = high nibble, column = low nibble. */
static const char isIdChar[] = {
  /*       x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
  /* 0x */  0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 1x */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 2x */  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
  /* 3x */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  /* 4x */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 5x */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  /* 6x */  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 7x */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  /* 8x */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 9x */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* Ax */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* Bx */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* Cx */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* Dx */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* Ex */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* Fx */  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
187 
188 /*
189 ** WCHAR safe version of isdigit()
190 */
191 static inline int isDigit(WCHAR c)
192 {
193  return c >= '0' && c <= '9';
194 }
195 
196 /*
197 ** WCHAR safe version of isspace(), except '\r'
198 */
199 static inline int isSpace(WCHAR c)
200 {
201  return c == ' ' || c == '\t' || c == '\n' || c == '\f';
202 }
203 
204 /*
205 ** Return the length of the token that begins at z[0]. Return
206 ** -1 if the token is (or might be) incomplete. Store the token
207 ** type in *tokenType before returning.
208 */
209 int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip){
210  int i;
211 
212  *skip = 0;
213  switch( *z ){
214  case ' ': case '\t': case '\n': case '\f':
215  for(i=1; isSpace(z[i]); i++){}
216  *tokenType = TK_SPACE;
217  return i;
218  case '-':
219  if( z[1]==0 ) return -1;
220  *tokenType = TK_MINUS;
221  return 1;
222  case '(':
223  *tokenType = TK_LP;
224  return 1;
225  case ')':
226  *tokenType = TK_RP;
227  return 1;
228  case '*':
229  *tokenType = TK_STAR;
230  return 1;
231  case '=':
232  *tokenType = TK_EQ;
233  return 1;
234  case '<':
235  if( z[1]=='=' ){
236  *tokenType = TK_LE;
237  return 2;
238  }else if( z[1]=='>' ){
239  *tokenType = TK_NE;
240  return 2;
241  }else{
242  *tokenType = TK_LT;
243  return 1;
244  }
245  case '>':
246  if( z[1]=='=' ){
247  *tokenType = TK_GE;
248  return 2;
249  }else{
250  *tokenType = TK_GT;
251  return 1;
252  }
253  case '!':
254  if( z[1]!='=' ){
255  *tokenType = TK_ILLEGAL;
256  return 2;
257  }else{
258  *tokenType = TK_NE;
259  return 2;
260  }
261  case '?':
262  *tokenType = TK_WILDCARD;
263  return 1;
264  case ',':
265  *tokenType = TK_COMMA;
266  return 1;
267  case '`': case '\'': {
268  int delim = z[0];
269  for(i=1; z[i]; i++){
270  if( z[i]==delim )
271  break;
272  }
273  if( z[i] ) i++;
274  if( delim == '`' )
275  *tokenType = TK_ID;
276  else
277  *tokenType = TK_STRING;
278  return i;
279  }
280  case '.':
281  if( !isDigit(z[1]) ){
282  *tokenType = TK_DOT;
283  return 1;
284  }
285  /* Fall through */
286  case '0': case '1': case '2': case '3': case '4':
287  case '5': case '6': case '7': case '8': case '9':
288  *tokenType = TK_INTEGER;
289  for(i=1; isDigit(z[i]); i++){}
290  return i;
291  case '[':
292  for(i=1; z[i] && z[i-1]!=']'; i++){}
293  *tokenType = TK_ID;
294  return i;
295  default:
296  if( !isIdChar[*z] ){
297  break;
298  }
299  for(i=1; isIdChar[z[i]]; i++){}
300  *tokenType = sqliteKeywordCode(z, i);
301  if( *tokenType == TK_ID && z[i] == '`' ) *skip = 1;
302  return i;
303  }
304  *tokenType = TK_ILLEGAL;
305  return 1;
306 }
static const WCHAR fromW[]
Definition: tokenize.c:53
const WCHAR * name
Definition: tokenize.c:35
static const WCHAR addW[]
Definition: tokenize.c:42
static const WCHAR likeW[]
Definition: tokenize.c:61
static const WCHAR whereW[]
Definition: tokenize.c:78
Definition: sql.tab.c:181
static const WCHAR byW[]
Definition: tokenize.c:45
static const WCHAR alterW[]
Definition: tokenize.c:43
WCHAR * name
Definition: path.c:43
int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip)
Definition: tokenize.c:209
static int isDigit(WCHAR c)
Definition: tokenize.c:191
static const WCHAR holdW[]
Definition: tokenize.c:54
GLdouble GLdouble GLdouble r
Definition: gl.h:2055
static int sqliteKeywordCode(const WCHAR *z, int n)
Definition: tokenize.c:142
#define MAX_TOKEN_LEN
Definition: tokenize.c:40
GLdouble n
Definition: glext.h:7729
static const WCHAR orderW[]
Definition: tokenize.c:69
static const WCHAR intoW[]
Definition: tokenize.c:58
static const WCHAR charW[]
Definition: tokenize.c:46
static const WCHAR objectW[]
Definition: tokenize.c:67
static const WCHAR dropW[]
Definition: tokenize.c:51
Definition: sql.tab.c:197
static const WCHAR orW[]
Definition: tokenize.c:68
Definition: sql.tab.c:193
static const WCHAR deleteW[]
Definition: tokenize.c:49
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
unsigned int len
Definition: tokenize.c:36
GLdouble GLdouble z
Definition: glext.h:5874
static const WCHAR andW[]
Definition: tokenize.c:44
static int compKeyword(const void *m1, const void *m2)
Definition: tokenize.c:127
static const WCHAR setW[]
Definition: tokenize.c:72
Definition: sql.tab.c:170
Definition: sql.tab.c:207
Definition: sql.tab.c:182
Definition: sql.tab.c:185
static int isSpace(WCHAR c)
Definition: tokenize.c:199
static const WCHAR temporaryW[]
Definition: tokenize.c:75
static const WCHAR nullW[]
Definition: tokenize.c:66
__wchar_t WCHAR
Definition: xmlstorage.h:180
static const WCHAR localizableW[]
Definition: tokenize.c:62
static const WCHAR keyW[]
Definition: tokenize.c:60
static const WCHAR selectW[]
Definition: tokenize.c:71
static const WCHAR updateW[]
Definition: tokenize.c:76
Definition: sql.tab.c:204
const GLubyte * c
Definition: glext.h:8905
Definition: sql.tab.c:196
static const WCHAR integerW[]
Definition: tokenize.c:57
static const WCHAR characterW[]
Definition: tokenize.c:47
static const WCHAR tableW[]
Definition: tokenize.c:74
static const WCHAR isW[]
Definition: tokenize.c:59
int ret
int tokenType
Definition: tokenize.c:37
HKEY key
Definition: reg.c:42
Definition: sql.tab.c:200
GLenum GLsizei len
Definition: glext.h:6722
static const WCHAR shortW[]
Definition: tokenize.c:73
static const WCHAR insertW[]
Definition: tokenize.c:55
static const WCHAR intW[]
Definition: tokenize.c:56
#define ARRAY_SIZE(a)
Definition: main.h:24
static const WCHAR primaryW[]
Definition: tokenize.c:70
#define min(a, b)
Definition: monoChain.cc:55
static const WCHAR notW[]
Definition: tokenize.c:65
static const WCHAR longcharW[]
Definition: tokenize.c:64
Definition: sql.tab.c:178
static const Keyword aKeywordTable[]
Definition: tokenize.c:84
#define skip(...)
Definition: atltest.h:64
static const char isIdChar[]
Definition: tokenize.c:168
static const WCHAR longW[]
Definition: tokenize.c:63
#define memicmpW(s1, s2, n)
Definition: unicode.h:27
static const WCHAR distinctW[]
Definition: tokenize.c:50
static const WCHAR freeW[]
Definition: tokenize.c:52
static const WCHAR createW[]
Definition: tokenize.c:48
Definition: sql.tab.c:191
static const WCHAR valuesW[]
Definition: tokenize.c:77
Definition: path.c:41
#define bsearch