ReactOS  0.4.15-dev-5640-g0dde428
tokenize.c
Go to the documentation of this file.
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** A tokenizer for SQL
13 **
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
17 */
18 
19 #include <ctype.h>
20 #include <stdarg.h>
21 #include <stdlib.h>
22 
23 #include "windef.h"
24 #include "winbase.h"
25 #include "query.h"
26 #include "sql.tab.h"
27 
28 /*
29 ** All the keywords of the SQL language are stored as in a hash
30 ** table composed of instances of the following structure.
31 */
32 typedef struct Keyword Keyword;
33 struct Keyword {
34  const WCHAR *name; /* The keyword name */
35  unsigned int len;
36  int tokenType; /* The token value for this keyword */
37 };
38 
39 #define MAX_TOKEN_LEN 11
40 
41 /*
42 ** These are the keywords
43 ** They MUST be in alphabetical order
44 */
45 #define X(str) str, ARRAY_SIZE(str) - 1
46 static const Keyword aKeywordTable[] = {
47  { X(L"ADD"), TK_ADD },
48  { X(L"ALTER"), TK_ALTER },
49  { X(L"AND"), TK_AND },
50  { X(L"BY"), TK_BY },
51  { X(L"CHAR"), TK_CHAR },
52  { X(L"CHARACTER"), TK_CHAR },
53  { X(L"CREATE"), TK_CREATE },
54  { X(L"DELETE"), TK_DELETE },
55  { X(L"DISTINCT"), TK_DISTINCT },
56  { X(L"DROP"), TK_DROP },
57  { X(L"FREE"), TK_FREE },
58  { X(L"FROM"), TK_FROM },
59  { X(L"HOLD"), TK_HOLD },
60  { X(L"INSERT"), TK_INSERT },
61  { X(L"INT"), TK_INT },
62  { X(L"INTEGER"), TK_INT },
63  { X(L"INTO"), TK_INTO },
64  { X(L"IS"), TK_IS },
65  { X(L"KEY"), TK_KEY },
66  { X(L"LIKE"), TK_LIKE },
67  { X(L"LOCALIZABLE"), TK_LOCALIZABLE },
68  { X(L"LONG"), TK_LONG },
69  { X(L"LONGCHAR"), TK_LONGCHAR },
70  { X(L"NOT"), TK_NOT },
71  { X(L"NULL"), TK_NULL },
72  { X(L"OBJECT"), TK_OBJECT },
73  { X(L"OR"), TK_OR },
74  { X(L"ORDER"), TK_ORDER },
75  { X(L"PRIMARY"), TK_PRIMARY },
76  { X(L"SELECT"), TK_SELECT },
77  { X(L"SET"), TK_SET },
78  { X(L"SHORT"), TK_SHORT },
79  { X(L"TABLE"), TK_TABLE },
80  { X(L"TEMPORARY"), TK_TEMPORARY },
81  { X(L"UPDATE"), TK_UPDATE },
82  { X(L"VALUES"), TK_VALUES },
83  { X(L"WHERE"), TK_WHERE },
84 };
85 #undef X
86 
87 /*
88 ** Comparison function for binary search.
89 */
90 static int __cdecl compKeyword(const void *m1, const void *m2){
91  const Keyword *k1 = m1, *k2 = m2;
92  int ret, len = min( k1->len, k2->len );
93 
94  if ((ret = wcsnicmp( k1->name, k2->name, len ))) return ret;
95  if (k1->len < k2->len) return -1;
96  else if (k1->len > k2->len) return 1;
97  return 0;
98 }
99 
100 /*
101 ** This function looks up an identifier to determine if it is a
102 ** keyword. If it is a keyword, the token code of that keyword is
103 ** returned. If the input is not a keyword, TK_ID is returned.
104 */
105 static int sqliteKeywordCode(const WCHAR *z, int n){
106  Keyword key, *r;
107 
108  if( n>MAX_TOKEN_LEN )
109  return TK_ID;
110 
111  key.tokenType = 0;
112  key.name = z;
113  key.len = n;
115  if( r )
116  return r->tokenType;
117  return TK_ID;
118 }
119 
120 
/*
** Identifier character map: isIdChar[X] is 1 when the character with
** code X may appear in an unquoted identifier and 0 otherwise.  The
** table has one entry for each code from 0x00 to 0xFF.
**
** Marked as identifier characters are the ASCII letters, the digits,
** '_', '-' (0x2D), the control code 0x05, and every code from 0x80
** upwards.  The last rule means that any sequence of UTF-8 characters
** or characters from an extended ISO8859 character set can form an
** identifier.
*/
static const char isIdChar[] = {
  /* 00-0F */ 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
  /* 10-1F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 20-2F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
  /* 30-3F */ 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
  /* 40-4F */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 50-5F */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,
  /* 60-6F */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 70-7F */ 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
  /* 80-8F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 90-9F */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* A0-AF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* B0-BF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* C0-CF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* D0-DF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* E0-EF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* F0-FF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
};
150 
151 /*
152 ** WCHAR safe version of isdigit()
153 */
154 static inline int isDigit(WCHAR c)
155 {
156  return c >= '0' && c <= '9';
157 }
158 
159 /*
160 ** WCHAR safe version of isspace(), except '\r'
161 */
162 static inline int isSpace(WCHAR c)
163 {
164  return c == ' ' || c == '\t' || c == '\n' || c == '\f';
165 }
166 
167 /*
168 ** Return the length of the token that begins at z[0]. Return
169 ** -1 if the token is (or might be) incomplete. Store the token
170 ** type in *tokenType before returning.
171 */
172 int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip){
173  int i;
174 
175  *skip = 0;
176  switch( *z ){
177  case ' ': case '\t': case '\n': case '\f':
178  for(i=1; isSpace(z[i]); i++){}
179  *tokenType = TK_SPACE;
180  return i;
181  case '-':
182  if( z[1]==0 ) return -1;
183  *tokenType = TK_MINUS;
184  return 1;
185  case '(':
186  *tokenType = TK_LP;
187  return 1;
188  case ')':
189  *tokenType = TK_RP;
190  return 1;
191  case '*':
192  *tokenType = TK_STAR;
193  return 1;
194  case '=':
195  *tokenType = TK_EQ;
196  return 1;
197  case '<':
198  if( z[1]=='=' ){
199  *tokenType = TK_LE;
200  return 2;
201  }else if( z[1]=='>' ){
202  *tokenType = TK_NE;
203  return 2;
204  }else{
205  *tokenType = TK_LT;
206  return 1;
207  }
208  case '>':
209  if( z[1]=='=' ){
210  *tokenType = TK_GE;
211  return 2;
212  }else{
213  *tokenType = TK_GT;
214  return 1;
215  }
216  case '!':
217  if( z[1]!='=' ){
218  *tokenType = TK_ILLEGAL;
219  return 2;
220  }else{
221  *tokenType = TK_NE;
222  return 2;
223  }
224  case '?':
225  *tokenType = TK_WILDCARD;
226  return 1;
227  case ',':
228  *tokenType = TK_COMMA;
229  return 1;
230  case '`': case '\'': {
231  int delim = z[0];
232  for(i=1; z[i]; i++){
233  if( z[i]==delim )
234  break;
235  }
236  if( z[i] ) i++;
237  if( delim == '`' )
238  *tokenType = TK_ID;
239  else
240  *tokenType = TK_STRING;
241  return i;
242  }
243  case '.':
244  if( !isDigit(z[1]) ){
245  *tokenType = TK_DOT;
246  return 1;
247  }
248  /* Fall through */
249  case '0': case '1': case '2': case '3': case '4':
250  case '5': case '6': case '7': case '8': case '9':
251  *tokenType = TK_INTEGER;
252  for(i=1; isDigit(z[i]); i++){}
253  return i;
254  case '[':
255  for(i=1; z[i] && z[i-1]!=']'; i++){}
256  *tokenType = TK_ID;
257  return i;
258  default:
259  if( !isIdChar[*z] ){
260  break;
261  }
262  for(i=1; isIdChar[z[i]]; i++){}
263  *tokenType = sqliteKeywordCode(z, i);
264  if( *tokenType == TK_ID && z[i] == '`' ) *skip = 1;
265  return i;
266  }
267  *tokenType = TK_ILLEGAL;
268  return 1;
269 }
const WCHAR * name
Definition: tokenize.c:34
#define __cdecl
Definition: accygwin.h:79
WCHAR * name
Definition: path.c:43
int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip)
Definition: tokenize.c:172
static int isDigit(WCHAR c)
Definition: tokenize.c:154
GLdouble GLdouble GLdouble r
Definition: gl.h:2055
static int sqliteKeywordCode(const WCHAR *z, int n)
Definition: tokenize.c:105
#define MAX_TOKEN_LEN
Definition: tokenize.c:39
GLdouble n
Definition: glext.h:7729
#define L(x)
Definition: ntvdm.h:50
unsigned int len
Definition: tokenize.c:35
GLdouble GLdouble z
Definition: glext.h:5874
static int isSpace(WCHAR c)
Definition: tokenize.c:162
__wchar_t WCHAR
Definition: xmlstorage.h:180
const GLubyte * c
Definition: glext.h:8905
int ret
#define wcsnicmp
Definition: compat.h:14
int tokenType
Definition: tokenize.c:36
HKEY key
Definition: reg.c:28
GLenum GLsizei len
Definition: glext.h:6722
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define ARRAY_SIZE(a)
Definition: main.h:24
#define min(a, b)
Definition: monoChain.cc:55
static const Keyword aKeywordTable[]
Definition: tokenize.c:46
#define X(str)
Definition: tokenize.c:45
#define skip(...)
Definition: atltest.h:64
static const char isIdChar[]
Definition: tokenize.c:131
static int __cdecl compKeyword(const void *m1, const void *m2)
Definition: tokenize.c:90
Definition: copy.c:22
#define bsearch