ReactOS 0.4.15-dev-7924-g5949c20
tokenize.c
Go to the documentation of this file.
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
*/
18
19#include <ctype.h>
20#include <stdarg.h>
21#include <stdlib.h>
22
23#include "windef.h"
24#include "winbase.h"
25#include "query.h"
26#include "sql.tab.h"
27
28/*
29** All the keywords of the SQL language are stored as in a hash
30** table composed of instances of the following structure.
31*/
32typedef struct Keyword Keyword;
33struct Keyword {
34 const WCHAR *name; /* The keyword name */
35 unsigned int len;
36 int tokenType; /* The token value for this keyword */
37};
38
/*
** Length, in WCHARs, of the longest entry in aKeywordTable
** ("LOCALIZABLE").  Anything longer cannot be a keyword.
*/
#define MAX_TOKEN_LEN 11
40
41/*
42** These are the keywords
43** They MUST be in alphabetical order
44*/
45#define X(str) str, ARRAY_SIZE(str) - 1
46static const Keyword aKeywordTable[] = {
47 { X(L"ADD"), TK_ADD },
48 { X(L"ALTER"), TK_ALTER },
49 { X(L"AND"), TK_AND },
50 { X(L"BY"), TK_BY },
51 { X(L"CHAR"), TK_CHAR },
52 { X(L"CHARACTER"), TK_CHAR },
53 { X(L"CREATE"), TK_CREATE },
54 { X(L"DELETE"), TK_DELETE },
55 { X(L"DISTINCT"), TK_DISTINCT },
56 { X(L"DROP"), TK_DROP },
57 { X(L"FREE"), TK_FREE },
58 { X(L"FROM"), TK_FROM },
59 { X(L"HOLD"), TK_HOLD },
60 { X(L"INSERT"), TK_INSERT },
61 { X(L"INT"), TK_INT },
62 { X(L"INTEGER"), TK_INT },
63 { X(L"INTO"), TK_INTO },
64 { X(L"IS"), TK_IS },
65 { X(L"KEY"), TK_KEY },
66 { X(L"LIKE"), TK_LIKE },
67 { X(L"LOCALIZABLE"), TK_LOCALIZABLE },
68 { X(L"LONG"), TK_LONG },
69 { X(L"LONGCHAR"), TK_LONGCHAR },
70 { X(L"NOT"), TK_NOT },
71 { X(L"NULL"), TK_NULL },
72 { X(L"OBJECT"), TK_OBJECT },
73 { X(L"OR"), TK_OR },
74 { X(L"ORDER"), TK_ORDER },
75 { X(L"PRIMARY"), TK_PRIMARY },
76 { X(L"SELECT"), TK_SELECT },
77 { X(L"SET"), TK_SET },
78 { X(L"SHORT"), TK_SHORT },
79 { X(L"TABLE"), TK_TABLE },
80 { X(L"TEMPORARY"), TK_TEMPORARY },
81 { X(L"UPDATE"), TK_UPDATE },
82 { X(L"VALUES"), TK_VALUES },
83 { X(L"WHERE"), TK_WHERE },
84};
85#undef X
86
87/*
88** Comparison function for binary search.
89*/
90static int __cdecl compKeyword(const void *m1, const void *m2){
91 const Keyword *k1 = m1, *k2 = m2;
92 int ret, len = min( k1->len, k2->len );
93
94 if ((ret = wcsnicmp( k1->name, k2->name, len ))) return ret;
95 if (k1->len < k2->len) return -1;
96 else if (k1->len > k2->len) return 1;
97 return 0;
98}
99
100/*
101** This function looks up an identifier to determine if it is a
102** keyword. If it is a keyword, the token code of that keyword is
103** returned. If the input is not a keyword, TK_ID is returned.
104*/
105static int sqliteKeywordCode(const WCHAR *z, int n){
106 Keyword key, *r;
107
108 if( n>MAX_TOKEN_LEN )
109 return TK_ID;
110
111 key.tokenType = 0;
112 key.name = z;
113 key.len = n;
115 if( r )
116 return r->tokenType;
117 return TK_ID;
118}
119
120
/*
** If X is a character that can be used in an identifier then
** isIdChar[X] will be 1.  Otherwise isIdChar[X] will be 0.
**
** In this implementation, an identifier can be a string of
** alphabetic characters, digits, and "_" plus any character
** with the high-order bit set.  The latter rule means that
** any sequence of UTF-8 characters or characters taken from
** an extended ISO8859 character set can form an identifier.
**
** The table additionally marks 0x05 and '-' (0x2D) as identifier
** characters.
**
** The table has exactly 256 entries, so it must only be indexed
** with values in the range 0x00-0xFF.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 8x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 9x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ax */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Bx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Cx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Dx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Fx */
};
150
151/*
152** WCHAR safe version of isdigit()
153*/
154static inline int isDigit(WCHAR c)
155{
156 return c >= '0' && c <= '9';
157}
158
159/*
160** WCHAR safe version of isspace(), except '\r'
161*/
162static inline int isSpace(WCHAR c)
163{
164 return c == ' ' || c == '\t' || c == '\n' || c == '\f';
165}
166
167/*
168** Return the length of the token that begins at z[0]. Return
169** -1 if the token is (or might be) incomplete. Store the token
170** type in *tokenType before returning.
171*/
172int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip){
173 int i;
174
175 *skip = 0;
176 switch( *z ){
177 case ' ': case '\t': case '\n': case '\f':
178 for(i=1; isSpace(z[i]); i++){}
179 *tokenType = TK_SPACE;
180 return i;
181 case '-':
182 if( z[1]==0 ) return -1;
183 *tokenType = TK_MINUS;
184 return 1;
185 case '(':
186 *tokenType = TK_LP;
187 return 1;
188 case ')':
189 *tokenType = TK_RP;
190 return 1;
191 case '*':
192 *tokenType = TK_STAR;
193 return 1;
194 case '=':
195 *tokenType = TK_EQ;
196 return 1;
197 case '<':
198 if( z[1]=='=' ){
199 *tokenType = TK_LE;
200 return 2;
201 }else if( z[1]=='>' ){
202 *tokenType = TK_NE;
203 return 2;
204 }else{
205 *tokenType = TK_LT;
206 return 1;
207 }
208 case '>':
209 if( z[1]=='=' ){
210 *tokenType = TK_GE;
211 return 2;
212 }else{
213 *tokenType = TK_GT;
214 return 1;
215 }
216 case '!':
217 if( z[1]!='=' ){
218 *tokenType = TK_ILLEGAL;
219 return 2;
220 }else{
221 *tokenType = TK_NE;
222 return 2;
223 }
224 case '?':
225 *tokenType = TK_WILDCARD;
226 return 1;
227 case ',':
228 *tokenType = TK_COMMA;
229 return 1;
230 case '`': case '\'': {
231 int delim = z[0];
232 for(i=1; z[i]; i++){
233 if( z[i]==delim )
234 break;
235 }
236 if( z[i] ) i++;
237 if( delim == '`' )
238 *tokenType = TK_ID;
239 else
240 *tokenType = TK_STRING;
241 return i;
242 }
243 case '.':
244 if( !isDigit(z[1]) ){
245 *tokenType = TK_DOT;
246 return 1;
247 }
248 /* Fall through */
249 case '0': case '1': case '2': case '3': case '4':
250 case '5': case '6': case '7': case '8': case '9':
251 *tokenType = TK_INTEGER;
252 for(i=1; isDigit(z[i]); i++){}
253 return i;
254 case '[':
255 for(i=1; z[i] && z[i-1]!=']'; i++){}
256 *tokenType = TK_ID;
257 return i;
258 default:
259 if( !isIdChar[*z] ){
260 break;
261 }
262 for(i=1; isIdChar[z[i]]; i++){}
263 *tokenType = sqliteKeywordCode(z, i);
264 if( *tokenType == TK_ID && z[i] == '`' ) *skip = 1;
265 return i;
266 }
267 *tokenType = TK_ILLEGAL;
268 return 1;
269}
#define __cdecl
Definition: accygwin.h:79
#define skip(...)
Definition: atltest.h:64
#define ARRAY_SIZE(A)
Definition: main.h:33
#define wcsnicmp
Definition: compat.h:14
GLdouble GLdouble GLdouble r
Definition: gl.h:2055
GLdouble n
Definition: glext.h:7729
const GLubyte * c
Definition: glext.h:8905
GLenum GLsizei len
Definition: glext.h:6722
GLdouble GLdouble z
Definition: glext.h:5874
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint 
GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define min(a, b)
Definition: monoChain.cc:55
#define L(x)
Definition: ntvdm.h:50
const WCHAR * name
Definition: tokenize.c:34
unsigned int len
Definition: tokenize.c:35
int tokenType
Definition: tokenize.c:36
Definition: copy.c:22
WCHAR * name
Definition: path.c:43
#define bsearch
#define MAX_TOKEN_LEN
Definition: tokenize.c:39
static int sqliteKeywordCode(const WCHAR *z, int n)
Definition: tokenize.c:105
int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip)
Definition: tokenize.c:172
static const Keyword aKeywordTable[]
Definition: tokenize.c:46
static const char isIdChar[]
Definition: tokenize.c:131
static int isDigit(WCHAR c)
Definition: tokenize.c:154
static int isSpace(WCHAR c)
Definition: tokenize.c:162
#define X(str)
Definition: tokenize.c:45
static int __cdecl compKeyword(const void *m1, const void *m2)
Definition: tokenize.c:90
int ret
__wchar_t WCHAR
Definition: xmlstorage.h:180