Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > tokenize.c
Go to the documentation of this file.
00001 /* 00002 ** 2001 September 15 00003 ** 00004 ** The author disclaims copyright to this source code. In place of 00005 ** a legal notice, here is a blessing: 00006 ** 00007 ** May you do good and not evil. 00008 ** May you find forgiveness for yourself and forgive others. 00009 ** May you share freely, never taking more than you give. 00010 ** 00011 ************************************************************************* 00012 ** A tokenizer for SQL 00013 ** 00014 ** This file contains C code that splits an SQL input string up into 00015 ** individual tokens and sends those tokens one-by-one over to the 00016 ** parser for analysis. 00017 */ 00018 00019 #include <ctype.h> 00020 #include <stdarg.h> 00021 #include <stdlib.h> 00022 00023 #include "windef.h" 00024 #include "winbase.h" 00025 #include "wine/unicode.h" 00026 #include "query.h" 00027 #include "sql.tab.h" 00028 00029 /* 00030 ** All the keywords of the SQL language are stored as in a hash 00031 ** table composed of instances of the following structure. 
00032 */ 00033 typedef struct Keyword Keyword; 00034 struct Keyword { 00035 const WCHAR *zName; /* The keyword name */ 00036 int tokenType; /* The token value for this keyword */ 00037 }; 00038 00039 #define MAX_TOKEN_LEN 11 00040 00041 static const WCHAR ADD_W[] = { 'A','D','D',0 }; 00042 static const WCHAR ALTER_W[] = { 'A','L','T','E','R',0 }; 00043 static const WCHAR AND_W[] = { 'A','N','D',0 }; 00044 static const WCHAR BY_W[] = { 'B','Y',0 }; 00045 static const WCHAR CHAR_W[] = { 'C','H','A','R',0 }; 00046 static const WCHAR CHARACTER_W[] = { 'C','H','A','R','A','C','T','E','R',0 }; 00047 static const WCHAR CREATE_W[] = { 'C','R','E','A','T','E',0 }; 00048 static const WCHAR DELETE_W[] = { 'D','E','L','E','T','E',0 }; 00049 static const WCHAR DISTINCT_W[] = { 'D','I','S','T','I','N','C','T',0 }; 00050 static const WCHAR DROP_W[] = { 'D','R','O','P',0 }; 00051 static const WCHAR FREE_W[] = { 'F','R','E','E',0 }; 00052 static const WCHAR FROM_W[] = { 'F','R','O','M',0 }; 00053 static const WCHAR HOLD_W[] = { 'H','O','L','D',0 }; 00054 static const WCHAR INSERT_W[] = { 'I','N','S','E','R','T',0 }; 00055 static const WCHAR INT_W[] = { 'I','N','T',0 }; 00056 static const WCHAR INTEGER_W[] = { 'I','N','T','E','G','E','R',0 }; 00057 static const WCHAR INTO_W[] = { 'I','N','T','O',0 }; 00058 static const WCHAR IS_W[] = { 'I','S',0 }; 00059 static const WCHAR KEY_W[] = { 'K','E','Y',0 }; 00060 static const WCHAR LIKE_W[] = { 'L','I','K','E',0 }; 00061 static const WCHAR LOCALIZABLE_W[] = { 'L','O','C','A','L','I','Z','A','B','L','E',0 }; 00062 static const WCHAR LONG_W[] = { 'L','O','N','G',0 }; 00063 static const WCHAR LONGCHAR_W[] = { 'L','O','N','G','C','H','A','R',0 }; 00064 static const WCHAR NOT_W[] = { 'N','O','T',0 }; 00065 static const WCHAR NULL_W[] = { 'N','U','L','L',0 }; 00066 static const WCHAR OBJECT_W[] = { 'O','B','J','E','C','T',0 }; 00067 static const WCHAR OR_W[] = { 'O','R',0 }; 00068 static const WCHAR ORDER_W[] = { 'O','R','D','E','R',0 }; 00069 
static const WCHAR PRIMARY_W[] = { 'P','R','I','M','A','R','Y',0 }; 00070 static const WCHAR SELECT_W[] = { 'S','E','L','E','C','T',0 }; 00071 static const WCHAR SET_W[] = { 'S','E','T',0 }; 00072 static const WCHAR SHORT_W[] = { 'S','H','O','R','T',0 }; 00073 static const WCHAR TABLE_W[] = { 'T','A','B','L','E',0 }; 00074 static const WCHAR TEMPORARY_W[] = { 'T','E','M','P','O','R','A','R','Y',0 }; 00075 static const WCHAR UPDATE_W[] = { 'U','P','D','A','T','E',0 }; 00076 static const WCHAR VALUES_W[] = { 'V','A','L','U','E','S',0 }; 00077 static const WCHAR WHERE_W[] = { 'W','H','E','R','E',0 }; 00078 00079 /* 00080 ** These are the keywords 00081 ** They MUST be in alphabetical order 00082 */ 00083 static const Keyword aKeywordTable[] = { 00084 { ADD_W, TK_ADD }, 00085 { ALTER_W, TK_ALTER }, 00086 { AND_W, TK_AND }, 00087 { BY_W, TK_BY }, 00088 { CHAR_W, TK_CHAR }, 00089 { CHARACTER_W, TK_CHAR }, 00090 { CREATE_W, TK_CREATE }, 00091 { DELETE_W, TK_DELETE }, 00092 { DISTINCT_W, TK_DISTINCT }, 00093 { DROP_W, TK_DROP }, 00094 { FREE_W, TK_FREE }, 00095 { FROM_W, TK_FROM }, 00096 { HOLD_W, TK_HOLD }, 00097 { INSERT_W, TK_INSERT }, 00098 { INT_W, TK_INT }, 00099 { INTEGER_W, TK_INT }, 00100 { INTO_W, TK_INTO }, 00101 { IS_W, TK_IS }, 00102 { KEY_W, TK_KEY }, 00103 { LIKE_W, TK_LIKE }, 00104 { LOCALIZABLE_W, TK_LOCALIZABLE }, 00105 { LONG_W, TK_LONG }, 00106 { LONGCHAR_W, TK_LONGCHAR }, 00107 { NOT_W, TK_NOT }, 00108 { NULL_W, TK_NULL }, 00109 { OBJECT_W, TK_OBJECT }, 00110 { OR_W, TK_OR }, 00111 { ORDER_W, TK_ORDER }, 00112 { PRIMARY_W, TK_PRIMARY }, 00113 { SELECT_W, TK_SELECT }, 00114 { SET_W, TK_SET }, 00115 { SHORT_W, TK_SHORT }, 00116 { TABLE_W, TK_TABLE }, 00117 { TEMPORARY_W, TK_TEMPORARY }, 00118 { UPDATE_W, TK_UPDATE }, 00119 { VALUES_W, TK_VALUES }, 00120 { WHERE_W, TK_WHERE }, 00121 }; 00122 00123 #define KEYWORD_COUNT ( sizeof aKeywordTable/sizeof (Keyword) ) 00124 00125 /* 00126 ** Comparison function for binary search. 
00127 */ 00128 static int compKeyword(const void *m1, const void *m2){ 00129 const Keyword *k1 = m1, *k2 = m2; 00130 00131 return strcmpiW( k1->zName, k2->zName ); 00132 } 00133 00134 /* 00135 ** This function looks up an identifier to determine if it is a 00136 ** keyword. If it is a keyword, the token code of that keyword is 00137 ** returned. If the input is not a keyword, TK_ID is returned. 00138 */ 00139 static int sqliteKeywordCode(const WCHAR *z, int n){ 00140 WCHAR str[MAX_TOKEN_LEN+1]; 00141 Keyword key, *r; 00142 00143 if( n>MAX_TOKEN_LEN ) 00144 return TK_ID; 00145 00146 memcpy( str, z, n*sizeof (WCHAR) ); 00147 str[n] = 0; 00148 key.tokenType = 0; 00149 key.zName = str; 00150 r = bsearch( &key, aKeywordTable, KEYWORD_COUNT, sizeof (Keyword), compKeyword ); 00151 if( r ) 00152 return r->tokenType; 00153 return TK_ID; 00154 } 00155 00156 00157 /* 00158 ** If X is a character that can be used in an identifier then 00159 ** isIdChar[X] will be 1. Otherwise isIdChar[X] will be 0. 00160 ** 00161 ** In this implementation, an identifier can be a string of 00162 ** alphabetic characters, digits, and "_" plus any character 00163 ** with the high-order bit set. The latter rule means that 00164 ** any sequence of UTF-8 characters or characters taken from 00165 ** an extended ISO8859 character set can form an identifier. 
00166 */ 00167 static const char isIdChar[] = { 00168 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 00169 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 00170 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 00171 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 2x */ 00172 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ 00173 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ 00174 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ 00175 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ 00176 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ 00177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 8x */ 00178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 9x */ 00179 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Ax */ 00180 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Bx */ 00181 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Cx */ 00182 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Dx */ 00183 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Ex */ 00184 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* Fx */ 00185 }; 00186 00187 00188 /* 00189 ** Return the length of the token that begins at z[0]. Return 00190 ** -1 if the token is (or might be) incomplete. Store the token 00191 ** type in *tokenType before returning. 
00192 */ 00193 int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip){ 00194 int i; 00195 00196 *skip = 0; 00197 switch( *z ){ 00198 case ' ': case '\t': case '\n': case '\f': 00199 for(i=1; isspace(z[i]) && z[i] != '\r'; i++){} 00200 *tokenType = TK_SPACE; 00201 return i; 00202 case '-': 00203 if( z[1]==0 ) return -1; 00204 *tokenType = TK_MINUS; 00205 return 1; 00206 case '(': 00207 *tokenType = TK_LP; 00208 return 1; 00209 case ')': 00210 *tokenType = TK_RP; 00211 return 1; 00212 case '*': 00213 *tokenType = TK_STAR; 00214 return 1; 00215 case '=': 00216 *tokenType = TK_EQ; 00217 return 1; 00218 case '<': 00219 if( z[1]=='=' ){ 00220 *tokenType = TK_LE; 00221 return 2; 00222 }else if( z[1]=='>' ){ 00223 *tokenType = TK_NE; 00224 return 2; 00225 }else{ 00226 *tokenType = TK_LT; 00227 return 1; 00228 } 00229 case '>': 00230 if( z[1]=='=' ){ 00231 *tokenType = TK_GE; 00232 return 2; 00233 }else{ 00234 *tokenType = TK_GT; 00235 return 1; 00236 } 00237 case '!': 00238 if( z[1]!='=' ){ 00239 *tokenType = TK_ILLEGAL; 00240 return 2; 00241 }else{ 00242 *tokenType = TK_NE; 00243 return 2; 00244 } 00245 case '?': 00246 *tokenType = TK_WILDCARD; 00247 return 1; 00248 case ',': 00249 *tokenType = TK_COMMA; 00250 return 1; 00251 case '`': case '\'': { 00252 int delim = z[0]; 00253 for(i=1; z[i]; i++){ 00254 if( z[i]==delim ) 00255 break; 00256 } 00257 if( z[i] ) i++; 00258 if( delim == '`' ) 00259 *tokenType = TK_ID; 00260 else 00261 *tokenType = TK_STRING; 00262 return i; 00263 } 00264 case '.': 00265 if( !isdigit(z[1]) ){ 00266 *tokenType = TK_DOT; 00267 return 1; 00268 } 00269 /* Fall thru into the next case */ 00270 case '0': case '1': case '2': case '3': case '4': 00271 case '5': case '6': case '7': case '8': case '9': 00272 *tokenType = TK_INTEGER; 00273 for(i=1; isdigit(z[i]); i++){} 00274 return i; 00275 case '[': 00276 for(i=1; z[i] && z[i-1]!=']'; i++){} 00277 *tokenType = TK_ID; 00278 return i; 00279 default: 00280 if( !isIdChar[*z] ){ 00281 break; 00282 } 
00283 for(i=1; isIdChar[z[i]]; i++){} 00284 *tokenType = sqliteKeywordCode(z, i); 00285 if( *tokenType == TK_ID && z[i] == '`' ) *skip = 1; 00286 return i; 00287 } 00288 *tokenType = TK_ILLEGAL; 00289 return 1; 00290 } Generated on Sun May 27 2012 04:25:20 for ReactOS by
1.7.6.1
|