ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

tokenize.c
Go to the documentation of this file.
00001 /*
00002 ** 2001 September 15
00003 **
00004 ** The author disclaims copyright to this source code.  In place of
00005 ** a legal notice, here is a blessing:
00006 **
00007 **    May you do good and not evil.
00008 **    May you find forgiveness for yourself and forgive others.
00009 **    May you share freely, never taking more than you give.
00010 **
00011 *************************************************************************
00012 ** A tokenizer for SQL
00013 **
00014 ** This file contains C code that splits an SQL input string up into
00015 ** individual tokens and sends those tokens one-by-one over to the
00016 ** parser for analysis.
00017 */
00018 
00019 #include <ctype.h>
00020 #include <stdarg.h>
00021 #include <stdlib.h>
00022 
00023 #include "windef.h"
00024 #include "winbase.h"
00025 #include "wine/unicode.h"
00026 #include "query.h"
00027 #include "sql.tab.h"
00028 
00029 /*
00030 ** All the keywords of the SQL language are stored as in a hash
00031 ** table composed of instances of the following structure.
00032 */
00033 typedef struct Keyword Keyword;
00034 struct Keyword {
00035   const WCHAR *zName;             /* The keyword name */
00036   int tokenType;           /* The token value for this keyword */
00037 };
00038 
00039 #define MAX_TOKEN_LEN 11  /* length of the longest keyword below (LOCALIZABLE), not counting the terminator */
00040 /* Upper-case, zero-terminated WCHAR spellings of the SQL keywords referenced by aKeywordTable. */
00041 static const WCHAR ADD_W[] = { 'A','D','D',0 };
00042 static const WCHAR ALTER_W[] = { 'A','L','T','E','R',0 };
00043 static const WCHAR AND_W[] = { 'A','N','D',0 };
00044 static const WCHAR BY_W[] = { 'B','Y',0 };
00045 static const WCHAR CHAR_W[] = { 'C','H','A','R',0 };
00046 static const WCHAR CHARACTER_W[] = { 'C','H','A','R','A','C','T','E','R',0 };
00047 static const WCHAR CREATE_W[] = { 'C','R','E','A','T','E',0 };
00048 static const WCHAR DELETE_W[] = { 'D','E','L','E','T','E',0 };
00049 static const WCHAR DISTINCT_W[] = { 'D','I','S','T','I','N','C','T',0 };
00050 static const WCHAR DROP_W[] = { 'D','R','O','P',0 };
00051 static const WCHAR FREE_W[] = { 'F','R','E','E',0 };
00052 static const WCHAR FROM_W[] = { 'F','R','O','M',0 };
00053 static const WCHAR HOLD_W[] = { 'H','O','L','D',0 };
00054 static const WCHAR INSERT_W[] = { 'I','N','S','E','R','T',0 };
00055 static const WCHAR INT_W[] = { 'I','N','T',0 };
00056 static const WCHAR INTEGER_W[] = { 'I','N','T','E','G','E','R',0 };
00057 static const WCHAR INTO_W[] = { 'I','N','T','O',0 };
00058 static const WCHAR IS_W[] = { 'I','S',0 };
00059 static const WCHAR KEY_W[] = { 'K','E','Y',0 };
00060 static const WCHAR LIKE_W[] = { 'L','I','K','E',0 };
00061 static const WCHAR LOCALIZABLE_W[] = { 'L','O','C','A','L','I','Z','A','B','L','E',0 };
00062 static const WCHAR LONG_W[] = { 'L','O','N','G',0 };
00063 static const WCHAR LONGCHAR_W[] = { 'L','O','N','G','C','H','A','R',0 };
00064 static const WCHAR NOT_W[] = { 'N','O','T',0 };
00065 static const WCHAR NULL_W[] = { 'N','U','L','L',0 };
00066 static const WCHAR OBJECT_W[] = { 'O','B','J','E','C','T',0 };
00067 static const WCHAR OR_W[] = { 'O','R',0 };
00068 static const WCHAR ORDER_W[] = { 'O','R','D','E','R',0 };
00069 static const WCHAR PRIMARY_W[] = { 'P','R','I','M','A','R','Y',0 };
00070 static const WCHAR SELECT_W[] = { 'S','E','L','E','C','T',0 };
00071 static const WCHAR SET_W[] = { 'S','E','T',0 };
00072 static const WCHAR SHORT_W[] = { 'S','H','O','R','T',0 };
00073 static const WCHAR TABLE_W[] = { 'T','A','B','L','E',0 };
00074 static const WCHAR TEMPORARY_W[] = { 'T','E','M','P','O','R','A','R','Y',0 };
00075 static const WCHAR UPDATE_W[] = { 'U','P','D','A','T','E',0 };
00076 static const WCHAR VALUES_W[] = { 'V','A','L','U','E','S',0 };
00077 static const WCHAR WHERE_W[] = { 'W','H','E','R','E',0 };
00078 
00079 /*
00080 ** These are the keywords.  Each entry pairs a keyword spelling with
      ** the TK_* token code reported for it.
00081 ** They MUST be in alphabetical order: sqliteKeywordCode() looks
      ** entries up with bsearch(), which only works on a sorted array.
00082 */
00083 static const Keyword aKeywordTable[] = {
00084   { ADD_W, TK_ADD },
00085   { ALTER_W, TK_ALTER },
00086   { AND_W, TK_AND },
00087   { BY_W, TK_BY },
00088   { CHAR_W, TK_CHAR },
00089   { CHARACTER_W, TK_CHAR },  /* same token as CHAR */
00090   { CREATE_W, TK_CREATE },
00091   { DELETE_W, TK_DELETE },
00092   { DISTINCT_W, TK_DISTINCT },
00093   { DROP_W, TK_DROP },
00094   { FREE_W, TK_FREE },
00095   { FROM_W, TK_FROM },
00096   { HOLD_W, TK_HOLD },
00097   { INSERT_W, TK_INSERT },
00098   { INT_W, TK_INT },
00099   { INTEGER_W, TK_INT },  /* same token as INT */
00100   { INTO_W, TK_INTO },
00101   { IS_W, TK_IS },
00102   { KEY_W, TK_KEY },
00103   { LIKE_W, TK_LIKE },
00104   { LOCALIZABLE_W, TK_LOCALIZABLE },
00105   { LONG_W, TK_LONG },
00106   { LONGCHAR_W, TK_LONGCHAR },
00107   { NOT_W, TK_NOT },
00108   { NULL_W, TK_NULL },
00109   { OBJECT_W, TK_OBJECT },
00110   { OR_W, TK_OR },
00111   { ORDER_W, TK_ORDER },
00112   { PRIMARY_W, TK_PRIMARY },
00113   { SELECT_W, TK_SELECT },
00114   { SET_W, TK_SET },
00115   { SHORT_W, TK_SHORT },
00116   { TABLE_W, TK_TABLE },
00117   { TEMPORARY_W, TK_TEMPORARY },
00118   { UPDATE_W, TK_UPDATE },
00119   { VALUES_W, TK_VALUES },
00120   { WHERE_W, TK_WHERE },
00121 };
00122 
00123 #define KEYWORD_COUNT ( sizeof aKeywordTable/sizeof (Keyword) )  /* number of entries in aKeywordTable */
00124 
00125 /*
00126 ** Comparison function for binary search.
00127 */
00128 static int compKeyword(const void *m1, const void *m2){
00129   const Keyword *k1 = m1, *k2 = m2;
00130 
00131   return strcmpiW( k1->zName, k2->zName );
00132 }
00133 
00134 /*
00135 ** This function looks up an identifier to determine if it is a
00136 ** keyword.  If it is a keyword, the token code of that keyword is 
00137 ** returned.  If the input is not a keyword, TK_ID is returned.
00138 */
00139 static int sqliteKeywordCode(const WCHAR *z, int n){
00140   WCHAR str[MAX_TOKEN_LEN+1];
00141   Keyword key, *r;
00142 
00143   if( n>MAX_TOKEN_LEN )
00144     return TK_ID;
00145 
00146   memcpy( str, z, n*sizeof (WCHAR) );
00147   str[n] = 0;
00148   key.tokenType = 0;
00149   key.zName = str;
00150   r = bsearch( &key, aKeywordTable, KEYWORD_COUNT, sizeof (Keyword), compKeyword );
00151   if( r )
00152     return r->tokenType;
00153   return TK_ID;
00154 }
00155 
00156 
00157 /*
00158 ** If X is a character that can be used in an identifier then
00159 ** isIdChar[X] will be 1.  Otherwise isIdChar[X] will be 0.
00160 **
00161 ** In this implementation, an identifier can be a string of
00162 ** alphabetic characters, digits, "_" and "-", the control
      ** character 0x05, plus any character in the range 0x80..0xFF
00163 ** (high-order bit set).
      **
00164 ** The table has exactly 256 entries, one per value 0x00..0xFF.
00165 ** It must not be indexed with a value above 0xFF.
00166 */
00167 static const char isIdChar[] = {
00168 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
00169     0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
00170     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
00171     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,  /* 2x */
00172     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
00173     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
00174     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
00175     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
00176     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
00177     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 8x */
00178     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 9x */
00179     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ax */
00180     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Bx */
00181     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Cx */
00182     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Dx */
00183     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ex */
00184     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Fx */
00185 };
00186 
00187 
00188 /*
00189 ** Return the length of the token that begins at z[0].  Return
00190 ** -1 if the token is (or might be) incomplete.  Store the token
00191 ** type in *tokenType before returning.
00192 */
00193 int sqliteGetToken(const WCHAR *z, int *tokenType, int *skip){
00194   int i;
00195 
00196   *skip = 0;
00197   switch( *z ){
00198     case ' ': case '\t': case '\n': case '\f':
00199       for(i=1; isspace(z[i]) && z[i] != '\r'; i++){}
00200       *tokenType = TK_SPACE;
00201       return i;
00202     case '-':
00203       if( z[1]==0 ) return -1;
00204       *tokenType = TK_MINUS;
00205       return 1;
00206     case '(':
00207       *tokenType = TK_LP;
00208       return 1;
00209     case ')':
00210       *tokenType = TK_RP;
00211       return 1;
00212     case '*':
00213       *tokenType = TK_STAR;
00214       return 1;
00215     case '=':
00216       *tokenType = TK_EQ;
00217       return 1;
00218     case '<':
00219       if( z[1]=='=' ){
00220         *tokenType = TK_LE;
00221         return 2;
00222       }else if( z[1]=='>' ){
00223         *tokenType = TK_NE;
00224         return 2;
00225       }else{
00226         *tokenType = TK_LT;
00227         return 1;
00228       }
00229     case '>':
00230       if( z[1]=='=' ){
00231         *tokenType = TK_GE;
00232         return 2;
00233       }else{
00234         *tokenType = TK_GT;
00235         return 1;
00236       }
00237     case '!':
00238       if( z[1]!='=' ){
00239         *tokenType = TK_ILLEGAL;
00240         return 2;
00241       }else{
00242         *tokenType = TK_NE;
00243         return 2;
00244       }
00245     case '?':
00246       *tokenType = TK_WILDCARD;
00247       return 1;
00248     case ',':
00249       *tokenType = TK_COMMA;
00250       return 1;
00251     case '`': case '\'': {
00252       int delim = z[0];
00253       for(i=1; z[i]; i++){
00254         if( z[i]==delim )
00255           break;
00256       }
00257       if( z[i] ) i++;
00258       if( delim == '`' )
00259         *tokenType = TK_ID;
00260       else
00261         *tokenType = TK_STRING;
00262       return i;
00263     }
00264     case '.':
00265       if( !isdigit(z[1]) ){
00266         *tokenType = TK_DOT;
00267         return 1;
00268       }
00269       /* Fall thru into the next case */
00270     case '0': case '1': case '2': case '3': case '4':
00271     case '5': case '6': case '7': case '8': case '9':
00272       *tokenType = TK_INTEGER;
00273       for(i=1; isdigit(z[i]); i++){}
00274       return i;
00275     case '[':
00276       for(i=1; z[i] && z[i-1]!=']'; i++){}
00277       *tokenType = TK_ID;
00278       return i;
00279     default:
00280       if( !isIdChar[*z] ){
00281         break;
00282       }
00283       for(i=1; isIdChar[z[i]]; i++){}
00284       *tokenType = sqliteKeywordCode(z, i);
00285       if( *tokenType == TK_ID && z[i] == '`' ) *skip = 1;
00286       return i;
00287   }
00288   *tokenType = TK_ILLEGAL;
00289   return 1;
00290 }

Generated on Sun May 27 2012 04:25:20 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.