ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

xmltok_impl.c
Go to the documentation of this file.
00001 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
00002    See the file COPYING for copying permission.
00003 */
00004 
/* Default for IS_INVALID_CHAR when the including file has not supplied
   one: no multi-byte sequence is treated as invalid. */
00005 #ifndef IS_INVALID_CHAR
00006 #define IS_INVALID_CHAR(enc, ptr, n) (0)
00007 #endif
00008 
/* Expands to a `case BT_LEAD<n>:` switch arm for an n-byte lead byte.
   Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain before
   `end`, returns XML_TOK_INVALID (with *nextTokPtr = ptr) when
   IS_INVALID_CHAR rejects the sequence, and otherwise advances ptr by n
   and breaks out of the switch.  Note that `enc` and `end` are not macro
   parameters; they are taken from the scope where the macro is used. */
00009 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
00010     case BT_LEAD ## n: \
00011       if (end - ptr < n) \
00012         return XML_TOK_PARTIAL_CHAR; \
00013       if (IS_INVALID_CHAR(enc, ptr, n)) { \
00014         *(nextTokPtr) = (ptr); \
00015         return XML_TOK_INVALID; \
00016       } \
00017       ptr += n; \
00018       break;
00019 
/* Expands to the switch arms shared by scanners that accept arbitrary
   character data: the 2-, 3- and 4-byte lead cases above, followed by
   arms that reject BT_NONXML, BT_MALFORM and BT_TRAIL with
   XML_TOK_INVALID and *nextTokPtr = ptr. */
00020 #define INVALID_CASES(ptr, nextTokPtr) \
00021   INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
00022   INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
00023   INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
00024   case BT_NONXML: \
00025   case BT_MALFORM: \
00026   case BT_TRAIL: \
00027     *(nextTokPtr) = (ptr); \
00028     return XML_TOK_INVALID;
00029 
/* Expands to a `case BT_LEAD<n>:` arm that accepts an n-byte sequence only
   if IS_NAME_CHAR holds for it.  Returns XML_TOK_PARTIAL_CHAR when fewer
   than n bytes remain, XML_TOK_INVALID (with *nextTokPtr = ptr) when the
   sequence is not a name character; otherwise advances ptr by n and
   breaks out of the switch. */
00030 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
00031    case BT_LEAD ## n: \
00032      if (end - ptr < n) \
00033        return XML_TOK_PARTIAL_CHAR; \
00034      if (!IS_NAME_CHAR(enc, ptr, n)) { \
00035        *nextTokPtr = ptr; \
00036        return XML_TOK_INVALID; \
00037      } \
00038      ptr += n; \
00039      break;
00040 
/* Expands to every switch arm that consumes one name character.
   BT_NONASCII is validated with IS_NAME_CHAR_MINBPC and then deliberately
   falls through into the BT_NMSTRT/BT_HEX/BT_DIGIT/BT_NAME/BT_MINUS arm,
   which advances ptr by MINBPC(enc) and breaks.  The multi-byte lead
   cases are appended via CHECK_NAME_CASE. */
00041 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
00042   case BT_NONASCII: \
00043     if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
00044       *nextTokPtr = ptr; \
00045       return XML_TOK_INVALID; \
00046     } \
00047   case BT_NMSTRT: \
00048   case BT_HEX: \
00049   case BT_DIGIT: \
00050   case BT_NAME: \
00051   case BT_MINUS: \
00052     ptr += MINBPC(enc); \
00053     break; \
00054   CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
00055   CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
00056   CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
00057 
/* Expands to a `case BT_LEAD<n>:` arm that accepts an n-byte sequence only
   if IS_NMSTRT_CHAR holds for it.  Returns XML_TOK_PARTIAL_CHAR when fewer
   than n bytes remain, XML_TOK_INVALID (with *nextTokPtr = ptr) when the
   sequence cannot start a name; otherwise advances ptr by n and breaks
   out of the switch. */
00058 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
00059    case BT_LEAD ## n: \
00060      if (end - ptr < n) \
00061        return XML_TOK_PARTIAL_CHAR; \
00062      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
00063        *nextTokPtr = ptr; \
00064        return XML_TOK_INVALID; \
00065      } \
00066      ptr += n; \
00067      break;
00068 
/* Expands to every switch arm that consumes one name-start character.
   BT_NONASCII is validated with IS_NMSTRT_CHAR_MINBPC and then
   deliberately falls through into the BT_NMSTRT/BT_HEX arm, which
   advances ptr by MINBPC(enc) and breaks.  The multi-byte lead cases are
   appended via CHECK_NMSTRT_CASE. */
00069 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
00070   case BT_NONASCII: \
00071     if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
00072       *nextTokPtr = ptr; \
00073       return XML_TOK_INVALID; \
00074     } \
00075   case BT_NMSTRT: \
00076   case BT_HEX: \
00077     ptr += MINBPC(enc); \
00078     break; \
00079   CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
00080   CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
00081   CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
00082 
/* PREFIX wraps every function name defined below.  When the including
   file has not defined it, names are used unchanged. */
00083 #ifndef PREFIX
00084 #define PREFIX(ident) ident
00085 #endif
00086 
00087 /* ptr points to character following "<!-" */
00088 
00089 static int PTRCALL
00090 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
00091                     const char *end, const char **nextTokPtr)
00092 {
00093   if (ptr != end) {
00094     if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
00095       *nextTokPtr = ptr;
00096       return XML_TOK_INVALID;
00097     }
00098     ptr += MINBPC(enc);
00099     while (ptr != end) {
00100       switch (BYTE_TYPE(enc, ptr)) {
00101       INVALID_CASES(ptr, nextTokPtr)
00102       case BT_MINUS:
00103         if ((ptr += MINBPC(enc)) == end)
00104           return XML_TOK_PARTIAL;
00105         if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
00106           if ((ptr += MINBPC(enc)) == end)
00107             return XML_TOK_PARTIAL;
00108           if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00109             *nextTokPtr = ptr;
00110             return XML_TOK_INVALID;
00111           }
00112           *nextTokPtr = ptr + MINBPC(enc);
00113           return XML_TOK_COMMENT;
00114         }
00115         break;
00116       default:
00117         ptr += MINBPC(enc);
00118         break;
00119       }
00120     }
00121   }
00122   return XML_TOK_PARTIAL;
00123 }
00124 
00125 /* ptr points to character following "<!" */
00126 
00127 static int PTRCALL
00128 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
00129                  const char *end, const char **nextTokPtr)
00130 {
00131   if (ptr == end)
00132     return XML_TOK_PARTIAL;
00133   switch (BYTE_TYPE(enc, ptr)) {
00134   case BT_MINUS:
00135     return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00136   case BT_LSQB:
00137     *nextTokPtr = ptr + MINBPC(enc);
00138     return XML_TOK_COND_SECT_OPEN;
00139   case BT_NMSTRT:
00140   case BT_HEX:
00141     ptr += MINBPC(enc);
00142     break;
00143   default:
00144     *nextTokPtr = ptr;
00145     return XML_TOK_INVALID;
00146   }
00147   while (ptr != end) {
00148     switch (BYTE_TYPE(enc, ptr)) {
00149     case BT_PERCNT:
00150       if (ptr + MINBPC(enc) == end)
00151         return XML_TOK_PARTIAL;
00152       /* don't allow <!ENTITY% foo "whatever"> */
00153       switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
00154       case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
00155         *nextTokPtr = ptr;
00156         return XML_TOK_INVALID;
00157       }
00158       /* fall through */
00159     case BT_S: case BT_CR: case BT_LF:
00160       *nextTokPtr = ptr;
00161       return XML_TOK_DECL_OPEN;
00162     case BT_NMSTRT:
00163     case BT_HEX:
00164       ptr += MINBPC(enc);
00165       break;
00166     default:
00167       *nextTokPtr = ptr;
00168       return XML_TOK_INVALID;
00169     }
00170   }
00171   return XML_TOK_PARTIAL;
00172 }
00173 
00174 static int PTRCALL
00175 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
00176                       const char *end, int *tokPtr)
00177 {
00178   int upper = 0;
00179   *tokPtr = XML_TOK_PI;
00180   if (end - ptr != MINBPC(enc)*3)
00181     return 1;
00182   switch (BYTE_TO_ASCII(enc, ptr)) {
00183   case ASCII_x:
00184     break;
00185   case ASCII_X:
00186     upper = 1;
00187     break;
00188   default:
00189     return 1;
00190   }
00191   ptr += MINBPC(enc);
00192   switch (BYTE_TO_ASCII(enc, ptr)) {
00193   case ASCII_m:
00194     break;
00195   case ASCII_M:
00196     upper = 1;
00197     break;
00198   default:
00199     return 1;
00200   }
00201   ptr += MINBPC(enc);
00202   switch (BYTE_TO_ASCII(enc, ptr)) {
00203   case ASCII_l:
00204     break;
00205   case ASCII_L:
00206     upper = 1;
00207     break;
00208   default:
00209     return 1;
00210   }
00211   if (upper)
00212     return 0;
00213   *tokPtr = XML_TOK_XML_DECL;
00214   return 1;
00215 }
00216 
00217 /* ptr points to character following "<?" */
00218 
00219 static int PTRCALL
00220 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
00221                const char *end, const char **nextTokPtr)
00222 {
00223   int tok;
00224   const char *target = ptr;
00225   if (ptr == end)
00226     return XML_TOK_PARTIAL;
00227   switch (BYTE_TYPE(enc, ptr)) {
00228   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00229   default:
00230     *nextTokPtr = ptr;
00231     return XML_TOK_INVALID;
00232   }
00233   while (ptr != end) {
00234     switch (BYTE_TYPE(enc, ptr)) {
00235     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00236     case BT_S: case BT_CR: case BT_LF:
00237       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
00238         *nextTokPtr = ptr;
00239         return XML_TOK_INVALID;
00240       }
00241       ptr += MINBPC(enc);
00242       while (ptr != end) {
00243         switch (BYTE_TYPE(enc, ptr)) {
00244         INVALID_CASES(ptr, nextTokPtr)
00245         case BT_QUEST:
00246           ptr += MINBPC(enc);
00247           if (ptr == end)
00248             return XML_TOK_PARTIAL;
00249           if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00250             *nextTokPtr = ptr + MINBPC(enc);
00251             return tok;
00252           }
00253           break;
00254         default:
00255           ptr += MINBPC(enc);
00256           break;
00257         }
00258       }
00259       return XML_TOK_PARTIAL;
00260     case BT_QUEST:
00261       if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
00262         *nextTokPtr = ptr;
00263         return XML_TOK_INVALID;
00264       }
00265       ptr += MINBPC(enc);
00266       if (ptr == end)
00267         return XML_TOK_PARTIAL;
00268       if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00269         *nextTokPtr = ptr + MINBPC(enc);
00270         return tok;
00271       }
00272       /* fall through */
00273     default:
00274       *nextTokPtr = ptr;
00275       return XML_TOK_INVALID;
00276     }
00277   }
00278   return XML_TOK_PARTIAL;
00279 }
00280 
00281 static int PTRCALL
00282 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
00283                          const char *end, const char **nextTokPtr)
00284 {
00285   static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
00286                                      ASCII_T, ASCII_A, ASCII_LSQB };
00287   int i;
00288   /* CDATA[ */
00289   if (end - ptr < 6 * MINBPC(enc))
00290     return XML_TOK_PARTIAL;
00291   for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
00292     if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
00293       *nextTokPtr = ptr;
00294       return XML_TOK_INVALID;
00295     }
00296   }
00297   *nextTokPtr = ptr;
00298   return XML_TOK_CDATA_SECT_OPEN;
00299 }
00300 
00301 static int PTRCALL
00302 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
00303                         const char *end, const char **nextTokPtr)
00304 {
00305   if (ptr == end)
00306     return XML_TOK_NONE;
00307   if (MINBPC(enc) > 1) {
00308     size_t n = end - ptr;
00309     if (n & (MINBPC(enc) - 1)) {
00310       n &= ~(MINBPC(enc) - 1);
00311       if (n == 0)
00312         return XML_TOK_PARTIAL;
00313       end = ptr + n;
00314     }
00315   }
00316   switch (BYTE_TYPE(enc, ptr)) {
00317   case BT_RSQB:
00318     ptr += MINBPC(enc);
00319     if (ptr == end)
00320       return XML_TOK_PARTIAL;
00321     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
00322       break;
00323     ptr += MINBPC(enc);
00324     if (ptr == end)
00325       return XML_TOK_PARTIAL;
00326     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00327       ptr -= MINBPC(enc);
00328       break;
00329     }
00330     *nextTokPtr = ptr + MINBPC(enc);
00331     return XML_TOK_CDATA_SECT_CLOSE;
00332   case BT_CR:
00333     ptr += MINBPC(enc);
00334     if (ptr == end)
00335       return XML_TOK_PARTIAL;
00336     if (BYTE_TYPE(enc, ptr) == BT_LF)
00337       ptr += MINBPC(enc);
00338     *nextTokPtr = ptr;
00339     return XML_TOK_DATA_NEWLINE;
00340   case BT_LF:
00341     *nextTokPtr = ptr + MINBPC(enc);
00342     return XML_TOK_DATA_NEWLINE;
00343   INVALID_CASES(ptr, nextTokPtr)
00344   default:
00345     ptr += MINBPC(enc);
00346     break;
00347   }
00348   while (ptr != end) {
00349     switch (BYTE_TYPE(enc, ptr)) {
00350 #define LEAD_CASE(n) \
00351     case BT_LEAD ## n: \
00352       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
00353         *nextTokPtr = ptr; \
00354         return XML_TOK_DATA_CHARS; \
00355       } \
00356       ptr += n; \
00357       break;
00358     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
00359 #undef LEAD_CASE
00360     case BT_NONXML:
00361     case BT_MALFORM:
00362     case BT_TRAIL:
00363     case BT_CR:
00364     case BT_LF:
00365     case BT_RSQB:
00366       *nextTokPtr = ptr;
00367       return XML_TOK_DATA_CHARS;
00368     default:
00369       ptr += MINBPC(enc);
00370       break;
00371     }
00372   }
00373   *nextTokPtr = ptr;
00374   return XML_TOK_DATA_CHARS;
00375 }
00376 
00377 /* ptr points to character following "</" */
00378 
00379 static int PTRCALL
00380 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
00381                    const char *end, const char **nextTokPtr)
00382 {
00383   if (ptr == end)
00384     return XML_TOK_PARTIAL;
00385   switch (BYTE_TYPE(enc, ptr)) {
00386   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00387   default:
00388     *nextTokPtr = ptr;
00389     return XML_TOK_INVALID;
00390   }
00391   while (ptr != end) {
00392     switch (BYTE_TYPE(enc, ptr)) {
00393     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00394     case BT_S: case BT_CR: case BT_LF:
00395       for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00396         switch (BYTE_TYPE(enc, ptr)) {
00397         case BT_S: case BT_CR: case BT_LF:
00398           break;
00399         case BT_GT:
00400           *nextTokPtr = ptr + MINBPC(enc);
00401           return XML_TOK_END_TAG;
00402         default:
00403           *nextTokPtr = ptr;
00404           return XML_TOK_INVALID;
00405         }
00406       }
00407       return XML_TOK_PARTIAL;
00408 #ifdef XML_NS
00409     case BT_COLON:
00410       /* no need to check qname syntax here,
00411          since end-tag must match exactly */
00412       ptr += MINBPC(enc);
00413       break;
00414 #endif
00415     case BT_GT:
00416       *nextTokPtr = ptr + MINBPC(enc);
00417       return XML_TOK_END_TAG;
00418     default:
00419       *nextTokPtr = ptr;
00420       return XML_TOK_INVALID;
00421     }
00422   }
00423   return XML_TOK_PARTIAL;
00424 }
00425 
00426 /* ptr points to character following "&#X" */
00427 
00428 static int PTRCALL
00429 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
00430                        const char *end, const char **nextTokPtr)
00431 {
00432   if (ptr != end) {
00433     switch (BYTE_TYPE(enc, ptr)) {
00434     case BT_DIGIT:
00435     case BT_HEX:
00436       break;
00437     default:
00438       *nextTokPtr = ptr;
00439       return XML_TOK_INVALID;
00440     }
00441     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00442       switch (BYTE_TYPE(enc, ptr)) {
00443       case BT_DIGIT:
00444       case BT_HEX:
00445         break;
00446       case BT_SEMI:
00447         *nextTokPtr = ptr + MINBPC(enc);
00448         return XML_TOK_CHAR_REF;
00449       default:
00450         *nextTokPtr = ptr;
00451         return XML_TOK_INVALID;
00452       }
00453     }
00454   }
00455   return XML_TOK_PARTIAL;
00456 }
00457 
00458 /* ptr points to character following "&#" */
00459 
00460 static int PTRCALL
00461 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
00462                     const char *end, const char **nextTokPtr)
00463 {
00464   if (ptr != end) {
00465     if (CHAR_MATCHES(enc, ptr, ASCII_x))
00466       return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00467     switch (BYTE_TYPE(enc, ptr)) {
00468     case BT_DIGIT:
00469       break;
00470     default:
00471       *nextTokPtr = ptr;
00472       return XML_TOK_INVALID;
00473     }
00474     for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00475       switch (BYTE_TYPE(enc, ptr)) {
00476       case BT_DIGIT:
00477         break;
00478       case BT_SEMI:
00479         *nextTokPtr = ptr + MINBPC(enc);
00480         return XML_TOK_CHAR_REF;
00481       default:
00482         *nextTokPtr = ptr;
00483         return XML_TOK_INVALID;
00484       }
00485     }
00486   }
00487   return XML_TOK_PARTIAL;
00488 }
00489 
00490 /* ptr points to character following "&" */
00491 
00492 static int PTRCALL
00493 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
00494                 const char **nextTokPtr)
00495 {
00496   if (ptr == end)
00497     return XML_TOK_PARTIAL;
00498   switch (BYTE_TYPE(enc, ptr)) {
00499   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00500   case BT_NUM:
00501     return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00502   default:
00503     *nextTokPtr = ptr;
00504     return XML_TOK_INVALID;
00505   }
00506   while (ptr != end) {
00507     switch (BYTE_TYPE(enc, ptr)) {
00508     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00509     case BT_SEMI:
00510       *nextTokPtr = ptr + MINBPC(enc);
00511       return XML_TOK_ENTITY_REF;
00512     default:
00513       *nextTokPtr = ptr;
00514       return XML_TOK_INVALID;
00515     }
00516   }
00517   return XML_TOK_PARTIAL;
00518 }
00519 
00520 /* ptr points to character following first character of attribute name.

   Scans the remainder of a start-tag that carries at least one attribute:
   attribute names, '=' signs, quoted values and the closing ">" or "/>".
   Returns XML_TOK_START_TAG_WITH_ATTS or XML_TOK_EMPTY_ELEMENT_WITH_ATTS
   with *nextTokPtr just past the tag, XML_TOK_INVALID with *nextTokPtr at
   the offending character, or XML_TOK_PARTIAL when the input runs out.
   A non-positive token from scanRef inside a value is returned as is, and
   the CHECK_*_CASES / INVALID_CASES macros may return on their own. */
00521 
00522 static int PTRCALL
00523 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
00524                  const char **nextTokPtr)
00525 {
00526 #ifdef XML_NS
00527   int hadColon = 0;
00528 #endif
00529   while (ptr != end) {
00530     switch (BYTE_TYPE(enc, ptr)) {
00531     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00532 #ifdef XML_NS
    /* at most one colon per attribute name, and it must be followed by a
       name-start character */
00533     case BT_COLON:
00534       if (hadColon) {
00535         *nextTokPtr = ptr;
00536         return XML_TOK_INVALID;
00537       }
00538       hadColon = 1;
00539       ptr += MINBPC(enc);
00540       if (ptr == end)
00541         return XML_TOK_PARTIAL;
00542       switch (BYTE_TYPE(enc, ptr)) {
00543       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00544       default:
00545         *nextTokPtr = ptr;
00546         return XML_TOK_INVALID;
00547       }
00548       break;
00549 #endif
    /* whitespace after the name: skip it; only '=' may follow */
00550     case BT_S: case BT_CR: case BT_LF:
00551       for (;;) {
00552         int t;
00553 
00554         ptr += MINBPC(enc);
00555         if (ptr == end)
00556           return XML_TOK_PARTIAL;
00557         t = BYTE_TYPE(enc, ptr);
00558         if (t == BT_EQUALS)
00559           break;
00560         switch (t) {
00561         case BT_S:
00562         case BT_LF:
00563         case BT_CR:
00564           break;
00565         default:
00566           *nextTokPtr = ptr;
00567           return XML_TOK_INVALID;
00568         }
00569       }
00570     /* fall through: ptr now points to the '=' */
00571     case BT_EQUALS:
00572       {
00573         int open;
00574 #ifdef XML_NS
        /* the colon restriction applies per attribute name */
00575         hadColon = 0;
00576 #endif
        /* skip whitespace up to the opening quote; `open` records which
           quote type must close the value */
00577         for (;;) {
00578           ptr += MINBPC(enc);
00579           if (ptr == end)
00580             return XML_TOK_PARTIAL;
00581           open = BYTE_TYPE(enc, ptr);
00582           if (open == BT_QUOT || open == BT_APOS)
00583             break;
00584           switch (open) {
00585           case BT_S:
00586           case BT_LF:
00587           case BT_CR:
00588             break;
00589           default:
00590             *nextTokPtr = ptr;
00591             return XML_TOK_INVALID;
00592           }
00593         }
00594         ptr += MINBPC(enc);
00595         /* in attribute value */
00596         for (;;) {
00597           int t;
00598           if (ptr == end)
00599             return XML_TOK_PARTIAL;
00600           t = BYTE_TYPE(enc, ptr);
00601           if (t == open)
00602             break;
00603           switch (t) {
00604           INVALID_CASES(ptr, nextTokPtr)
00605           case BT_AMP:
00606             {
              /* scanRef is given &ptr as its nextTokPtr, so ptr itself is
                 updated with whatever position scanRef reports */
00607               int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
00608               if (tok <= 0) {
00609                 if (tok == XML_TOK_INVALID)
00610                   *nextTokPtr = ptr;
00611                 return tok;
00612               }
00613               break;
00614             }
          /* '<' is not allowed inside an attribute value */
00615           case BT_LT:
00616             *nextTokPtr = ptr;
00617             return XML_TOK_INVALID;
00618           default:
00619             ptr += MINBPC(enc);
00620             break;
00621           }
00622         }
        /* step past the closing quote; it must be followed by whitespace,
           '/' or '>' */
00623         ptr += MINBPC(enc);
00624         if (ptr == end)
00625           return XML_TOK_PARTIAL;
00626         switch (BYTE_TYPE(enc, ptr)) {
00627         case BT_S:
00628         case BT_CR:
00629         case BT_LF:
00630           break;
00631         case BT_SOL:
00632           goto sol;
00633         case BT_GT:
00634           goto gt;
00635         default:
00636           *nextTokPtr = ptr;
00637           return XML_TOK_INVALID;
00638         }
00639         /* ptr points to the whitespace character after the closing quote */
00640         for (;;) {
00641           ptr += MINBPC(enc);
00642           if (ptr == end)
00643             return XML_TOK_PARTIAL;
00644           switch (BYTE_TYPE(enc, ptr)) {
00645           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00646           case BT_S: case BT_CR: case BT_LF:
00647             continue;
00648           case BT_GT:
00649           gt:
00650             *nextTokPtr = ptr + MINBPC(enc);
00651             return XML_TOK_START_TAG_WITH_ATTS;
00652           case BT_SOL:
00653           sol:
00654             ptr += MINBPC(enc);
00655             if (ptr == end)
00656               return XML_TOK_PARTIAL;
00657             if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00658               *nextTokPtr = ptr;
00659               return XML_TOK_INVALID;
00660             }
00661             *nextTokPtr = ptr + MINBPC(enc);
00662             return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
00663           default:
00664             *nextTokPtr = ptr;
00665             return XML_TOK_INVALID;
00666           }
          /* reached only when CHECK_NMSTRT_CASES consumed the first
             character of the next attribute name: leave this loop */
00667           break;
00668         }
        /* ...and leave the outer switch so the while loop scans the rest
           of that name */
00669         break;
00670       }
00671     default:
00672       *nextTokPtr = ptr;
00673       return XML_TOK_INVALID;
00674     }
00675   }
00676   return XML_TOK_PARTIAL;
00677 }
00678 
00679 /* ptr points to character following "<" */
00680 
00681 static int PTRCALL
00682 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
00683                const char **nextTokPtr)
00684 {
00685 #ifdef XML_NS
00686   int hadColon;
00687 #endif
00688   if (ptr == end)
00689     return XML_TOK_PARTIAL;
00690   switch (BYTE_TYPE(enc, ptr)) {
00691   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00692   case BT_EXCL:
00693     if ((ptr += MINBPC(enc)) == end)
00694       return XML_TOK_PARTIAL;
00695     switch (BYTE_TYPE(enc, ptr)) {
00696     case BT_MINUS:
00697       return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00698     case BT_LSQB:
00699       return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
00700                                       end, nextTokPtr);
00701     }
00702     *nextTokPtr = ptr;
00703     return XML_TOK_INVALID;
00704   case BT_QUEST:
00705     return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00706   case BT_SOL:
00707     return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00708   default:
00709     *nextTokPtr = ptr;
00710     return XML_TOK_INVALID;
00711   }
00712 #ifdef XML_NS
00713   hadColon = 0;
00714 #endif
00715   /* we have a start-tag */
00716   while (ptr != end) {
00717     switch (BYTE_TYPE(enc, ptr)) {
00718     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00719 #ifdef XML_NS
00720     case BT_COLON:
00721       if (hadColon) {
00722         *nextTokPtr = ptr;
00723         return XML_TOK_INVALID;
00724       }
00725       hadColon = 1;
00726       ptr += MINBPC(enc);
00727       if (ptr == end)
00728         return XML_TOK_PARTIAL;
00729       switch (BYTE_TYPE(enc, ptr)) {
00730       CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00731       default:
00732         *nextTokPtr = ptr;
00733         return XML_TOK_INVALID;
00734       }
00735       break;
00736 #endif
00737     case BT_S: case BT_CR: case BT_LF:
00738       {
00739         ptr += MINBPC(enc);
00740         while (ptr != end) {
00741           switch (BYTE_TYPE(enc, ptr)) {
00742           CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00743           case BT_GT:
00744             goto gt;
00745           case BT_SOL:
00746             goto sol;
00747           case BT_S: case BT_CR: case BT_LF:
00748             ptr += MINBPC(enc);
00749             continue;
00750           default:
00751             *nextTokPtr = ptr;
00752             return XML_TOK_INVALID;
00753           }
00754           return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
00755         }
00756         return XML_TOK_PARTIAL;
00757       }
00758     case BT_GT:
00759     gt:
00760       *nextTokPtr = ptr + MINBPC(enc);
00761       return XML_TOK_START_TAG_NO_ATTS;
00762     case BT_SOL:
00763     sol:
00764       ptr += MINBPC(enc);
00765       if (ptr == end)
00766         return XML_TOK_PARTIAL;
00767       if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00768         *nextTokPtr = ptr;
00769         return XML_TOK_INVALID;
00770       }
00771       *nextTokPtr = ptr + MINBPC(enc);
00772       return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
00773     default:
00774       *nextTokPtr = ptr;
00775       return XML_TOK_INVALID;
00776     }
00777   }
00778   return XML_TOK_PARTIAL;
00779 }
00780 
00781 static int PTRCALL
00782 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
00783                    const char **nextTokPtr)
00784 {
00785   if (ptr == end)
00786     return XML_TOK_NONE;
00787   if (MINBPC(enc) > 1) {
00788     size_t n = end - ptr;
00789     if (n & (MINBPC(enc) - 1)) {
00790       n &= ~(MINBPC(enc) - 1);
00791       if (n == 0)
00792         return XML_TOK_PARTIAL;
00793       end = ptr + n;
00794     }
00795   }
00796   switch (BYTE_TYPE(enc, ptr)) {
00797   case BT_LT:
00798     return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00799   case BT_AMP:
00800     return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00801   case BT_CR:
00802     ptr += MINBPC(enc);
00803     if (ptr == end)
00804       return XML_TOK_TRAILING_CR;
00805     if (BYTE_TYPE(enc, ptr) == BT_LF)
00806       ptr += MINBPC(enc);
00807     *nextTokPtr = ptr;
00808     return XML_TOK_DATA_NEWLINE;
00809   case BT_LF:
00810     *nextTokPtr = ptr + MINBPC(enc);
00811     return XML_TOK_DATA_NEWLINE;
00812   case BT_RSQB:
00813     ptr += MINBPC(enc);
00814     if (ptr == end)
00815       return XML_TOK_TRAILING_RSQB;
00816     if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
00817       break;
00818     ptr += MINBPC(enc);
00819     if (ptr == end)
00820       return XML_TOK_TRAILING_RSQB;
00821     if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00822       ptr -= MINBPC(enc);
00823       break;
00824     }
00825     *nextTokPtr = ptr;
00826     return XML_TOK_INVALID;
00827   INVALID_CASES(ptr, nextTokPtr)
00828   default:
00829     ptr += MINBPC(enc);
00830     break;
00831   }
00832   while (ptr != end) {
00833     switch (BYTE_TYPE(enc, ptr)) {
00834 #define LEAD_CASE(n) \
00835     case BT_LEAD ## n: \
00836       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
00837         *nextTokPtr = ptr; \
00838         return XML_TOK_DATA_CHARS; \
00839       } \
00840       ptr += n; \
00841       break;
00842     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
00843 #undef LEAD_CASE
00844     case BT_RSQB:
00845       if (ptr + MINBPC(enc) != end) {
00846          if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
00847            ptr += MINBPC(enc);
00848            break;
00849          }
00850          if (ptr + 2*MINBPC(enc) != end) {
00851            if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
00852              ptr += MINBPC(enc);
00853              break;
00854            }
00855            *nextTokPtr = ptr + 2*MINBPC(enc);
00856            return XML_TOK_INVALID;
00857          }
00858       }
00859       /* fall through */
00860     case BT_AMP:
00861     case BT_LT:
00862     case BT_NONXML:
00863     case BT_MALFORM:
00864     case BT_TRAIL:
00865     case BT_CR:
00866     case BT_LF:
00867       *nextTokPtr = ptr;
00868       return XML_TOK_DATA_CHARS;
00869     default:
00870       ptr += MINBPC(enc);
00871       break;
00872     }
00873   }
00874   *nextTokPtr = ptr;
00875   return XML_TOK_DATA_CHARS;
00876 }
00877 
00878 /* ptr points to character following "%" */
00879 
00880 static int PTRCALL
00881 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
00882                     const char **nextTokPtr)
00883 {
00884   if (ptr == end)
00885     return -XML_TOK_PERCENT;
00886   switch (BYTE_TYPE(enc, ptr)) {
00887   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00888   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
00889     *nextTokPtr = ptr;
00890     return XML_TOK_PERCENT;
00891   default:
00892     *nextTokPtr = ptr;
00893     return XML_TOK_INVALID;
00894   }
00895   while (ptr != end) {
00896     switch (BYTE_TYPE(enc, ptr)) {
00897     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00898     case BT_SEMI:
00899       *nextTokPtr = ptr + MINBPC(enc);
00900       return XML_TOK_PARAM_ENTITY_REF;
00901     default:
00902       *nextTokPtr = ptr;
00903       return XML_TOK_INVALID;
00904     }
00905   }
00906   return XML_TOK_PARTIAL;
00907 }
00908 
00909 static int PTRCALL
00910 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
00911                       const char **nextTokPtr)
00912 {
00913   if (ptr == end)
00914     return XML_TOK_PARTIAL;
00915   switch (BYTE_TYPE(enc, ptr)) {
00916   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00917   default:
00918     *nextTokPtr = ptr;
00919     return XML_TOK_INVALID;
00920   }
00921   while (ptr != end) {
00922     switch (BYTE_TYPE(enc, ptr)) {
00923     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00924     case BT_CR: case BT_LF: case BT_S:
00925     case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
00926       *nextTokPtr = ptr;
00927       return XML_TOK_POUND_NAME;
00928     default:
00929       *nextTokPtr = ptr;
00930       return XML_TOK_INVALID;
00931     }
00932   }
00933   return -XML_TOK_POUND_NAME;
00934 }
00935 
00936 static int PTRCALL
00937 PREFIX(scanLit)(int open, const ENCODING *enc,
00938                 const char *ptr, const char *end,
00939                 const char **nextTokPtr)
00940 {
00941   while (ptr != end) {
00942     int t = BYTE_TYPE(enc, ptr);
00943     switch (t) {
00944     INVALID_CASES(ptr, nextTokPtr)
00945     case BT_QUOT:
00946     case BT_APOS:
00947       ptr += MINBPC(enc);
00948       if (t != open)
00949         break;
00950       if (ptr == end)
00951         return -XML_TOK_LITERAL;
00952       *nextTokPtr = ptr;
00953       switch (BYTE_TYPE(enc, ptr)) {
00954       case BT_S: case BT_CR: case BT_LF:
00955       case BT_GT: case BT_PERCNT: case BT_LSQB:
00956         return XML_TOK_LITERAL;
00957       default:
00958         return XML_TOK_INVALID;
00959       }
00960     default:
00961       ptr += MINBPC(enc);
00962       break;
00963     }
00964   }
00965   return XML_TOK_PARTIAL;
00966 }
00967 
/* Scans one token of prolog (declaration-level) markup starting at ptr and
   returns its XML_TOK_* code.

   - Returns XML_TOK_NONE when ptr == end.
   - When MINBPC(enc) > 1, end is first rounded down to a whole number of
     code units; XML_TOK_PARTIAL is returned if no whole unit remains.
   - XML_TOK_INVALID is returned with *nextTokPtr set to the offending
     position.
   - A negated token code (-XML_TOK_PROLOG_S, -XML_TOK_CLOSE_BRACKET,
     -XML_TOK_CLOSE_PAREN, or -tok for a name/nmtoken) is returned when the
     input ends directly after something that further input might still
     extend.  Note that the final "return -tok" does not store anything
     through nextTokPtr.
   - Quoted literals, declarations, processing instructions, '%'-prefixed
     and '#'-prefixed tokens are delegated to scanLit, scanDecl, scanPi,
     scanPercent and scanPoundName respectively.
*/
00968 static int PTRCALL
00969 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
00970                   const char **nextTokPtr)
00971 {
00972   int tok;
00973   if (ptr == end)
00974     return XML_TOK_NONE;
        /* Ignore a trailing fragment of a code unit so that the "ptr == end"
           tests below only ever see whole units. */
00975   if (MINBPC(enc) > 1) {
00976     size_t n = end - ptr;
00977     if (n & (MINBPC(enc) - 1)) {
00978       n &= ~(MINBPC(enc) - 1);
00979       if (n == 0)
00980         return XML_TOK_PARTIAL;
00981       end = ptr + n;
00982     }
00983   }
        /* Dispatch on the first character.  Cases that finish a token return
           directly; the name/nmtoken cases set tok, step past the first
           character and break out to the continuation loop further down. */
00984   switch (BYTE_TYPE(enc, ptr)) {
00985   case BT_QUOT:
00986     return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
00987   case BT_APOS:
00988     return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
00989   case BT_LT:
00990     {
00991       ptr += MINBPC(enc);
00992       if (ptr == end)
00993         return XML_TOK_PARTIAL;
00994       switch (BYTE_TYPE(enc, ptr)) {
00995       case BT_EXCL:
00996         return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00997       case BT_QUEST:
00998         return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00999       case BT_NMSTRT:
01000       case BT_HEX:
01001       case BT_NONASCII:
01002       case BT_LEAD2:
01003       case BT_LEAD3:
01004       case BT_LEAD4:
              /* BT_LT followed by a possible name start: report
                 XML_TOK_INSTANCE_START and point *nextTokPtr back at the
                 BT_LT character itself, so nothing is consumed. */
01005         *nextTokPtr = ptr - MINBPC(enc);
01006         return XML_TOK_INSTANCE_START;
01007       }
01008       *nextTokPtr = ptr;
01009       return XML_TOK_INVALID;
01010     }
01011   case BT_CR:
01012     if (ptr + MINBPC(enc) == end) {
01013       *nextTokPtr = end;
01014       /* indicate that this might be part of a CR/LF pair */
01015       return -XML_TOK_PROLOG_S;
01016     }
01017     /* fall through */
01018   case BT_S: case BT_LF:
          /* Consume a run of whitespace characters as one XML_TOK_PROLOG_S. */
01019     for (;;) {
01020       ptr += MINBPC(enc);
01021       if (ptr == end)
01022         break;
01023       switch (BYTE_TYPE(enc, ptr)) {
01024       case BT_S: case BT_LF:
01025         break;
01026       case BT_CR:
01027         /* don't split CR/LF pair */
01028         if (ptr + MINBPC(enc) != end)
01029           break;
01030         /* fall through */
01031       default:
01032         *nextTokPtr = ptr;
01033         return XML_TOK_PROLOG_S;
01034       }
01035     }
01036     *nextTokPtr = ptr;
01037     return XML_TOK_PROLOG_S;
01038   case BT_PERCNT:
01039     return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01040   case BT_COMMA:
01041     *nextTokPtr = ptr + MINBPC(enc);
01042     return XML_TOK_COMMA;
01043   case BT_LSQB:
01044     *nextTokPtr = ptr + MINBPC(enc);
01045     return XML_TOK_OPEN_BRACKET;
01046   case BT_RSQB:
          /* Either a single closing bracket, or the three-character sequence
             ASCII_RSQB ASCII_RSQB ASCII_GT, reported as
             XML_TOK_COND_SECT_CLOSE. */
01047     ptr += MINBPC(enc);
01048     if (ptr == end)
01049       return -XML_TOK_CLOSE_BRACKET;
01050     if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
01051       if (ptr + MINBPC(enc) == end)
01052         return XML_TOK_PARTIAL;
01053       if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
01054         *nextTokPtr = ptr + 2*MINBPC(enc);
01055         return XML_TOK_COND_SECT_CLOSE;
01056       }
01057     }
01058     *nextTokPtr = ptr;
01059     return XML_TOK_CLOSE_BRACKET;
01060   case BT_LPAR:
01061     *nextTokPtr = ptr + MINBPC(enc);
01062     return XML_TOK_OPEN_PAREN;
01063   case BT_RPAR:
          /* A closing parenthesis may carry a BT_AST / BT_QUEST / BT_PLUS
             suffix, which is folded into the returned token.  Any following
             character outside the lists below is rejected. */
01064     ptr += MINBPC(enc);
01065     if (ptr == end)
01066       return -XML_TOK_CLOSE_PAREN;
01067     switch (BYTE_TYPE(enc, ptr)) {
01068     case BT_AST:
01069       *nextTokPtr = ptr + MINBPC(enc);
01070       return XML_TOK_CLOSE_PAREN_ASTERISK;
01071     case BT_QUEST:
01072       *nextTokPtr = ptr + MINBPC(enc);
01073       return XML_TOK_CLOSE_PAREN_QUESTION;
01074     case BT_PLUS:
01075       *nextTokPtr = ptr + MINBPC(enc);
01076       return XML_TOK_CLOSE_PAREN_PLUS;
01077     case BT_CR: case BT_LF: case BT_S:
01078     case BT_GT: case BT_COMMA: case BT_VERBAR:
01079     case BT_RPAR:
01080       *nextTokPtr = ptr;
01081       return XML_TOK_CLOSE_PAREN;
01082     }
01083     *nextTokPtr = ptr;
01084     return XML_TOK_INVALID;
01085   case BT_VERBAR:
01086     *nextTokPtr = ptr + MINBPC(enc);
01087     return XML_TOK_OR;
01088   case BT_GT:
01089     *nextTokPtr = ptr + MINBPC(enc);
01090     return XML_TOK_DECL_CLOSE;
01091   case BT_NUM:
01092     return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
      /* LEAD_CASE(n): first character is an n-byte sequence.  Needs all n
         bytes (else XML_TOK_PARTIAL_CHAR); a name-start character begins an
         XML_TOK_NAME, any other name character begins an XML_TOK_NMTOKEN,
         anything else is invalid. */
01093 #define LEAD_CASE(n) \
01094   case BT_LEAD ## n: \
01095     if (end - ptr < n) \
01096       return XML_TOK_PARTIAL_CHAR; \
01097     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
01098       ptr += n; \
01099       tok = XML_TOK_NAME; \
01100       break; \
01101     } \
01102     if (IS_NAME_CHAR(enc, ptr, n)) { \
01103       ptr += n; \
01104       tok = XML_TOK_NMTOKEN; \
01105       break; \
01106     } \
01107     *nextTokPtr = ptr; \
01108     return XML_TOK_INVALID;
01109     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01110 #undef LEAD_CASE
01111   case BT_NMSTRT:
01112   case BT_HEX:
01113     tok = XML_TOK_NAME;
01114     ptr += MINBPC(enc);
01115     break;
01116   case BT_DIGIT:
01117   case BT_NAME:
01118   case BT_MINUS:
01119 #ifdef XML_NS
01120   case BT_COLON:
01121 #endif
01122     tok = XML_TOK_NMTOKEN;
01123     ptr += MINBPC(enc);
01124     break;
01125   case BT_NONASCII:
01126     if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
01127       ptr += MINBPC(enc);
01128       tok = XML_TOK_NAME;
01129       break;
01130     }
01131     if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
01132       ptr += MINBPC(enc);
01133       tok = XML_TOK_NMTOKEN;
01134       break;
01135     }
01136     /* fall through */
01137   default:
01138     *nextTokPtr = ptr;
01139     return XML_TOK_INVALID;
01140   }
        /* Continuation of a name or nmtoken: tok records which kind has been
           seen so far and is what gets returned at the first delimiter. */
01141   while (ptr != end) {
01142     switch (BYTE_TYPE(enc, ptr)) {
01143     CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
01144     case BT_GT: case BT_RPAR: case BT_COMMA:
01145     case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
01146     case BT_S: case BT_CR: case BT_LF:
01147       *nextTokPtr = ptr;
01148       return tok;
01149 #ifdef XML_NS
01150     case BT_COLON:
            /* Namespace build only.  The first colon after a plain name
               turns it into XML_TOK_PREFIXED_NAME, provided the next
               character is accepted by CHECK_NAME_CASES; otherwise, and on
               a second colon, the token is downgraded to XML_TOK_NMTOKEN. */
01151       ptr += MINBPC(enc);
01152       switch (tok) {
01153       case XML_TOK_NAME:
01154         if (ptr == end)
01155           return XML_TOK_PARTIAL;
01156         tok = XML_TOK_PREFIXED_NAME;
01157         switch (BYTE_TYPE(enc, ptr)) {
01158         CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
01159         default:
01160           tok = XML_TOK_NMTOKEN;
01161           break;
01162         }
01163         break;
01164       case XML_TOK_PREFIXED_NAME:
01165         tok = XML_TOK_NMTOKEN;
01166         break;
01167       }
01168       break;
01169 #endif
          /* An occurrence suffix (BT_PLUS, BT_AST, BT_QUEST) is consumed as
             part of the token, but is rejected after an nmtoken. */
01170     case BT_PLUS:
01171       if (tok == XML_TOK_NMTOKEN)  {
01172         *nextTokPtr = ptr;
01173         return XML_TOK_INVALID;
01174       }
01175       *nextTokPtr = ptr + MINBPC(enc);
01176       return XML_TOK_NAME_PLUS;
01177     case BT_AST:
01178       if (tok == XML_TOK_NMTOKEN)  {
01179         *nextTokPtr = ptr;
01180         return XML_TOK_INVALID;
01181       }
01182       *nextTokPtr = ptr + MINBPC(enc);
01183       return XML_TOK_NAME_ASTERISK;
01184     case BT_QUEST:
01185       if (tok == XML_TOK_NMTOKEN)  {
01186         *nextTokPtr = ptr;
01187         return XML_TOK_INVALID;
01188       }
01189       *nextTokPtr = ptr + MINBPC(enc);
01190       return XML_TOK_NAME_QUESTION;
01191     default:
01192       *nextTokPtr = ptr;
01193       return XML_TOK_INVALID;
01194     }
01195   }
        /* Input ended while still inside the name: return the negated token
           code.  *nextTokPtr is not written on this path. */
01196   return -tok;
01197 }
01198 
01199 static int PTRCALL
01200 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
01201                           const char *end, const char **nextTokPtr)
01202 {
01203   const char *start;
01204   if (ptr == end)
01205     return XML_TOK_NONE;
01206   start = ptr;
01207   while (ptr != end) {
01208     switch (BYTE_TYPE(enc, ptr)) {
01209 #define LEAD_CASE(n) \
01210     case BT_LEAD ## n: ptr += n; break;
01211     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01212 #undef LEAD_CASE
01213     case BT_AMP:
01214       if (ptr == start)
01215         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01216       *nextTokPtr = ptr;
01217       return XML_TOK_DATA_CHARS;
01218     case BT_LT:
01219       /* this is for inside entity references */
01220       *nextTokPtr = ptr;
01221       return XML_TOK_INVALID;
01222     case BT_LF:
01223       if (ptr == start) {
01224         *nextTokPtr = ptr + MINBPC(enc);
01225         return XML_TOK_DATA_NEWLINE;
01226       }
01227       *nextTokPtr = ptr;
01228       return XML_TOK_DATA_CHARS;
01229     case BT_CR:
01230       if (ptr == start) {
01231         ptr += MINBPC(enc);
01232         if (ptr == end)
01233           return XML_TOK_TRAILING_CR;
01234         if (BYTE_TYPE(enc, ptr) == BT_LF)
01235           ptr += MINBPC(enc);
01236         *nextTokPtr = ptr;
01237         return XML_TOK_DATA_NEWLINE;
01238       }
01239       *nextTokPtr = ptr;
01240       return XML_TOK_DATA_CHARS;
01241     case BT_S:
01242       if (ptr == start) {
01243         *nextTokPtr = ptr + MINBPC(enc);
01244         return XML_TOK_ATTRIBUTE_VALUE_S;
01245       }
01246       *nextTokPtr = ptr;
01247       return XML_TOK_DATA_CHARS;
01248     default:
01249       ptr += MINBPC(enc);
01250       break;
01251     }
01252   }
01253   *nextTokPtr = ptr;
01254   return XML_TOK_DATA_CHARS;
01255 }
01256 
01257 static int PTRCALL
01258 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
01259                        const char *end, const char **nextTokPtr)
01260 {
01261   const char *start;
01262   if (ptr == end)
01263     return XML_TOK_NONE;
01264   start = ptr;
01265   while (ptr != end) {
01266     switch (BYTE_TYPE(enc, ptr)) {
01267 #define LEAD_CASE(n) \
01268     case BT_LEAD ## n: ptr += n; break;
01269     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01270 #undef LEAD_CASE
01271     case BT_AMP:
01272       if (ptr == start)
01273         return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01274       *nextTokPtr = ptr;
01275       return XML_TOK_DATA_CHARS;
01276     case BT_PERCNT:
01277       if (ptr == start) {
01278         int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
01279                                        end, nextTokPtr);
01280         return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
01281       }
01282       *nextTokPtr = ptr;
01283       return XML_TOK_DATA_CHARS;
01284     case BT_LF:
01285       if (ptr == start) {
01286         *nextTokPtr = ptr + MINBPC(enc);
01287         return XML_TOK_DATA_NEWLINE;
01288       }
01289       *nextTokPtr = ptr;
01290       return XML_TOK_DATA_CHARS;
01291     case BT_CR:
01292       if (ptr == start) {
01293         ptr += MINBPC(enc);
01294         if (ptr == end)
01295           return XML_TOK_TRAILING_CR;
01296         if (BYTE_TYPE(enc, ptr) == BT_LF)
01297           ptr += MINBPC(enc);
01298         *nextTokPtr = ptr;
01299         return XML_TOK_DATA_NEWLINE;
01300       }
01301       *nextTokPtr = ptr;
01302       return XML_TOK_DATA_CHARS;
01303     default:
01304       ptr += MINBPC(enc);
01305       break;
01306     }
01307   }
01308   *nextTokPtr = ptr;
01309   return XML_TOK_DATA_CHARS;
01310 }
01311 
01312 #ifdef XML_DTD
01313 
01314 static int PTRCALL
01315 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
01316                          const char *end, const char **nextTokPtr)
01317 {
01318   int level = 0;
01319   if (MINBPC(enc) > 1) {
01320     size_t n = end - ptr;
01321     if (n & (MINBPC(enc) - 1)) {
01322       n &= ~(MINBPC(enc) - 1);
01323       end = ptr + n;
01324     }
01325   }
01326   while (ptr != end) {
01327     switch (BYTE_TYPE(enc, ptr)) {
01328     INVALID_CASES(ptr, nextTokPtr)
01329     case BT_LT:
01330       if ((ptr += MINBPC(enc)) == end)
01331         return XML_TOK_PARTIAL;
01332       if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
01333         if ((ptr += MINBPC(enc)) == end)
01334           return XML_TOK_PARTIAL;
01335         if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
01336           ++level;
01337           ptr += MINBPC(enc);
01338         }
01339       }
01340       break;
01341     case BT_RSQB:
01342       if ((ptr += MINBPC(enc)) == end)
01343         return XML_TOK_PARTIAL;
01344       if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
01345         if ((ptr += MINBPC(enc)) == end)
01346           return XML_TOK_PARTIAL;
01347         if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
01348           ptr += MINBPC(enc);
01349           if (level == 0) {
01350             *nextTokPtr = ptr;
01351             return XML_TOK_IGNORE_SECT;
01352           }
01353           --level;
01354         }
01355       }
01356       break;
01357     default:
01358       ptr += MINBPC(enc);
01359       break;
01360     }
01361   }
01362   return XML_TOK_PARTIAL;
01363 }
01364 
01365 #endif /* XML_DTD */
01366 
01367 static int PTRCALL
01368 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
01369                    const char **badPtr)
01370 {
01371   ptr += MINBPC(enc);
01372   end -= MINBPC(enc);
01373   for (; ptr != end; ptr += MINBPC(enc)) {
01374     switch (BYTE_TYPE(enc, ptr)) {
01375     case BT_DIGIT:
01376     case BT_HEX:
01377     case BT_MINUS:
01378     case BT_APOS:
01379     case BT_LPAR:
01380     case BT_RPAR:
01381     case BT_PLUS:
01382     case BT_COMMA:
01383     case BT_SOL:
01384     case BT_EQUALS:
01385     case BT_QUEST:
01386     case BT_CR:
01387     case BT_LF:
01388     case BT_SEMI:
01389     case BT_EXCL:
01390     case BT_AST:
01391     case BT_PERCNT:
01392     case BT_NUM:
01393 #ifdef XML_NS
01394     case BT_COLON:
01395 #endif
01396       break;
01397     case BT_S:
01398       if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
01399         *badPtr = ptr;
01400         return 0;
01401       }
01402       break;
01403     case BT_NAME:
01404     case BT_NMSTRT:
01405       if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
01406         break;
01407     default:
01408       switch (BYTE_TO_ASCII(enc, ptr)) {
01409       case 0x24: /* $ */
01410       case 0x40: /* @ */
01411         break;
01412       default:
01413         *badPtr = ptr;
01414         return 0;
01415       }
01416       break;
01417     }
01418   }
01419   return 1;
01420 }
01421 
01422 /* This must only be called for a well-formed start-tag or empty
01423    element tag.  Returns the number of attributes.  Pointers to the
01424    first attsMax attributes are stored in atts.
01425 */
01426 
/* Walks a start-tag beginning at ptr and records its attributes.

   enc     - encoding used to classify the bytes.
   ptr     - start of the tag; scanning begins one code unit after it.
   attsMax - capacity of atts; entries at index >= attsMax are never written.
   atts    - receives name, valuePtr, valueEnd and normalized for each
             attribute that fits.

   Returns the total number of attributes seen, which may exceed attsMax.
   The loop has no end-of-buffer test: it stops only at a BT_GT or BT_SOL
   character found outside an attribute value.
*/
01427 static int PTRCALL
01428 PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
01429                 int attsMax, ATTRIBUTE *atts)
01430 {
        /* state starts as inName, so the name that begins the tag is
           skipped rather than recorded as an attribute name. */
01431   enum { other, inName, inValue } state = inName;
01432   int nAtts = 0;
01433   int open = 0; /* defined when state == inValue;
01434                    initialization just to shut up compilers */
01435 
01436   for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
01437     switch (BYTE_TYPE(enc, ptr)) {
      /* START_NAME: a name character seen while in state "other" begins a
         new attribute name; the attribute is assumed normalized until
         something in its value shows otherwise. */
01438 #define START_NAME \
01439       if (state == other) { \
01440         if (nAtts < attsMax) { \
01441           atts[nAtts].name = ptr; \
01442           atts[nAtts].normalized = 1; \
01443         } \
01444         state = inName; \
01445       }
      /* Multi-byte lead: advance by n - MINBPC here; the loop increment
         supplies the remaining MINBPC. */
01446 #define LEAD_CASE(n) \
01447     case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
01448     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01449 #undef LEAD_CASE
01450     case BT_NONASCII:
01451     case BT_NMSTRT:
01452     case BT_HEX:
01453       START_NAME
01454       break;
01455 #undef START_NAME
01456     case BT_QUOT:
            /* Outside a value this quote opens one; inside a value it closes
               it only if the value was opened with the same quote type. */
01457       if (state != inValue) {
01458         if (nAtts < attsMax)
01459           atts[nAtts].valuePtr = ptr + MINBPC(enc);
01460         state = inValue;
01461         open = BT_QUOT;
01462       }
01463       else if (open == BT_QUOT) {
01464         state = other;
01465         if (nAtts < attsMax)
01466           atts[nAtts].valueEnd = ptr;
01467         nAtts++;
01468       }
01469       break;
01470     case BT_APOS:
            /* Same handling as BT_QUOT, for the other quote type. */
01471       if (state != inValue) {
01472         if (nAtts < attsMax)
01473           atts[nAtts].valuePtr = ptr + MINBPC(enc);
01474         state = inValue;
01475         open = BT_APOS;
01476       }
01477       else if (open == BT_APOS) {
01478         state = other;
01479         if (nAtts < attsMax)
01480           atts[nAtts].valueEnd = ptr;
01481         nAtts++;
01482       }
01483       break;
01484     case BT_AMP:
            /* A BT_AMP character marks the current attribute as not
               normalized. */
01485       if (nAtts < attsMax)
01486         atts[nAtts].normalized = 0;
01487       break;
01488     case BT_S:
            /* After a name, whitespace just ends the name.  Inside a value
               the normalized flag is cleared when this character is the
               first one of the value, is not a plain space, is followed by
               another space, or is followed by the closing quote. */
01489       if (state == inName)
01490         state = other;
01491       else if (state == inValue
01492                && nAtts < attsMax
01493                && atts[nAtts].normalized
01494                && (ptr == atts[nAtts].valuePtr
01495                    || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
01496                    || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
01497                    || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
01498         atts[nAtts].normalized = 0;
01499       break;
01500     case BT_CR: case BT_LF:
01501       /* This case ensures that the first attribute name is counted
01502          Apart from that we could just change state on the quote. */
01503       if (state == inName)
01504         state = other;
01505       else if (state == inValue && nAtts < attsMax)
01506         atts[nAtts].normalized = 0;
01507       break;
01508     case BT_GT:
01509     case BT_SOL:
            /* End of the tag, unless the character is inside a value. */
01510       if (state != inValue)
01511         return nAtts;
01512       break;
01513     default:
01514       break;
01515     }
01516   }
01517   /* not reached */
01518 }
01519 
01520 static int PTRFASTCALL
01521 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
01522 {
01523   int result = 0;
01524   /* skip &# */
01525   ptr += 2*MINBPC(enc);
01526   if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
01527     for (ptr += MINBPC(enc);
01528          !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
01529          ptr += MINBPC(enc)) {
01530       int c = BYTE_TO_ASCII(enc, ptr);
01531       switch (c) {
01532       case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
01533       case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
01534         result <<= 4;
01535         result |= (c - ASCII_0);
01536         break;
01537       case ASCII_A: case ASCII_B: case ASCII_C:
01538       case ASCII_D: case ASCII_E: case ASCII_F:
01539         result <<= 4;
01540         result += 10 + (c - ASCII_A);
01541         break;
01542       case ASCII_a: case ASCII_b: case ASCII_c:
01543       case ASCII_d: case ASCII_e: case ASCII_f:
01544         result <<= 4;
01545         result += 10 + (c - ASCII_a);
01546         break;
01547       }
01548       if (result >= 0x110000)
01549         return -1;
01550     }
01551   }
01552   else {
01553     for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
01554       int c = BYTE_TO_ASCII(enc, ptr);
01555       result *= 10;
01556       result += (c - ASCII_0);
01557       if (result >= 0x110000)
01558         return -1;
01559     }
01560   }
01561   return checkCharRefNumber(result);
01562 }
01563 
01564 static int PTRCALL
01565 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
01566                              const char *end)
01567 {
01568   switch ((end - ptr)/MINBPC(enc)) {
01569   case 2:
01570     if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
01571       switch (BYTE_TO_ASCII(enc, ptr)) {
01572       case ASCII_l:
01573         return ASCII_LT;
01574       case ASCII_g:
01575         return ASCII_GT;
01576       }
01577     }
01578     break;
01579   case 3:
01580     if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
01581       ptr += MINBPC(enc);
01582       if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
01583         ptr += MINBPC(enc);
01584         if (CHAR_MATCHES(enc, ptr, ASCII_p))
01585           return ASCII_AMP;
01586       }
01587     }
01588     break;
01589   case 4:
01590     switch (BYTE_TO_ASCII(enc, ptr)) {
01591     case ASCII_q:
01592       ptr += MINBPC(enc);
01593       if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
01594         ptr += MINBPC(enc);
01595         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
01596           ptr += MINBPC(enc);
01597           if (CHAR_MATCHES(enc, ptr, ASCII_t))
01598             return ASCII_QUOT;
01599         }
01600       }
01601       break;
01602     case ASCII_a:
01603       ptr += MINBPC(enc);
01604       if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
01605         ptr += MINBPC(enc);
01606         if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
01607           ptr += MINBPC(enc);
01608           if (CHAR_MATCHES(enc, ptr, ASCII_s))
01609             return ASCII_APOS;
01610         }
01611       }
01612       break;
01613     }
01614   }
01615   return 0;
01616 }
01617 
/* Compares the names starting at ptr1 and ptr2 byte by byte.

   Returns 1 when both names consist of the same bytes and end at the same
   point, 0 otherwise.  Neither name has an explicit end: the comparison
   stops at the first character of ptr1 that is not one of the name byte
   types listed below, so both inputs must be followed by such a character.
*/
01618 static int PTRCALL
01619 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
01620 {
01621   for (;;) {
01622     switch (BYTE_TYPE(enc, ptr1)) {
      /* The LEAD cases are listed as 4, 3, 2 and have no break, so each one
         compares a single byte and then falls into the next: entering at
         BT_LEAD4 compares three bytes here, BT_LEAD3 two, BT_LEAD2 one.
         The compare after the macro expansions handles the final byte. */
01623 #define LEAD_CASE(n) \
01624     case BT_LEAD ## n: \
01625       if (*ptr1++ != *ptr2++) \
01626         return 0;
01627     LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
01628 #undef LEAD_CASE
01629       /* fall through */
01630       if (*ptr1++ != *ptr2++)
01631         return 0;
01632       break;
01633     case BT_NONASCII:
01634     case BT_NMSTRT:
01635 #ifdef XML_NS
01636     case BT_COLON:
01637 #endif
01638     case BT_HEX:
01639     case BT_DIGIT:
01640     case BT_NAME:
01641     case BT_MINUS:
            /* Single-unit name character: compare all MINBPC(enc) bytes of
               it (up to four), one byte at a time. */
01642       if (*ptr2++ != *ptr1++)
01643         return 0;
01644       if (MINBPC(enc) > 1) {
01645         if (*ptr2++ != *ptr1++)
01646           return 0;
01647         if (MINBPC(enc) > 2) {
01648           if (*ptr2++ != *ptr1++)
01649             return 0;
01650           if (MINBPC(enc) > 3) {
01651             if (*ptr2++ != *ptr1++)
01652               return 0;
01653           }
01654         }
01655       }
01656       break;
01657     default:
            /* The first name has ended.  With one-byte units, identical
               bytes at both positions settle it at once; otherwise the
               names match only if the second name has ended here too, i.e.
               ptr2 is not at a name character. */
01658       if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
01659         return 1;
01660       switch (BYTE_TYPE(enc, ptr2)) {
01661       case BT_LEAD2:
01662       case BT_LEAD3:
01663       case BT_LEAD4:
01664       case BT_NONASCII:
01665       case BT_NMSTRT:
01666 #ifdef XML_NS
01667       case BT_COLON:
01668 #endif
01669       case BT_HEX:
01670       case BT_DIGIT:
01671       case BT_NAME:
01672       case BT_MINUS:
01673         return 0;
01674       default:
01675         return 1;
01676       }
01677     }
01678   }
01679   /* not reached */
01680 }
01681 
01682 static int PTRCALL
01683 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
01684                          const char *end1, const char *ptr2)
01685 {
01686   for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
01687     if (ptr1 == end1)
01688       return 0;
01689     if (!CHAR_MATCHES(enc, ptr1, *ptr2))
01690       return 0;
01691   }
01692   return ptr1 == end1;
01693 }
01694 
01695 static int PTRFASTCALL
01696 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
01697 {
01698   const char *start = ptr;
01699   for (;;) {
01700     switch (BYTE_TYPE(enc, ptr)) {
01701 #define LEAD_CASE(n) \
01702     case BT_LEAD ## n: ptr += n; break;
01703     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01704 #undef LEAD_CASE
01705     case BT_NONASCII:
01706     case BT_NMSTRT:
01707 #ifdef XML_NS
01708     case BT_COLON:
01709 #endif
01710     case BT_HEX:
01711     case BT_DIGIT:
01712     case BT_NAME:
01713     case BT_MINUS:
01714       ptr += MINBPC(enc);
01715       break;
01716     default:
01717       return (int)(ptr - start);
01718     }
01719   }
01720 }
01721 
01722 static const char * PTRFASTCALL
01723 PREFIX(skipS)(const ENCODING *enc, const char *ptr)
01724 {
01725   for (;;) {
01726     switch (BYTE_TYPE(enc, ptr)) {
01727     case BT_LF:
01728     case BT_CR:
01729     case BT_S:
01730       ptr += MINBPC(enc);
01731       break;
01732     default:
01733       return ptr;
01734     }
01735   }
01736 }
01737 
01738 static void PTRCALL
01739 PREFIX(updatePosition)(const ENCODING *enc,
01740                        const char *ptr,
01741                        const char *end,
01742                        POSITION *pos)
01743 {
01744   while (ptr != end) {
01745     switch (BYTE_TYPE(enc, ptr)) {
01746 #define LEAD_CASE(n) \
01747     case BT_LEAD ## n: \
01748       ptr += n; \
01749       break;
01750     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01751 #undef LEAD_CASE
01752     case BT_LF:
01753       pos->columnNumber = (XML_Size)-1;
01754       pos->lineNumber++;
01755       ptr += MINBPC(enc);
01756       break;
01757     case BT_CR:
01758       pos->lineNumber++;
01759       ptr += MINBPC(enc);
01760       if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
01761         ptr += MINBPC(enc);
01762       pos->columnNumber = (XML_Size)-1;
01763       break;
01764     default:
01765       ptr += MINBPC(enc);
01766       break;
01767     }
01768     pos->columnNumber++;
01769   }
01770 }
01771 
/* Remove the case-generating helper macros so they do not stay defined
   past the end of this file.  INVALID_CASES, CHECK_NAME_CASE and
   CHECK_NAME_CASES are defined at the top of the file.
   NOTE(review): INVALID_LEAD_CASE is also defined at the top of the file
   but has no matching #undef here - confirm whether that is intentional. */
01772 #undef DO_LEAD_CASE
01773 #undef MULTIBYTE_CASES
01774 #undef INVALID_CASES
01775 #undef CHECK_NAME_CASE
01776 #undef CHECK_NAME_CASES
01777 #undef CHECK_NMSTRT_CASE
01778 #undef CHECK_NMSTRT_CASES
01779 

Generated on Fri May 25 2012 04:32:04 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.