Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > xmltok_impl.c
Go to the documentation of this file.
00001 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 00002 See the file COPYING for copying permission. 00003 */ 00004 00005 #ifndef IS_INVALID_CHAR 00006 #define IS_INVALID_CHAR(enc, ptr, n) (0) 00007 #endif 00008 00009 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ 00010 case BT_LEAD ## n: \ 00011 if (end - ptr < n) \ 00012 return XML_TOK_PARTIAL_CHAR; \ 00013 if (IS_INVALID_CHAR(enc, ptr, n)) { \ 00014 *(nextTokPtr) = (ptr); \ 00015 return XML_TOK_INVALID; \ 00016 } \ 00017 ptr += n; \ 00018 break; 00019 00020 #define INVALID_CASES(ptr, nextTokPtr) \ 00021 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ 00022 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ 00023 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ 00024 case BT_NONXML: \ 00025 case BT_MALFORM: \ 00026 case BT_TRAIL: \ 00027 *(nextTokPtr) = (ptr); \ 00028 return XML_TOK_INVALID; 00029 00030 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ 00031 case BT_LEAD ## n: \ 00032 if (end - ptr < n) \ 00033 return XML_TOK_PARTIAL_CHAR; \ 00034 if (!IS_NAME_CHAR(enc, ptr, n)) { \ 00035 *nextTokPtr = ptr; \ 00036 return XML_TOK_INVALID; \ 00037 } \ 00038 ptr += n; \ 00039 break; 00040 00041 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ 00042 case BT_NONASCII: \ 00043 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ 00044 *nextTokPtr = ptr; \ 00045 return XML_TOK_INVALID; \ 00046 } \ 00047 case BT_NMSTRT: \ 00048 case BT_HEX: \ 00049 case BT_DIGIT: \ 00050 case BT_NAME: \ 00051 case BT_MINUS: \ 00052 ptr += MINBPC(enc); \ 00053 break; \ 00054 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ 00055 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ 00056 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) 00057 00058 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ 00059 case BT_LEAD ## n: \ 00060 if (end - ptr < n) \ 00061 return XML_TOK_PARTIAL_CHAR; \ 00062 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ 00063 *nextTokPtr = ptr; \ 00064 return XML_TOK_INVALID; \ 00065 } \ 00066 ptr += n; \ 00067 break; 00068 00069 #define 
CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ 00070 case BT_NONASCII: \ 00071 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ 00072 *nextTokPtr = ptr; \ 00073 return XML_TOK_INVALID; \ 00074 } \ 00075 case BT_NMSTRT: \ 00076 case BT_HEX: \ 00077 ptr += MINBPC(enc); \ 00078 break; \ 00079 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 00080 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 00081 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 00082 00083 #ifndef PREFIX 00084 #define PREFIX(ident) ident 00085 #endif 00086 00087 /* ptr points to character following "<!-" */ 00088 00089 static int PTRCALL 00090 PREFIX(scanComment)(const ENCODING *enc, const char *ptr, 00091 const char *end, const char **nextTokPtr) 00092 { 00093 if (ptr != end) { 00094 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 00095 *nextTokPtr = ptr; 00096 return XML_TOK_INVALID; 00097 } 00098 ptr += MINBPC(enc); 00099 while (ptr != end) { 00100 switch (BYTE_TYPE(enc, ptr)) { 00101 INVALID_CASES(ptr, nextTokPtr) 00102 case BT_MINUS: 00103 if ((ptr += MINBPC(enc)) == end) 00104 return XML_TOK_PARTIAL; 00105 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 00106 if ((ptr += MINBPC(enc)) == end) 00107 return XML_TOK_PARTIAL; 00108 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 00109 *nextTokPtr = ptr; 00110 return XML_TOK_INVALID; 00111 } 00112 *nextTokPtr = ptr + MINBPC(enc); 00113 return XML_TOK_COMMENT; 00114 } 00115 break; 00116 default: 00117 ptr += MINBPC(enc); 00118 break; 00119 } 00120 } 00121 } 00122 return XML_TOK_PARTIAL; 00123 } 00124 00125 /* ptr points to character following "<!" 
*/ 00126 00127 static int PTRCALL 00128 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, 00129 const char *end, const char **nextTokPtr) 00130 { 00131 if (ptr == end) 00132 return XML_TOK_PARTIAL; 00133 switch (BYTE_TYPE(enc, ptr)) { 00134 case BT_MINUS: 00135 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00136 case BT_LSQB: 00137 *nextTokPtr = ptr + MINBPC(enc); 00138 return XML_TOK_COND_SECT_OPEN; 00139 case BT_NMSTRT: 00140 case BT_HEX: 00141 ptr += MINBPC(enc); 00142 break; 00143 default: 00144 *nextTokPtr = ptr; 00145 return XML_TOK_INVALID; 00146 } 00147 while (ptr != end) { 00148 switch (BYTE_TYPE(enc, ptr)) { 00149 case BT_PERCNT: 00150 if (ptr + MINBPC(enc) == end) 00151 return XML_TOK_PARTIAL; 00152 /* don't allow <!ENTITY% foo "whatever"> */ 00153 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { 00154 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: 00155 *nextTokPtr = ptr; 00156 return XML_TOK_INVALID; 00157 } 00158 /* fall through */ 00159 case BT_S: case BT_CR: case BT_LF: 00160 *nextTokPtr = ptr; 00161 return XML_TOK_DECL_OPEN; 00162 case BT_NMSTRT: 00163 case BT_HEX: 00164 ptr += MINBPC(enc); 00165 break; 00166 default: 00167 *nextTokPtr = ptr; 00168 return XML_TOK_INVALID; 00169 } 00170 } 00171 return XML_TOK_PARTIAL; 00172 } 00173 00174 static int PTRCALL 00175 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, 00176 const char *end, int *tokPtr) 00177 { 00178 int upper = 0; 00179 *tokPtr = XML_TOK_PI; 00180 if (end - ptr != MINBPC(enc)*3) 00181 return 1; 00182 switch (BYTE_TO_ASCII(enc, ptr)) { 00183 case ASCII_x: 00184 break; 00185 case ASCII_X: 00186 upper = 1; 00187 break; 00188 default: 00189 return 1; 00190 } 00191 ptr += MINBPC(enc); 00192 switch (BYTE_TO_ASCII(enc, ptr)) { 00193 case ASCII_m: 00194 break; 00195 case ASCII_M: 00196 upper = 1; 00197 break; 00198 default: 00199 return 1; 00200 } 00201 ptr += MINBPC(enc); 00202 switch (BYTE_TO_ASCII(enc, ptr)) { 00203 case ASCII_l: 00204 break; 00205 case 
ASCII_L: 00206 upper = 1; 00207 break; 00208 default: 00209 return 1; 00210 } 00211 if (upper) 00212 return 0; 00213 *tokPtr = XML_TOK_XML_DECL; 00214 return 1; 00215 } 00216 00217 /* ptr points to character following "<?" */ 00218 00219 static int PTRCALL 00220 PREFIX(scanPi)(const ENCODING *enc, const char *ptr, 00221 const char *end, const char **nextTokPtr) 00222 { 00223 int tok; 00224 const char *target = ptr; 00225 if (ptr == end) 00226 return XML_TOK_PARTIAL; 00227 switch (BYTE_TYPE(enc, ptr)) { 00228 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00229 default: 00230 *nextTokPtr = ptr; 00231 return XML_TOK_INVALID; 00232 } 00233 while (ptr != end) { 00234 switch (BYTE_TYPE(enc, ptr)) { 00235 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 00236 case BT_S: case BT_CR: case BT_LF: 00237 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 00238 *nextTokPtr = ptr; 00239 return XML_TOK_INVALID; 00240 } 00241 ptr += MINBPC(enc); 00242 while (ptr != end) { 00243 switch (BYTE_TYPE(enc, ptr)) { 00244 INVALID_CASES(ptr, nextTokPtr) 00245 case BT_QUEST: 00246 ptr += MINBPC(enc); 00247 if (ptr == end) 00248 return XML_TOK_PARTIAL; 00249 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 00250 *nextTokPtr = ptr + MINBPC(enc); 00251 return tok; 00252 } 00253 break; 00254 default: 00255 ptr += MINBPC(enc); 00256 break; 00257 } 00258 } 00259 return XML_TOK_PARTIAL; 00260 case BT_QUEST: 00261 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 00262 *nextTokPtr = ptr; 00263 return XML_TOK_INVALID; 00264 } 00265 ptr += MINBPC(enc); 00266 if (ptr == end) 00267 return XML_TOK_PARTIAL; 00268 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 00269 *nextTokPtr = ptr + MINBPC(enc); 00270 return tok; 00271 } 00272 /* fall through */ 00273 default: 00274 *nextTokPtr = ptr; 00275 return XML_TOK_INVALID; 00276 } 00277 } 00278 return XML_TOK_PARTIAL; 00279 } 00280 00281 static int PTRCALL 00282 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, 00283 const char *end, const char **nextTokPtr) 
00284 { 00285 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, 00286 ASCII_T, ASCII_A, ASCII_LSQB }; 00287 int i; 00288 /* CDATA[ */ 00289 if (end - ptr < 6 * MINBPC(enc)) 00290 return XML_TOK_PARTIAL; 00291 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { 00292 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { 00293 *nextTokPtr = ptr; 00294 return XML_TOK_INVALID; 00295 } 00296 } 00297 *nextTokPtr = ptr; 00298 return XML_TOK_CDATA_SECT_OPEN; 00299 } 00300 00301 static int PTRCALL 00302 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, 00303 const char *end, const char **nextTokPtr) 00304 { 00305 if (ptr == end) 00306 return XML_TOK_NONE; 00307 if (MINBPC(enc) > 1) { 00308 size_t n = end - ptr; 00309 if (n & (MINBPC(enc) - 1)) { 00310 n &= ~(MINBPC(enc) - 1); 00311 if (n == 0) 00312 return XML_TOK_PARTIAL; 00313 end = ptr + n; 00314 } 00315 } 00316 switch (BYTE_TYPE(enc, ptr)) { 00317 case BT_RSQB: 00318 ptr += MINBPC(enc); 00319 if (ptr == end) 00320 return XML_TOK_PARTIAL; 00321 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 00322 break; 00323 ptr += MINBPC(enc); 00324 if (ptr == end) 00325 return XML_TOK_PARTIAL; 00326 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 00327 ptr -= MINBPC(enc); 00328 break; 00329 } 00330 *nextTokPtr = ptr + MINBPC(enc); 00331 return XML_TOK_CDATA_SECT_CLOSE; 00332 case BT_CR: 00333 ptr += MINBPC(enc); 00334 if (ptr == end) 00335 return XML_TOK_PARTIAL; 00336 if (BYTE_TYPE(enc, ptr) == BT_LF) 00337 ptr += MINBPC(enc); 00338 *nextTokPtr = ptr; 00339 return XML_TOK_DATA_NEWLINE; 00340 case BT_LF: 00341 *nextTokPtr = ptr + MINBPC(enc); 00342 return XML_TOK_DATA_NEWLINE; 00343 INVALID_CASES(ptr, nextTokPtr) 00344 default: 00345 ptr += MINBPC(enc); 00346 break; 00347 } 00348 while (ptr != end) { 00349 switch (BYTE_TYPE(enc, ptr)) { 00350 #define LEAD_CASE(n) \ 00351 case BT_LEAD ## n: \ 00352 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 00353 *nextTokPtr = ptr; \ 00354 return XML_TOK_DATA_CHARS; \ 00355 } \ 00356 ptr += n; 
\ 00357 break; 00358 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 00359 #undef LEAD_CASE 00360 case BT_NONXML: 00361 case BT_MALFORM: 00362 case BT_TRAIL: 00363 case BT_CR: 00364 case BT_LF: 00365 case BT_RSQB: 00366 *nextTokPtr = ptr; 00367 return XML_TOK_DATA_CHARS; 00368 default: 00369 ptr += MINBPC(enc); 00370 break; 00371 } 00372 } 00373 *nextTokPtr = ptr; 00374 return XML_TOK_DATA_CHARS; 00375 } 00376 00377 /* ptr points to character following "</" */ 00378 00379 static int PTRCALL 00380 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, 00381 const char *end, const char **nextTokPtr) 00382 { 00383 if (ptr == end) 00384 return XML_TOK_PARTIAL; 00385 switch (BYTE_TYPE(enc, ptr)) { 00386 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00387 default: 00388 *nextTokPtr = ptr; 00389 return XML_TOK_INVALID; 00390 } 00391 while (ptr != end) { 00392 switch (BYTE_TYPE(enc, ptr)) { 00393 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 00394 case BT_S: case BT_CR: case BT_LF: 00395 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 00396 switch (BYTE_TYPE(enc, ptr)) { 00397 case BT_S: case BT_CR: case BT_LF: 00398 break; 00399 case BT_GT: 00400 *nextTokPtr = ptr + MINBPC(enc); 00401 return XML_TOK_END_TAG; 00402 default: 00403 *nextTokPtr = ptr; 00404 return XML_TOK_INVALID; 00405 } 00406 } 00407 return XML_TOK_PARTIAL; 00408 #ifdef XML_NS 00409 case BT_COLON: 00410 /* no need to check qname syntax here, 00411 since end-tag must match exactly */ 00412 ptr += MINBPC(enc); 00413 break; 00414 #endif 00415 case BT_GT: 00416 *nextTokPtr = ptr + MINBPC(enc); 00417 return XML_TOK_END_TAG; 00418 default: 00419 *nextTokPtr = ptr; 00420 return XML_TOK_INVALID; 00421 } 00422 } 00423 return XML_TOK_PARTIAL; 00424 } 00425 00426 /* ptr points to character following "&#X" */ 00427 00428 static int PTRCALL 00429 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, 00430 const char *end, const char **nextTokPtr) 00431 { 00432 if (ptr != end) { 00433 switch (BYTE_TYPE(enc, 
ptr)) { 00434 case BT_DIGIT: 00435 case BT_HEX: 00436 break; 00437 default: 00438 *nextTokPtr = ptr; 00439 return XML_TOK_INVALID; 00440 } 00441 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 00442 switch (BYTE_TYPE(enc, ptr)) { 00443 case BT_DIGIT: 00444 case BT_HEX: 00445 break; 00446 case BT_SEMI: 00447 *nextTokPtr = ptr + MINBPC(enc); 00448 return XML_TOK_CHAR_REF; 00449 default: 00450 *nextTokPtr = ptr; 00451 return XML_TOK_INVALID; 00452 } 00453 } 00454 } 00455 return XML_TOK_PARTIAL; 00456 } 00457 00458 /* ptr points to character following "&#" */ 00459 00460 static int PTRCALL 00461 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, 00462 const char *end, const char **nextTokPtr) 00463 { 00464 if (ptr != end) { 00465 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 00466 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00467 switch (BYTE_TYPE(enc, ptr)) { 00468 case BT_DIGIT: 00469 break; 00470 default: 00471 *nextTokPtr = ptr; 00472 return XML_TOK_INVALID; 00473 } 00474 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 00475 switch (BYTE_TYPE(enc, ptr)) { 00476 case BT_DIGIT: 00477 break; 00478 case BT_SEMI: 00479 *nextTokPtr = ptr + MINBPC(enc); 00480 return XML_TOK_CHAR_REF; 00481 default: 00482 *nextTokPtr = ptr; 00483 return XML_TOK_INVALID; 00484 } 00485 } 00486 } 00487 return XML_TOK_PARTIAL; 00488 } 00489 00490 /* ptr points to character following "&" */ 00491 00492 static int PTRCALL 00493 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, 00494 const char **nextTokPtr) 00495 { 00496 if (ptr == end) 00497 return XML_TOK_PARTIAL; 00498 switch (BYTE_TYPE(enc, ptr)) { 00499 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00500 case BT_NUM: 00501 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00502 default: 00503 *nextTokPtr = ptr; 00504 return XML_TOK_INVALID; 00505 } 00506 while (ptr != end) { 00507 switch (BYTE_TYPE(enc, ptr)) { 00508 CHECK_NAME_CASES(enc, ptr, end, 
nextTokPtr) 00509 case BT_SEMI: 00510 *nextTokPtr = ptr + MINBPC(enc); 00511 return XML_TOK_ENTITY_REF; 00512 default: 00513 *nextTokPtr = ptr; 00514 return XML_TOK_INVALID; 00515 } 00516 } 00517 return XML_TOK_PARTIAL; 00518 } 00519 00520 /* ptr points to character following first character of attribute name */ 00521 00522 static int PTRCALL 00523 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, 00524 const char **nextTokPtr) 00525 { 00526 #ifdef XML_NS 00527 int hadColon = 0; 00528 #endif 00529 while (ptr != end) { 00530 switch (BYTE_TYPE(enc, ptr)) { 00531 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 00532 #ifdef XML_NS 00533 case BT_COLON: 00534 if (hadColon) { 00535 *nextTokPtr = ptr; 00536 return XML_TOK_INVALID; 00537 } 00538 hadColon = 1; 00539 ptr += MINBPC(enc); 00540 if (ptr == end) 00541 return XML_TOK_PARTIAL; 00542 switch (BYTE_TYPE(enc, ptr)) { 00543 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00544 default: 00545 *nextTokPtr = ptr; 00546 return XML_TOK_INVALID; 00547 } 00548 break; 00549 #endif 00550 case BT_S: case BT_CR: case BT_LF: 00551 for (;;) { 00552 int t; 00553 00554 ptr += MINBPC(enc); 00555 if (ptr == end) 00556 return XML_TOK_PARTIAL; 00557 t = BYTE_TYPE(enc, ptr); 00558 if (t == BT_EQUALS) 00559 break; 00560 switch (t) { 00561 case BT_S: 00562 case BT_LF: 00563 case BT_CR: 00564 break; 00565 default: 00566 *nextTokPtr = ptr; 00567 return XML_TOK_INVALID; 00568 } 00569 } 00570 /* fall through */ 00571 case BT_EQUALS: 00572 { 00573 int open; 00574 #ifdef XML_NS 00575 hadColon = 0; 00576 #endif 00577 for (;;) { 00578 ptr += MINBPC(enc); 00579 if (ptr == end) 00580 return XML_TOK_PARTIAL; 00581 open = BYTE_TYPE(enc, ptr); 00582 if (open == BT_QUOT || open == BT_APOS) 00583 break; 00584 switch (open) { 00585 case BT_S: 00586 case BT_LF: 00587 case BT_CR: 00588 break; 00589 default: 00590 *nextTokPtr = ptr; 00591 return XML_TOK_INVALID; 00592 } 00593 } 00594 ptr += MINBPC(enc); 00595 /* in attribute value */ 00596 for 
(;;) { 00597 int t; 00598 if (ptr == end) 00599 return XML_TOK_PARTIAL; 00600 t = BYTE_TYPE(enc, ptr); 00601 if (t == open) 00602 break; 00603 switch (t) { 00604 INVALID_CASES(ptr, nextTokPtr) 00605 case BT_AMP: 00606 { 00607 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); 00608 if (tok <= 0) { 00609 if (tok == XML_TOK_INVALID) 00610 *nextTokPtr = ptr; 00611 return tok; 00612 } 00613 break; 00614 } 00615 case BT_LT: 00616 *nextTokPtr = ptr; 00617 return XML_TOK_INVALID; 00618 default: 00619 ptr += MINBPC(enc); 00620 break; 00621 } 00622 } 00623 ptr += MINBPC(enc); 00624 if (ptr == end) 00625 return XML_TOK_PARTIAL; 00626 switch (BYTE_TYPE(enc, ptr)) { 00627 case BT_S: 00628 case BT_CR: 00629 case BT_LF: 00630 break; 00631 case BT_SOL: 00632 goto sol; 00633 case BT_GT: 00634 goto gt; 00635 default: 00636 *nextTokPtr = ptr; 00637 return XML_TOK_INVALID; 00638 } 00639 /* ptr points to closing quote */ 00640 for (;;) { 00641 ptr += MINBPC(enc); 00642 if (ptr == end) 00643 return XML_TOK_PARTIAL; 00644 switch (BYTE_TYPE(enc, ptr)) { 00645 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00646 case BT_S: case BT_CR: case BT_LF: 00647 continue; 00648 case BT_GT: 00649 gt: 00650 *nextTokPtr = ptr + MINBPC(enc); 00651 return XML_TOK_START_TAG_WITH_ATTS; 00652 case BT_SOL: 00653 sol: 00654 ptr += MINBPC(enc); 00655 if (ptr == end) 00656 return XML_TOK_PARTIAL; 00657 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 00658 *nextTokPtr = ptr; 00659 return XML_TOK_INVALID; 00660 } 00661 *nextTokPtr = ptr + MINBPC(enc); 00662 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; 00663 default: 00664 *nextTokPtr = ptr; 00665 return XML_TOK_INVALID; 00666 } 00667 break; 00668 } 00669 break; 00670 } 00671 default: 00672 *nextTokPtr = ptr; 00673 return XML_TOK_INVALID; 00674 } 00675 } 00676 return XML_TOK_PARTIAL; 00677 } 00678 00679 /* ptr points to character following "<" */ 00680 00681 static int PTRCALL 00682 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, 00683 const 
char **nextTokPtr) 00684 { 00685 #ifdef XML_NS 00686 int hadColon; 00687 #endif 00688 if (ptr == end) 00689 return XML_TOK_PARTIAL; 00690 switch (BYTE_TYPE(enc, ptr)) { 00691 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00692 case BT_EXCL: 00693 if ((ptr += MINBPC(enc)) == end) 00694 return XML_TOK_PARTIAL; 00695 switch (BYTE_TYPE(enc, ptr)) { 00696 case BT_MINUS: 00697 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00698 case BT_LSQB: 00699 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), 00700 end, nextTokPtr); 00701 } 00702 *nextTokPtr = ptr; 00703 return XML_TOK_INVALID; 00704 case BT_QUEST: 00705 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00706 case BT_SOL: 00707 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00708 default: 00709 *nextTokPtr = ptr; 00710 return XML_TOK_INVALID; 00711 } 00712 #ifdef XML_NS 00713 hadColon = 0; 00714 #endif 00715 /* we have a start-tag */ 00716 while (ptr != end) { 00717 switch (BYTE_TYPE(enc, ptr)) { 00718 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 00719 #ifdef XML_NS 00720 case BT_COLON: 00721 if (hadColon) { 00722 *nextTokPtr = ptr; 00723 return XML_TOK_INVALID; 00724 } 00725 hadColon = 1; 00726 ptr += MINBPC(enc); 00727 if (ptr == end) 00728 return XML_TOK_PARTIAL; 00729 switch (BYTE_TYPE(enc, ptr)) { 00730 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00731 default: 00732 *nextTokPtr = ptr; 00733 return XML_TOK_INVALID; 00734 } 00735 break; 00736 #endif 00737 case BT_S: case BT_CR: case BT_LF: 00738 { 00739 ptr += MINBPC(enc); 00740 while (ptr != end) { 00741 switch (BYTE_TYPE(enc, ptr)) { 00742 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00743 case BT_GT: 00744 goto gt; 00745 case BT_SOL: 00746 goto sol; 00747 case BT_S: case BT_CR: case BT_LF: 00748 ptr += MINBPC(enc); 00749 continue; 00750 default: 00751 *nextTokPtr = ptr; 00752 return XML_TOK_INVALID; 00753 } 00754 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); 00755 } 00756 return 
XML_TOK_PARTIAL; 00757 } 00758 case BT_GT: 00759 gt: 00760 *nextTokPtr = ptr + MINBPC(enc); 00761 return XML_TOK_START_TAG_NO_ATTS; 00762 case BT_SOL: 00763 sol: 00764 ptr += MINBPC(enc); 00765 if (ptr == end) 00766 return XML_TOK_PARTIAL; 00767 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 00768 *nextTokPtr = ptr; 00769 return XML_TOK_INVALID; 00770 } 00771 *nextTokPtr = ptr + MINBPC(enc); 00772 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 00773 default: 00774 *nextTokPtr = ptr; 00775 return XML_TOK_INVALID; 00776 } 00777 } 00778 return XML_TOK_PARTIAL; 00779 } 00780 00781 static int PTRCALL 00782 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, 00783 const char **nextTokPtr) 00784 { 00785 if (ptr == end) 00786 return XML_TOK_NONE; 00787 if (MINBPC(enc) > 1) { 00788 size_t n = end - ptr; 00789 if (n & (MINBPC(enc) - 1)) { 00790 n &= ~(MINBPC(enc) - 1); 00791 if (n == 0) 00792 return XML_TOK_PARTIAL; 00793 end = ptr + n; 00794 } 00795 } 00796 switch (BYTE_TYPE(enc, ptr)) { 00797 case BT_LT: 00798 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00799 case BT_AMP: 00800 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00801 case BT_CR: 00802 ptr += MINBPC(enc); 00803 if (ptr == end) 00804 return XML_TOK_TRAILING_CR; 00805 if (BYTE_TYPE(enc, ptr) == BT_LF) 00806 ptr += MINBPC(enc); 00807 *nextTokPtr = ptr; 00808 return XML_TOK_DATA_NEWLINE; 00809 case BT_LF: 00810 *nextTokPtr = ptr + MINBPC(enc); 00811 return XML_TOK_DATA_NEWLINE; 00812 case BT_RSQB: 00813 ptr += MINBPC(enc); 00814 if (ptr == end) 00815 return XML_TOK_TRAILING_RSQB; 00816 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 00817 break; 00818 ptr += MINBPC(enc); 00819 if (ptr == end) 00820 return XML_TOK_TRAILING_RSQB; 00821 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 00822 ptr -= MINBPC(enc); 00823 break; 00824 } 00825 *nextTokPtr = ptr; 00826 return XML_TOK_INVALID; 00827 INVALID_CASES(ptr, nextTokPtr) 00828 default: 00829 ptr += MINBPC(enc); 00830 break; 00831 } 00832 
while (ptr != end) { 00833 switch (BYTE_TYPE(enc, ptr)) { 00834 #define LEAD_CASE(n) \ 00835 case BT_LEAD ## n: \ 00836 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 00837 *nextTokPtr = ptr; \ 00838 return XML_TOK_DATA_CHARS; \ 00839 } \ 00840 ptr += n; \ 00841 break; 00842 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 00843 #undef LEAD_CASE 00844 case BT_RSQB: 00845 if (ptr + MINBPC(enc) != end) { 00846 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { 00847 ptr += MINBPC(enc); 00848 break; 00849 } 00850 if (ptr + 2*MINBPC(enc) != end) { 00851 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { 00852 ptr += MINBPC(enc); 00853 break; 00854 } 00855 *nextTokPtr = ptr + 2*MINBPC(enc); 00856 return XML_TOK_INVALID; 00857 } 00858 } 00859 /* fall through */ 00860 case BT_AMP: 00861 case BT_LT: 00862 case BT_NONXML: 00863 case BT_MALFORM: 00864 case BT_TRAIL: 00865 case BT_CR: 00866 case BT_LF: 00867 *nextTokPtr = ptr; 00868 return XML_TOK_DATA_CHARS; 00869 default: 00870 ptr += MINBPC(enc); 00871 break; 00872 } 00873 } 00874 *nextTokPtr = ptr; 00875 return XML_TOK_DATA_CHARS; 00876 } 00877 00878 /* ptr points to character following "%" */ 00879 00880 static int PTRCALL 00881 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, 00882 const char **nextTokPtr) 00883 { 00884 if (ptr == end) 00885 return -XML_TOK_PERCENT; 00886 switch (BYTE_TYPE(enc, ptr)) { 00887 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00888 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: 00889 *nextTokPtr = ptr; 00890 return XML_TOK_PERCENT; 00891 default: 00892 *nextTokPtr = ptr; 00893 return XML_TOK_INVALID; 00894 } 00895 while (ptr != end) { 00896 switch (BYTE_TYPE(enc, ptr)) { 00897 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 00898 case BT_SEMI: 00899 *nextTokPtr = ptr + MINBPC(enc); 00900 return XML_TOK_PARAM_ENTITY_REF; 00901 default: 00902 *nextTokPtr = ptr; 00903 return XML_TOK_INVALID; 00904 } 00905 } 00906 return XML_TOK_PARTIAL; 00907 } 00908 00909 
static int PTRCALL 00910 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, 00911 const char **nextTokPtr) 00912 { 00913 if (ptr == end) 00914 return XML_TOK_PARTIAL; 00915 switch (BYTE_TYPE(enc, ptr)) { 00916 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 00917 default: 00918 *nextTokPtr = ptr; 00919 return XML_TOK_INVALID; 00920 } 00921 while (ptr != end) { 00922 switch (BYTE_TYPE(enc, ptr)) { 00923 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 00924 case BT_CR: case BT_LF: case BT_S: 00925 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: 00926 *nextTokPtr = ptr; 00927 return XML_TOK_POUND_NAME; 00928 default: 00929 *nextTokPtr = ptr; 00930 return XML_TOK_INVALID; 00931 } 00932 } 00933 return -XML_TOK_POUND_NAME; 00934 } 00935 00936 static int PTRCALL 00937 PREFIX(scanLit)(int open, const ENCODING *enc, 00938 const char *ptr, const char *end, 00939 const char **nextTokPtr) 00940 { 00941 while (ptr != end) { 00942 int t = BYTE_TYPE(enc, ptr); 00943 switch (t) { 00944 INVALID_CASES(ptr, nextTokPtr) 00945 case BT_QUOT: 00946 case BT_APOS: 00947 ptr += MINBPC(enc); 00948 if (t != open) 00949 break; 00950 if (ptr == end) 00951 return -XML_TOK_LITERAL; 00952 *nextTokPtr = ptr; 00953 switch (BYTE_TYPE(enc, ptr)) { 00954 case BT_S: case BT_CR: case BT_LF: 00955 case BT_GT: case BT_PERCNT: case BT_LSQB: 00956 return XML_TOK_LITERAL; 00957 default: 00958 return XML_TOK_INVALID; 00959 } 00960 default: 00961 ptr += MINBPC(enc); 00962 break; 00963 } 00964 } 00965 return XML_TOK_PARTIAL; 00966 } 00967 00968 static int PTRCALL 00969 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, 00970 const char **nextTokPtr) 00971 { 00972 int tok; 00973 if (ptr == end) 00974 return XML_TOK_NONE; 00975 if (MINBPC(enc) > 1) { 00976 size_t n = end - ptr; 00977 if (n & (MINBPC(enc) - 1)) { 00978 n &= ~(MINBPC(enc) - 1); 00979 if (n == 0) 00980 return XML_TOK_PARTIAL; 00981 end = ptr + n; 00982 } 00983 } 00984 switch (BYTE_TYPE(enc, ptr)) { 
00985 case BT_QUOT: 00986 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); 00987 case BT_APOS: 00988 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 00989 case BT_LT: 00990 { 00991 ptr += MINBPC(enc); 00992 if (ptr == end) 00993 return XML_TOK_PARTIAL; 00994 switch (BYTE_TYPE(enc, ptr)) { 00995 case BT_EXCL: 00996 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00997 case BT_QUEST: 00998 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 00999 case BT_NMSTRT: 01000 case BT_HEX: 01001 case BT_NONASCII: 01002 case BT_LEAD2: 01003 case BT_LEAD3: 01004 case BT_LEAD4: 01005 *nextTokPtr = ptr - MINBPC(enc); 01006 return XML_TOK_INSTANCE_START; 01007 } 01008 *nextTokPtr = ptr; 01009 return XML_TOK_INVALID; 01010 } 01011 case BT_CR: 01012 if (ptr + MINBPC(enc) == end) { 01013 *nextTokPtr = end; 01014 /* indicate that this might be part of a CR/LF pair */ 01015 return -XML_TOK_PROLOG_S; 01016 } 01017 /* fall through */ 01018 case BT_S: case BT_LF: 01019 for (;;) { 01020 ptr += MINBPC(enc); 01021 if (ptr == end) 01022 break; 01023 switch (BYTE_TYPE(enc, ptr)) { 01024 case BT_S: case BT_LF: 01025 break; 01026 case BT_CR: 01027 /* don't split CR/LF pair */ 01028 if (ptr + MINBPC(enc) != end) 01029 break; 01030 /* fall through */ 01031 default: 01032 *nextTokPtr = ptr; 01033 return XML_TOK_PROLOG_S; 01034 } 01035 } 01036 *nextTokPtr = ptr; 01037 return XML_TOK_PROLOG_S; 01038 case BT_PERCNT: 01039 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 01040 case BT_COMMA: 01041 *nextTokPtr = ptr + MINBPC(enc); 01042 return XML_TOK_COMMA; 01043 case BT_LSQB: 01044 *nextTokPtr = ptr + MINBPC(enc); 01045 return XML_TOK_OPEN_BRACKET; 01046 case BT_RSQB: 01047 ptr += MINBPC(enc); 01048 if (ptr == end) 01049 return -XML_TOK_CLOSE_BRACKET; 01050 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 01051 if (ptr + MINBPC(enc) == end) 01052 return XML_TOK_PARTIAL; 01053 if (CHAR_MATCHES(enc, ptr + 
MINBPC(enc), ASCII_GT)) { 01054 *nextTokPtr = ptr + 2*MINBPC(enc); 01055 return XML_TOK_COND_SECT_CLOSE; 01056 } 01057 } 01058 *nextTokPtr = ptr; 01059 return XML_TOK_CLOSE_BRACKET; 01060 case BT_LPAR: 01061 *nextTokPtr = ptr + MINBPC(enc); 01062 return XML_TOK_OPEN_PAREN; 01063 case BT_RPAR: 01064 ptr += MINBPC(enc); 01065 if (ptr == end) 01066 return -XML_TOK_CLOSE_PAREN; 01067 switch (BYTE_TYPE(enc, ptr)) { 01068 case BT_AST: 01069 *nextTokPtr = ptr + MINBPC(enc); 01070 return XML_TOK_CLOSE_PAREN_ASTERISK; 01071 case BT_QUEST: 01072 *nextTokPtr = ptr + MINBPC(enc); 01073 return XML_TOK_CLOSE_PAREN_QUESTION; 01074 case BT_PLUS: 01075 *nextTokPtr = ptr + MINBPC(enc); 01076 return XML_TOK_CLOSE_PAREN_PLUS; 01077 case BT_CR: case BT_LF: case BT_S: 01078 case BT_GT: case BT_COMMA: case BT_VERBAR: 01079 case BT_RPAR: 01080 *nextTokPtr = ptr; 01081 return XML_TOK_CLOSE_PAREN; 01082 } 01083 *nextTokPtr = ptr; 01084 return XML_TOK_INVALID; 01085 case BT_VERBAR: 01086 *nextTokPtr = ptr + MINBPC(enc); 01087 return XML_TOK_OR; 01088 case BT_GT: 01089 *nextTokPtr = ptr + MINBPC(enc); 01090 return XML_TOK_DECL_CLOSE; 01091 case BT_NUM: 01092 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); 01093 #define LEAD_CASE(n) \ 01094 case BT_LEAD ## n: \ 01095 if (end - ptr < n) \ 01096 return XML_TOK_PARTIAL_CHAR; \ 01097 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ 01098 ptr += n; \ 01099 tok = XML_TOK_NAME; \ 01100 break; \ 01101 } \ 01102 if (IS_NAME_CHAR(enc, ptr, n)) { \ 01103 ptr += n; \ 01104 tok = XML_TOK_NMTOKEN; \ 01105 break; \ 01106 } \ 01107 *nextTokPtr = ptr; \ 01108 return XML_TOK_INVALID; 01109 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 01110 #undef LEAD_CASE 01111 case BT_NMSTRT: 01112 case BT_HEX: 01113 tok = XML_TOK_NAME; 01114 ptr += MINBPC(enc); 01115 break; 01116 case BT_DIGIT: 01117 case BT_NAME: 01118 case BT_MINUS: 01119 #ifdef XML_NS 01120 case BT_COLON: 01121 #endif 01122 tok = XML_TOK_NMTOKEN; 01123 ptr += MINBPC(enc); 01124 break; 01125 case 
BT_NONASCII: 01126 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 01127 ptr += MINBPC(enc); 01128 tok = XML_TOK_NAME; 01129 break; 01130 } 01131 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 01132 ptr += MINBPC(enc); 01133 tok = XML_TOK_NMTOKEN; 01134 break; 01135 } 01136 /* fall through */ 01137 default: 01138 *nextTokPtr = ptr; 01139 return XML_TOK_INVALID; 01140 } 01141 while (ptr != end) { 01142 switch (BYTE_TYPE(enc, ptr)) { 01143 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 01144 case BT_GT: case BT_RPAR: case BT_COMMA: 01145 case BT_VERBAR: case BT_LSQB: case BT_PERCNT: 01146 case BT_S: case BT_CR: case BT_LF: 01147 *nextTokPtr = ptr; 01148 return tok; 01149 #ifdef XML_NS 01150 case BT_COLON: 01151 ptr += MINBPC(enc); 01152 switch (tok) { 01153 case XML_TOK_NAME: 01154 if (ptr == end) 01155 return XML_TOK_PARTIAL; 01156 tok = XML_TOK_PREFIXED_NAME; 01157 switch (BYTE_TYPE(enc, ptr)) { 01158 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 01159 default: 01160 tok = XML_TOK_NMTOKEN; 01161 break; 01162 } 01163 break; 01164 case XML_TOK_PREFIXED_NAME: 01165 tok = XML_TOK_NMTOKEN; 01166 break; 01167 } 01168 break; 01169 #endif 01170 case BT_PLUS: 01171 if (tok == XML_TOK_NMTOKEN) { 01172 *nextTokPtr = ptr; 01173 return XML_TOK_INVALID; 01174 } 01175 *nextTokPtr = ptr + MINBPC(enc); 01176 return XML_TOK_NAME_PLUS; 01177 case BT_AST: 01178 if (tok == XML_TOK_NMTOKEN) { 01179 *nextTokPtr = ptr; 01180 return XML_TOK_INVALID; 01181 } 01182 *nextTokPtr = ptr + MINBPC(enc); 01183 return XML_TOK_NAME_ASTERISK; 01184 case BT_QUEST: 01185 if (tok == XML_TOK_NMTOKEN) { 01186 *nextTokPtr = ptr; 01187 return XML_TOK_INVALID; 01188 } 01189 *nextTokPtr = ptr + MINBPC(enc); 01190 return XML_TOK_NAME_QUESTION; 01191 default: 01192 *nextTokPtr = ptr; 01193 return XML_TOK_INVALID; 01194 } 01195 } 01196 return -tok; 01197 } 01198 01199 static int PTRCALL 01200 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, 01201 const char *end, const char **nextTokPtr) 01202 { 01203 const char *start; 
01204 if (ptr == end) 01205 return XML_TOK_NONE; 01206 start = ptr; 01207 while (ptr != end) { 01208 switch (BYTE_TYPE(enc, ptr)) { 01209 #define LEAD_CASE(n) \ 01210 case BT_LEAD ## n: ptr += n; break; 01211 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 01212 #undef LEAD_CASE 01213 case BT_AMP: 01214 if (ptr == start) 01215 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 01216 *nextTokPtr = ptr; 01217 return XML_TOK_DATA_CHARS; 01218 case BT_LT: 01219 /* this is for inside entity references */ 01220 *nextTokPtr = ptr; 01221 return XML_TOK_INVALID; 01222 case BT_LF: 01223 if (ptr == start) { 01224 *nextTokPtr = ptr + MINBPC(enc); 01225 return XML_TOK_DATA_NEWLINE; 01226 } 01227 *nextTokPtr = ptr; 01228 return XML_TOK_DATA_CHARS; 01229 case BT_CR: 01230 if (ptr == start) { 01231 ptr += MINBPC(enc); 01232 if (ptr == end) 01233 return XML_TOK_TRAILING_CR; 01234 if (BYTE_TYPE(enc, ptr) == BT_LF) 01235 ptr += MINBPC(enc); 01236 *nextTokPtr = ptr; 01237 return XML_TOK_DATA_NEWLINE; 01238 } 01239 *nextTokPtr = ptr; 01240 return XML_TOK_DATA_CHARS; 01241 case BT_S: 01242 if (ptr == start) { 01243 *nextTokPtr = ptr + MINBPC(enc); 01244 return XML_TOK_ATTRIBUTE_VALUE_S; 01245 } 01246 *nextTokPtr = ptr; 01247 return XML_TOK_DATA_CHARS; 01248 default: 01249 ptr += MINBPC(enc); 01250 break; 01251 } 01252 } 01253 *nextTokPtr = ptr; 01254 return XML_TOK_DATA_CHARS; 01255 } 01256 01257 static int PTRCALL 01258 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, 01259 const char *end, const char **nextTokPtr) 01260 { 01261 const char *start; 01262 if (ptr == end) 01263 return XML_TOK_NONE; 01264 start = ptr; 01265 while (ptr != end) { 01266 switch (BYTE_TYPE(enc, ptr)) { 01267 #define LEAD_CASE(n) \ 01268 case BT_LEAD ## n: ptr += n; break; 01269 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 01270 #undef LEAD_CASE 01271 case BT_AMP: 01272 if (ptr == start) 01273 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 01274 *nextTokPtr = ptr; 01275 return 
XML_TOK_DATA_CHARS; 01276 case BT_PERCNT: 01277 if (ptr == start) { 01278 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), 01279 end, nextTokPtr); 01280 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; 01281 } 01282 *nextTokPtr = ptr; 01283 return XML_TOK_DATA_CHARS; 01284 case BT_LF: 01285 if (ptr == start) { 01286 *nextTokPtr = ptr + MINBPC(enc); 01287 return XML_TOK_DATA_NEWLINE; 01288 } 01289 *nextTokPtr = ptr; 01290 return XML_TOK_DATA_CHARS; 01291 case BT_CR: 01292 if (ptr == start) { 01293 ptr += MINBPC(enc); 01294 if (ptr == end) 01295 return XML_TOK_TRAILING_CR; 01296 if (BYTE_TYPE(enc, ptr) == BT_LF) 01297 ptr += MINBPC(enc); 01298 *nextTokPtr = ptr; 01299 return XML_TOK_DATA_NEWLINE; 01300 } 01301 *nextTokPtr = ptr; 01302 return XML_TOK_DATA_CHARS; 01303 default: 01304 ptr += MINBPC(enc); 01305 break; 01306 } 01307 } 01308 *nextTokPtr = ptr; 01309 return XML_TOK_DATA_CHARS; 01310 } 01311 01312 #ifdef XML_DTD 01313 01314 static int PTRCALL 01315 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, 01316 const char *end, const char **nextTokPtr) 01317 { 01318 int level = 0; 01319 if (MINBPC(enc) > 1) { 01320 size_t n = end - ptr; 01321 if (n & (MINBPC(enc) - 1)) { 01322 n &= ~(MINBPC(enc) - 1); 01323 end = ptr + n; 01324 } 01325 } 01326 while (ptr != end) { 01327 switch (BYTE_TYPE(enc, ptr)) { 01328 INVALID_CASES(ptr, nextTokPtr) 01329 case BT_LT: 01330 if ((ptr += MINBPC(enc)) == end) 01331 return XML_TOK_PARTIAL; 01332 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { 01333 if ((ptr += MINBPC(enc)) == end) 01334 return XML_TOK_PARTIAL; 01335 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { 01336 ++level; 01337 ptr += MINBPC(enc); 01338 } 01339 } 01340 break; 01341 case BT_RSQB: 01342 if ((ptr += MINBPC(enc)) == end) 01343 return XML_TOK_PARTIAL; 01344 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 01345 if ((ptr += MINBPC(enc)) == end) 01346 return XML_TOK_PARTIAL; 01347 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 01348 ptr += MINBPC(enc); 01349 if (level 
== 0) { 01350 *nextTokPtr = ptr; 01351 return XML_TOK_IGNORE_SECT; 01352 } 01353 --level; 01354 } 01355 } 01356 break; 01357 default: 01358 ptr += MINBPC(enc); 01359 break; 01360 } 01361 } 01362 return XML_TOK_PARTIAL; 01363 } 01364 01365 #endif /* XML_DTD */ 01366 01367 static int PTRCALL 01368 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, 01369 const char **badPtr) 01370 { 01371 ptr += MINBPC(enc); 01372 end -= MINBPC(enc); 01373 for (; ptr != end; ptr += MINBPC(enc)) { 01374 switch (BYTE_TYPE(enc, ptr)) { 01375 case BT_DIGIT: 01376 case BT_HEX: 01377 case BT_MINUS: 01378 case BT_APOS: 01379 case BT_LPAR: 01380 case BT_RPAR: 01381 case BT_PLUS: 01382 case BT_COMMA: 01383 case BT_SOL: 01384 case BT_EQUALS: 01385 case BT_QUEST: 01386 case BT_CR: 01387 case BT_LF: 01388 case BT_SEMI: 01389 case BT_EXCL: 01390 case BT_AST: 01391 case BT_PERCNT: 01392 case BT_NUM: 01393 #ifdef XML_NS 01394 case BT_COLON: 01395 #endif 01396 break; 01397 case BT_S: 01398 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { 01399 *badPtr = ptr; 01400 return 0; 01401 } 01402 break; 01403 case BT_NAME: 01404 case BT_NMSTRT: 01405 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) 01406 break; 01407 default: 01408 switch (BYTE_TO_ASCII(enc, ptr)) { 01409 case 0x24: /* $ */ 01410 case 0x40: /* @ */ 01411 break; 01412 default: 01413 *badPtr = ptr; 01414 return 0; 01415 } 01416 break; 01417 } 01418 } 01419 return 1; 01420 } 01421 01422 /* This must only be called for a well-formed start-tag or empty 01423 element tag. Returns the number of attributes. Pointers to the 01424 first attsMax attributes are stored in atts. 
01425 */ 01426 01427 static int PTRCALL 01428 PREFIX(getAtts)(const ENCODING *enc, const char *ptr, 01429 int attsMax, ATTRIBUTE *atts) 01430 { 01431 enum { other, inName, inValue } state = inName; 01432 int nAtts = 0; 01433 int open = 0; /* defined when state == inValue; 01434 initialization just to shut up compilers */ 01435 01436 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { 01437 switch (BYTE_TYPE(enc, ptr)) { 01438 #define START_NAME \ 01439 if (state == other) { \ 01440 if (nAtts < attsMax) { \ 01441 atts[nAtts].name = ptr; \ 01442 atts[nAtts].normalized = 1; \ 01443 } \ 01444 state = inName; \ 01445 } 01446 #define LEAD_CASE(n) \ 01447 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; 01448 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 01449 #undef LEAD_CASE 01450 case BT_NONASCII: 01451 case BT_NMSTRT: 01452 case BT_HEX: 01453 START_NAME 01454 break; 01455 #undef START_NAME 01456 case BT_QUOT: 01457 if (state != inValue) { 01458 if (nAtts < attsMax) 01459 atts[nAtts].valuePtr = ptr + MINBPC(enc); 01460 state = inValue; 01461 open = BT_QUOT; 01462 } 01463 else if (open == BT_QUOT) { 01464 state = other; 01465 if (nAtts < attsMax) 01466 atts[nAtts].valueEnd = ptr; 01467 nAtts++; 01468 } 01469 break; 01470 case BT_APOS: 01471 if (state != inValue) { 01472 if (nAtts < attsMax) 01473 atts[nAtts].valuePtr = ptr + MINBPC(enc); 01474 state = inValue; 01475 open = BT_APOS; 01476 } 01477 else if (open == BT_APOS) { 01478 state = other; 01479 if (nAtts < attsMax) 01480 atts[nAtts].valueEnd = ptr; 01481 nAtts++; 01482 } 01483 break; 01484 case BT_AMP: 01485 if (nAtts < attsMax) 01486 atts[nAtts].normalized = 0; 01487 break; 01488 case BT_S: 01489 if (state == inName) 01490 state = other; 01491 else if (state == inValue 01492 && nAtts < attsMax 01493 && atts[nAtts].normalized 01494 && (ptr == atts[nAtts].valuePtr 01495 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE 01496 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE 01497 || BYTE_TYPE(enc, ptr + 
MINBPC(enc)) == open)) 01498 atts[nAtts].normalized = 0; 01499 break; 01500 case BT_CR: case BT_LF: 01501 /* This case ensures that the first attribute name is counted 01502 Apart from that we could just change state on the quote. */ 01503 if (state == inName) 01504 state = other; 01505 else if (state == inValue && nAtts < attsMax) 01506 atts[nAtts].normalized = 0; 01507 break; 01508 case BT_GT: 01509 case BT_SOL: 01510 if (state != inValue) 01511 return nAtts; 01512 break; 01513 default: 01514 break; 01515 } 01516 } 01517 /* not reached */ 01518 } 01519 01520 static int PTRFASTCALL 01521 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) 01522 { 01523 int result = 0; 01524 /* skip &# */ 01525 ptr += 2*MINBPC(enc); 01526 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 01527 for (ptr += MINBPC(enc); 01528 !CHAR_MATCHES(enc, ptr, ASCII_SEMI); 01529 ptr += MINBPC(enc)) { 01530 int c = BYTE_TO_ASCII(enc, ptr); 01531 switch (c) { 01532 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: 01533 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: 01534 result <<= 4; 01535 result |= (c - ASCII_0); 01536 break; 01537 case ASCII_A: case ASCII_B: case ASCII_C: 01538 case ASCII_D: case ASCII_E: case ASCII_F: 01539 result <<= 4; 01540 result += 10 + (c - ASCII_A); 01541 break; 01542 case ASCII_a: case ASCII_b: case ASCII_c: 01543 case ASCII_d: case ASCII_e: case ASCII_f: 01544 result <<= 4; 01545 result += 10 + (c - ASCII_a); 01546 break; 01547 } 01548 if (result >= 0x110000) 01549 return -1; 01550 } 01551 } 01552 else { 01553 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 01554 int c = BYTE_TO_ASCII(enc, ptr); 01555 result *= 10; 01556 result += (c - ASCII_0); 01557 if (result >= 0x110000) 01558 return -1; 01559 } 01560 } 01561 return checkCharRefNumber(result); 01562 } 01563 01564 static int PTRCALL 01565 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, 01566 const char *end) 01567 { 01568 switch ((end - 
ptr)/MINBPC(enc)) { 01569 case 2: 01570 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { 01571 switch (BYTE_TO_ASCII(enc, ptr)) { 01572 case ASCII_l: 01573 return ASCII_LT; 01574 case ASCII_g: 01575 return ASCII_GT; 01576 } 01577 } 01578 break; 01579 case 3: 01580 if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 01581 ptr += MINBPC(enc); 01582 if (CHAR_MATCHES(enc, ptr, ASCII_m)) { 01583 ptr += MINBPC(enc); 01584 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 01585 return ASCII_AMP; 01586 } 01587 } 01588 break; 01589 case 4: 01590 switch (BYTE_TO_ASCII(enc, ptr)) { 01591 case ASCII_q: 01592 ptr += MINBPC(enc); 01593 if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 01594 ptr += MINBPC(enc); 01595 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 01596 ptr += MINBPC(enc); 01597 if (CHAR_MATCHES(enc, ptr, ASCII_t)) 01598 return ASCII_QUOT; 01599 } 01600 } 01601 break; 01602 case ASCII_a: 01603 ptr += MINBPC(enc); 01604 if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 01605 ptr += MINBPC(enc); 01606 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 01607 ptr += MINBPC(enc); 01608 if (CHAR_MATCHES(enc, ptr, ASCII_s)) 01609 return ASCII_APOS; 01610 } 01611 } 01612 break; 01613 } 01614 } 01615 return 0; 01616 } 01617 01618 static int PTRCALL 01619 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) 01620 { 01621 for (;;) { 01622 switch (BYTE_TYPE(enc, ptr1)) { 01623 #define LEAD_CASE(n) \ 01624 case BT_LEAD ## n: \ 01625 if (*ptr1++ != *ptr2++) \ 01626 return 0; 01627 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) 01628 #undef LEAD_CASE 01629 /* fall through */ 01630 if (*ptr1++ != *ptr2++) 01631 return 0; 01632 break; 01633 case BT_NONASCII: 01634 case BT_NMSTRT: 01635 #ifdef XML_NS 01636 case BT_COLON: 01637 #endif 01638 case BT_HEX: 01639 case BT_DIGIT: 01640 case BT_NAME: 01641 case BT_MINUS: 01642 if (*ptr2++ != *ptr1++) 01643 return 0; 01644 if (MINBPC(enc) > 1) { 01645 if (*ptr2++ != *ptr1++) 01646 return 0; 01647 if (MINBPC(enc) > 2) { 01648 if (*ptr2++ != *ptr1++) 01649 return 0; 01650 if (MINBPC(enc) > 3) 
{ 01651 if (*ptr2++ != *ptr1++) 01652 return 0; 01653 } 01654 } 01655 } 01656 break; 01657 default: 01658 if (MINBPC(enc) == 1 && *ptr1 == *ptr2) 01659 return 1; 01660 switch (BYTE_TYPE(enc, ptr2)) { 01661 case BT_LEAD2: 01662 case BT_LEAD3: 01663 case BT_LEAD4: 01664 case BT_NONASCII: 01665 case BT_NMSTRT: 01666 #ifdef XML_NS 01667 case BT_COLON: 01668 #endif 01669 case BT_HEX: 01670 case BT_DIGIT: 01671 case BT_NAME: 01672 case BT_MINUS: 01673 return 0; 01674 default: 01675 return 1; 01676 } 01677 } 01678 } 01679 /* not reached */ 01680 } 01681 01682 static int PTRCALL 01683 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, 01684 const char *end1, const char *ptr2) 01685 { 01686 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { 01687 if (ptr1 == end1) 01688 return 0; 01689 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) 01690 return 0; 01691 } 01692 return ptr1 == end1; 01693 } 01694 01695 static int PTRFASTCALL 01696 PREFIX(nameLength)(const ENCODING *enc, const char *ptr) 01697 { 01698 const char *start = ptr; 01699 for (;;) { 01700 switch (BYTE_TYPE(enc, ptr)) { 01701 #define LEAD_CASE(n) \ 01702 case BT_LEAD ## n: ptr += n; break; 01703 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 01704 #undef LEAD_CASE 01705 case BT_NONASCII: 01706 case BT_NMSTRT: 01707 #ifdef XML_NS 01708 case BT_COLON: 01709 #endif 01710 case BT_HEX: 01711 case BT_DIGIT: 01712 case BT_NAME: 01713 case BT_MINUS: 01714 ptr += MINBPC(enc); 01715 break; 01716 default: 01717 return (int)(ptr - start); 01718 } 01719 } 01720 } 01721 01722 static const char * PTRFASTCALL 01723 PREFIX(skipS)(const ENCODING *enc, const char *ptr) 01724 { 01725 for (;;) { 01726 switch (BYTE_TYPE(enc, ptr)) { 01727 case BT_LF: 01728 case BT_CR: 01729 case BT_S: 01730 ptr += MINBPC(enc); 01731 break; 01732 default: 01733 return ptr; 01734 } 01735 } 01736 } 01737 01738 static void PTRCALL 01739 PREFIX(updatePosition)(const ENCODING *enc, 01740 const char *ptr, 01741 const char *end, 01742 POSITION *pos) 01743 { 01744 while (ptr 
!= end) { 01745 switch (BYTE_TYPE(enc, ptr)) { 01746 #define LEAD_CASE(n) \ 01747 case BT_LEAD ## n: \ 01748 ptr += n; \ 01749 break; 01750 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 01751 #undef LEAD_CASE 01752 case BT_LF: 01753 pos->columnNumber = (XML_Size)-1; 01754 pos->lineNumber++; 01755 ptr += MINBPC(enc); 01756 break; 01757 case BT_CR: 01758 pos->lineNumber++; 01759 ptr += MINBPC(enc); 01760 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) 01761 ptr += MINBPC(enc); 01762 pos->columnNumber = (XML_Size)-1; 01763 break; 01764 default: 01765 ptr += MINBPC(enc); 01766 break; 01767 } 01768 pos->columnNumber++; 01769 } 01770 } 01771 01772 #undef DO_LEAD_CASE 01773 #undef MULTIBYTE_CASES 01774 #undef INVALID_CASES 01775 #undef CHECK_NAME_CASE 01776 #undef CHECK_NAME_CASES 01777 #undef CHECK_NMSTRT_CASE 01778 #undef CHECK_NMSTRT_CASES 01779 Generated on Fri May 25 2012 04:32:04 for ReactOS by
1.7.6.1
|