Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > parserInternals.c
Go to the documentation of this file.
00001 /* 00002 * parserInternals.c : Internal routines (and obsolete ones) needed for the 00003 * XML and HTML parsers. 00004 * 00005 * See Copyright for the status of this software. 00006 * 00007 * daniel@veillard.com 00008 */ 00009 00010 #define IN_LIBXML 00011 #include "libxml.h" 00012 00013 #if defined(WIN32) && !defined (__CYGWIN__) 00014 #define XML_DIR_SEP '\\' 00015 #else 00016 #define XML_DIR_SEP '/' 00017 #endif 00018 00019 #include <string.h> 00020 #ifdef HAVE_CTYPE_H 00021 #include <ctype.h> 00022 #endif 00023 #ifdef HAVE_STDLIB_H 00024 #include <stdlib.h> 00025 #endif 00026 #ifdef HAVE_SYS_STAT_H 00027 #include <sys/stat.h> 00028 #endif 00029 #ifdef HAVE_FCNTL_H 00030 #include <fcntl.h> 00031 #endif 00032 #ifdef HAVE_UNISTD_H 00033 #include <unistd.h> 00034 #endif 00035 #ifdef HAVE_ZLIB_H 00036 #include <zlib.h> 00037 #endif 00038 00039 #include <libxml/xmlmemory.h> 00040 #include <libxml/tree.h> 00041 #include <libxml/parser.h> 00042 #include <libxml/parserInternals.h> 00043 #include <libxml/valid.h> 00044 #include <libxml/entities.h> 00045 #include <libxml/xmlerror.h> 00046 #include <libxml/encoding.h> 00047 #include <libxml/valid.h> 00048 #include <libxml/xmlIO.h> 00049 #include <libxml/uri.h> 00050 #include <libxml/dict.h> 00051 #include <libxml/SAX.h> 00052 #ifdef LIBXML_CATALOG_ENABLED 00053 #include <libxml/catalog.h> 00054 #endif 00055 #include <libxml/globals.h> 00056 #include <libxml/chvalid.h> 00057 00058 /* 00059 * Various global defaults for parsing 00060 */ 00061 00069 void 00070 xmlCheckVersion(int version) { 00071 int myversion = (int) LIBXML_VERSION; 00072 00073 xmlInitParser(); 00074 00075 if ((myversion / 10000) != (version / 10000)) { 00076 xmlGenericError(xmlGenericErrorContext, 00077 "Fatal: program compiled against libxml %d using libxml %d\n", 00078 (version / 10000), (myversion / 10000)); 00079 fprintf(stderr, 00080 "Fatal: program compiled against libxml %d using libxml %d\n", 00081 (version / 10000), (myversion / 10000)); 
00082 } 00083 if ((myversion / 100) < (version / 100)) { 00084 xmlGenericError(xmlGenericErrorContext, 00085 "Warning: program compiled against libxml %d using older %d\n", 00086 (version / 100), (myversion / 100)); 00087 } 00088 } 00089 00090 00091 /************************************************************************ 00092 * * 00093 * Some factorized error routines * 00094 * * 00095 ************************************************************************/ 00096 00097 00105 void 00106 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra) 00107 { 00108 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 00109 (ctxt->instate == XML_PARSER_EOF)) 00110 return; 00111 if (ctxt != NULL) { 00112 ctxt->errNo = XML_ERR_NO_MEMORY; 00113 ctxt->instate = XML_PARSER_EOF; 00114 ctxt->disableSAX = 1; 00115 } 00116 if (extra) 00117 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 00118 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra, 00119 NULL, NULL, 0, 0, 00120 "Memory allocation failed : %s\n", extra); 00121 else 00122 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, 00123 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL, 00124 NULL, NULL, 0, 0, "Memory allocation failed\n"); 00125 } 00126 00137 void 00138 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr, 00139 const char *msg, const xmlChar * str1, const xmlChar * str2) 00140 { 00141 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 00142 (ctxt->instate == XML_PARSER_EOF)) 00143 return; 00144 if (ctxt != NULL) 00145 ctxt->errNo = xmlerr; 00146 __xmlRaiseError(NULL, NULL, NULL, 00147 ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL, 00148 NULL, 0, (const char *) str1, (const char *) str2, 00149 NULL, 0, 0, msg, str1, str2); 00150 if (ctxt != NULL) { 00151 ctxt->wellFormed = 0; 00152 if (ctxt->recovery == 0) 00153 ctxt->disableSAX = 1; 00154 } 00155 } 00156 00165 static void 00166 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str) 00167 { 00168 if ((ctxt != 
NULL) && (ctxt->disableSAX != 0) && 00169 (ctxt->instate == XML_PARSER_EOF)) 00170 return; 00171 if (ctxt != NULL) 00172 ctxt->errNo = XML_ERR_INTERNAL_ERROR; 00173 __xmlRaiseError(NULL, NULL, NULL, 00174 ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR, 00175 XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL, 00176 0, 0, msg, str); 00177 if (ctxt != NULL) { 00178 ctxt->wellFormed = 0; 00179 if (ctxt->recovery == 0) 00180 ctxt->disableSAX = 1; 00181 } 00182 } 00183 00193 static void 00194 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, 00195 const char *msg, int val) 00196 { 00197 if ((ctxt != NULL) && (ctxt->disableSAX != 0) && 00198 (ctxt->instate == XML_PARSER_EOF)) 00199 return; 00200 if (ctxt != NULL) 00201 ctxt->errNo = error; 00202 __xmlRaiseError(NULL, NULL, NULL, 00203 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, 00204 NULL, 0, NULL, NULL, NULL, val, 0, msg, val); 00205 if (ctxt != NULL) { 00206 ctxt->wellFormed = 0; 00207 if (ctxt->recovery == 0) 00208 ctxt->disableSAX = 1; 00209 } 00210 } 00211 00221 int 00222 xmlIsLetter(int c) { 00223 return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); 00224 } 00225 00226 /************************************************************************ 00227 * * 00228 * Input handling functions for progressive parsing * 00229 * * 00230 ************************************************************************/ 00231 00232 /* #define DEBUG_INPUT */ 00233 /* #define DEBUG_STACK */ 00234 /* #define DEBUG_PUSH */ 00235 00236 00237 /* we need to keep enough input to show errors in context */ 00238 #define LINE_LEN 80 00239 00240 #ifdef DEBUG_INPUT 00241 #define CHECK_BUFFER(in) check_buffer(in) 00242 00243 static 00244 void check_buffer(xmlParserInputPtr in) { 00245 if (in->base != in->buf->buffer->content) { 00246 xmlGenericError(xmlGenericErrorContext, 00247 "xmlParserInput: base mismatch problem\n"); 00248 } 00249 if (in->cur < in->base) { 00250 xmlGenericError(xmlGenericErrorContext, 00251 "xmlParserInput: 
cur < base problem\n"); 00252 } 00253 if (in->cur > in->base + in->buf->buffer->use) { 00254 xmlGenericError(xmlGenericErrorContext, 00255 "xmlParserInput: cur > base + use problem\n"); 00256 } 00257 xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n", 00258 (int) in, (int) in->buf->buffer->content, in->cur - in->base, 00259 in->buf->buffer->use, in->buf->buffer->size); 00260 } 00261 00262 #else 00263 #define CHECK_BUFFER(in) 00264 #endif 00265 00266 00278 int 00279 xmlParserInputRead(xmlParserInputPtr in, int len) { 00280 int ret; 00281 int used; 00282 int indx; 00283 00284 if (in == NULL) return(-1); 00285 #ifdef DEBUG_INPUT 00286 xmlGenericError(xmlGenericErrorContext, "Read\n"); 00287 #endif 00288 if (in->buf == NULL) return(-1); 00289 if (in->base == NULL) return(-1); 00290 if (in->cur == NULL) return(-1); 00291 if (in->buf->buffer == NULL) return(-1); 00292 if (in->buf->readcallback == NULL) return(-1); 00293 00294 CHECK_BUFFER(in); 00295 00296 used = in->cur - in->buf->buffer->content; 00297 ret = xmlBufferShrink(in->buf->buffer, used); 00298 if (ret > 0) { 00299 in->cur -= ret; 00300 in->consumed += ret; 00301 } 00302 ret = xmlParserInputBufferRead(in->buf, len); 00303 if (in->base != in->buf->buffer->content) { 00304 /* 00305 * the buffer has been reallocated 00306 */ 00307 indx = in->cur - in->base; 00308 in->base = in->buf->buffer->content; 00309 in->cur = &in->buf->buffer->content[indx]; 00310 } 00311 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 00312 00313 CHECK_BUFFER(in); 00314 00315 return(ret); 00316 } 00317 00329 int 00330 xmlParserInputGrow(xmlParserInputPtr in, int len) { 00331 int ret; 00332 int indx; 00333 00334 if (in == NULL) return(-1); 00335 #ifdef DEBUG_INPUT 00336 xmlGenericError(xmlGenericErrorContext, "Grow\n"); 00337 #endif 00338 if (in->buf == NULL) return(-1); 00339 if (in->base == NULL) return(-1); 00340 if (in->cur == NULL) return(-1); 00341 if (in->buf->buffer == NULL) 
return(-1); 00342 00343 CHECK_BUFFER(in); 00344 00345 indx = in->cur - in->base; 00346 if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) { 00347 00348 CHECK_BUFFER(in); 00349 00350 return(0); 00351 } 00352 if (in->buf->readcallback != NULL) 00353 ret = xmlParserInputBufferGrow(in->buf, len); 00354 else 00355 return(0); 00356 00357 /* 00358 * NOTE : in->base may be a "dangling" i.e. freed pointer in this 00359 * block, but we use it really as an integer to do some 00360 * pointer arithmetic. Insure will raise it as a bug but in 00361 * that specific case, that's not ! 00362 */ 00363 if (in->base != in->buf->buffer->content) { 00364 /* 00365 * the buffer has been reallocated 00366 */ 00367 indx = in->cur - in->base; 00368 in->base = in->buf->buffer->content; 00369 in->cur = &in->buf->buffer->content[indx]; 00370 } 00371 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 00372 00373 CHECK_BUFFER(in); 00374 00375 return(ret); 00376 } 00377 00384 void 00385 xmlParserInputShrink(xmlParserInputPtr in) { 00386 int used; 00387 int ret; 00388 int indx; 00389 00390 #ifdef DEBUG_INPUT 00391 xmlGenericError(xmlGenericErrorContext, "Shrink\n"); 00392 #endif 00393 if (in == NULL) return; 00394 if (in->buf == NULL) return; 00395 if (in->base == NULL) return; 00396 if (in->cur == NULL) return; 00397 if (in->buf->buffer == NULL) return; 00398 00399 CHECK_BUFFER(in); 00400 00401 used = in->cur - in->buf->buffer->content; 00402 /* 00403 * Do not shrink on large buffers whose only a tiny fraction 00404 * was consumed 00405 */ 00406 if (used > INPUT_CHUNK) { 00407 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN); 00408 if (ret > 0) { 00409 in->cur -= ret; 00410 in->consumed += ret; 00411 } 00412 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 00413 } 00414 00415 CHECK_BUFFER(in); 00416 00417 if (in->buf->buffer->use > INPUT_CHUNK) { 00418 return; 00419 } 00420 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK); 00421 if (in->base != 
in->buf->buffer->content) { 00422 /* 00423 * the buffer has been reallocated 00424 */ 00425 indx = in->cur - in->base; 00426 in->base = in->buf->buffer->content; 00427 in->cur = &in->buf->buffer->content[indx]; 00428 } 00429 in->end = &in->buf->buffer->content[in->buf->buffer->use]; 00430 00431 CHECK_BUFFER(in); 00432 } 00433 00434 /************************************************************************ 00435 * * 00436 * UTF8 character input and related functions * 00437 * * 00438 ************************************************************************/ 00439 00447 void 00448 xmlNextChar(xmlParserCtxtPtr ctxt) 00449 { 00450 if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) || 00451 (ctxt->input == NULL)) 00452 return; 00453 00454 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 00455 if ((*ctxt->input->cur == 0) && 00456 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && 00457 (ctxt->instate != XML_PARSER_COMMENT)) { 00458 /* 00459 * If we are at the end of the current entity and 00460 * the context allows it, we pop consumed entities 00461 * automatically. 00462 * the auto closing should be blocked in other cases 00463 */ 00464 xmlPopInput(ctxt); 00465 } else { 00466 const unsigned char *cur; 00467 unsigned char c; 00468 00469 /* 00470 * 2.11 End-of-Line Handling 00471 * the literal two-character sequence "#xD#xA" or a standalone 00472 * literal #xD, an XML processor must pass to the application 00473 * the single character #xA. 00474 */ 00475 if (*(ctxt->input->cur) == '\n') { 00476 ctxt->input->line++; ctxt->input->col = 1; 00477 } else 00478 ctxt->input->col++; 00479 00480 /* 00481 * We are supposed to handle UTF8, check it's valid 00482 * From rfc2044: encoding of the Unicode values on UTF-8: 00483 * 00484 * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) 00485 * 0000 0000-0000 007F 0xxxxxxx 00486 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 00487 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 00488 * 00489 * Check for the 0x110000 limit too 00490 */ 00491 cur = ctxt->input->cur; 00492 00493 c = *cur; 00494 if (c & 0x80) { 00495 if (c == 0xC0) 00496 goto encoding_error; 00497 if (cur[1] == 0) { 00498 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00499 cur = ctxt->input->cur; 00500 } 00501 if ((cur[1] & 0xc0) != 0x80) 00502 goto encoding_error; 00503 if ((c & 0xe0) == 0xe0) { 00504 unsigned int val; 00505 00506 if (cur[2] == 0) { 00507 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00508 cur = ctxt->input->cur; 00509 } 00510 if ((cur[2] & 0xc0) != 0x80) 00511 goto encoding_error; 00512 if ((c & 0xf0) == 0xf0) { 00513 if (cur[3] == 0) { 00514 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00515 cur = ctxt->input->cur; 00516 } 00517 if (((c & 0xf8) != 0xf0) || 00518 ((cur[3] & 0xc0) != 0x80)) 00519 goto encoding_error; 00520 /* 4-byte code */ 00521 ctxt->input->cur += 4; 00522 val = (cur[0] & 0x7) << 18; 00523 val |= (cur[1] & 0x3f) << 12; 00524 val |= (cur[2] & 0x3f) << 6; 00525 val |= cur[3] & 0x3f; 00526 } else { 00527 /* 3-byte code */ 00528 ctxt->input->cur += 3; 00529 val = (cur[0] & 0xf) << 12; 00530 val |= (cur[1] & 0x3f) << 6; 00531 val |= cur[2] & 0x3f; 00532 } 00533 if (((val > 0xd7ff) && (val < 0xe000)) || 00534 ((val > 0xfffd) && (val < 0x10000)) || 00535 (val >= 0x110000)) { 00536 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 00537 "Char 0x%X out of allowed range\n", 00538 val); 00539 } 00540 } else 00541 /* 2-byte code */ 00542 ctxt->input->cur += 2; 00543 } else 00544 /* 1-byte code */ 00545 ctxt->input->cur++; 00546 00547 ctxt->nbChars++; 00548 if (*ctxt->input->cur == 0) 00549 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00550 } 00551 } else { 00552 /* 00553 * Assume it's a fixed length encoding (1) with 00554 * a compatible encoding for the ASCII set, since 00555 * XML 
constructs only use < 128 chars 00556 */ 00557 00558 if (*(ctxt->input->cur) == '\n') { 00559 ctxt->input->line++; ctxt->input->col = 1; 00560 } else 00561 ctxt->input->col++; 00562 ctxt->input->cur++; 00563 ctxt->nbChars++; 00564 if (*ctxt->input->cur == 0) 00565 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00566 } 00567 if ((*ctxt->input->cur == '%') && (!ctxt->html)) 00568 xmlParserHandlePEReference(ctxt); 00569 if ((*ctxt->input->cur == 0) && 00570 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) 00571 xmlPopInput(ctxt); 00572 return; 00573 encoding_error: 00574 /* 00575 * If we detect an UTF8 error that probably mean that the 00576 * input encoding didn't get properly advertised in the 00577 * declaration header. Report the error and switch the encoding 00578 * to ISO-Latin-1 (if you don't like this policy, just declare the 00579 * encoding !) 00580 */ 00581 if ((ctxt == NULL) || (ctxt->input == NULL) || 00582 (ctxt->input->end - ctxt->input->cur < 4)) { 00583 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 00584 "Input is not proper UTF-8, indicate encoding !\n", 00585 NULL, NULL); 00586 } else { 00587 char buffer[150]; 00588 00589 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 00590 ctxt->input->cur[0], ctxt->input->cur[1], 00591 ctxt->input->cur[2], ctxt->input->cur[3]); 00592 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 00593 "Input is not proper UTF-8, indicate encoding !\n%s", 00594 BAD_CAST buffer, NULL); 00595 } 00596 ctxt->charset = XML_CHAR_ENCODING_8859_1; 00597 ctxt->input->cur++; 00598 return; 00599 } 00600 00619 int 00620 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { 00621 if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0); 00622 if (ctxt->instate == XML_PARSER_EOF) 00623 return(0); 00624 00625 if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { 00626 *len = 1; 00627 return((int) *ctxt->input->cur); 00628 } 00629 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { 00630 /* 00631 * We are supposed 
to handle UTF8, check it's valid 00632 * From rfc2044: encoding of the Unicode values on UTF-8: 00633 * 00634 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 00635 * 0000 0000-0000 007F 0xxxxxxx 00636 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 00637 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 00638 * 00639 * Check for the 0x110000 limit too 00640 */ 00641 const unsigned char *cur = ctxt->input->cur; 00642 unsigned char c; 00643 unsigned int val; 00644 00645 c = *cur; 00646 if (c & 0x80) { 00647 if (((c & 0x40) == 0) || (c == 0xC0)) 00648 goto encoding_error; 00649 if (cur[1] == 0) { 00650 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00651 cur = ctxt->input->cur; 00652 } 00653 if ((cur[1] & 0xc0) != 0x80) 00654 goto encoding_error; 00655 if ((c & 0xe0) == 0xe0) { 00656 if (cur[2] == 0) { 00657 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00658 cur = ctxt->input->cur; 00659 } 00660 if ((cur[2] & 0xc0) != 0x80) 00661 goto encoding_error; 00662 if ((c & 0xf0) == 0xf0) { 00663 if (cur[3] == 0) { 00664 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00665 cur = ctxt->input->cur; 00666 } 00667 if (((c & 0xf8) != 0xf0) || 00668 ((cur[3] & 0xc0) != 0x80)) 00669 goto encoding_error; 00670 /* 4-byte code */ 00671 *len = 4; 00672 val = (cur[0] & 0x7) << 18; 00673 val |= (cur[1] & 0x3f) << 12; 00674 val |= (cur[2] & 0x3f) << 6; 00675 val |= cur[3] & 0x3f; 00676 if (val < 0x10000) 00677 goto encoding_error; 00678 } else { 00679 /* 3-byte code */ 00680 *len = 3; 00681 val = (cur[0] & 0xf) << 12; 00682 val |= (cur[1] & 0x3f) << 6; 00683 val |= cur[2] & 0x3f; 00684 if (val < 0x800) 00685 goto encoding_error; 00686 } 00687 } else { 00688 /* 2-byte code */ 00689 *len = 2; 00690 val = (cur[0] & 0x1f) << 6; 00691 val |= cur[1] & 0x3f; 00692 if (val < 0x80) 00693 goto encoding_error; 00694 } 00695 if (!IS_CHAR(val)) { 00696 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 00697 "Char 0x%X out of allowed range\n", val); 00698 } 00699 return(val); 00700 } else { 00701 /* 1-byte code */ 
00702 *len = 1; 00703 if (*ctxt->input->cur == 0) 00704 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); 00705 if ((*ctxt->input->cur == 0) && 00706 (ctxt->input->end > ctxt->input->cur)) { 00707 xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR, 00708 "Char 0x0 out of allowed range\n", 0); 00709 } 00710 if (*ctxt->input->cur == 0xD) { 00711 if (ctxt->input->cur[1] == 0xA) { 00712 ctxt->nbChars++; 00713 ctxt->input->cur++; 00714 } 00715 return(0xA); 00716 } 00717 return((int) *ctxt->input->cur); 00718 } 00719 } 00720 /* 00721 * Assume it's a fixed length encoding (1) with 00722 * a compatible encoding for the ASCII set, since 00723 * XML constructs only use < 128 chars 00724 */ 00725 *len = 1; 00726 if (*ctxt->input->cur == 0xD) { 00727 if (ctxt->input->cur[1] == 0xA) { 00728 ctxt->nbChars++; 00729 ctxt->input->cur++; 00730 } 00731 return(0xA); 00732 } 00733 return((int) *ctxt->input->cur); 00734 encoding_error: 00735 /* 00736 * An encoding problem may arise from a truncated input buffer 00737 * splitting a character in the middle. In that case do not raise 00738 * an error but return 0 to endicate an end of stream problem 00739 */ 00740 if (ctxt->input->end - ctxt->input->cur < 4) { 00741 *len = 0; 00742 return(0); 00743 } 00744 00745 /* 00746 * If we detect an UTF8 error that probably mean that the 00747 * input encoding didn't get properly advertised in the 00748 * declaration header. Report the error and switch the encoding 00749 * to ISO-Latin-1 (if you don't like this policy, just declare the 00750 * encoding !) 
00751 */ 00752 { 00753 char buffer[150]; 00754 00755 snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 00756 ctxt->input->cur[0], ctxt->input->cur[1], 00757 ctxt->input->cur[2], ctxt->input->cur[3]); 00758 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 00759 "Input is not proper UTF-8, indicate encoding !\n%s", 00760 BAD_CAST buffer, NULL); 00761 } 00762 ctxt->charset = XML_CHAR_ENCODING_8859_1; 00763 *len = 1; 00764 return((int) *ctxt->input->cur); 00765 } 00766 00779 int 00780 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len) 00781 { 00782 if ((len == NULL) || (cur == NULL)) return(0); 00783 if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { 00784 /* 00785 * We are supposed to handle UTF8, check it's valid 00786 * From rfc2044: encoding of the Unicode values on UTF-8: 00787 * 00788 * UCS-4 range (hex.) UTF-8 octet sequence (binary) 00789 * 0000 0000-0000 007F 0xxxxxxx 00790 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 00791 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 00792 * 00793 * Check for the 0x110000 limit too 00794 */ 00795 unsigned char c; 00796 unsigned int val; 00797 00798 c = *cur; 00799 if (c & 0x80) { 00800 if ((cur[1] & 0xc0) != 0x80) 00801 goto encoding_error; 00802 if ((c & 0xe0) == 0xe0) { 00803 00804 if ((cur[2] & 0xc0) != 0x80) 00805 goto encoding_error; 00806 if ((c & 0xf0) == 0xf0) { 00807 if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80)) 00808 goto encoding_error; 00809 /* 4-byte code */ 00810 *len = 4; 00811 val = (cur[0] & 0x7) << 18; 00812 val |= (cur[1] & 0x3f) << 12; 00813 val |= (cur[2] & 0x3f) << 6; 00814 val |= cur[3] & 0x3f; 00815 } else { 00816 /* 3-byte code */ 00817 *len = 3; 00818 val = (cur[0] & 0xf) << 12; 00819 val |= (cur[1] & 0x3f) << 6; 00820 val |= cur[2] & 0x3f; 00821 } 00822 } else { 00823 /* 2-byte code */ 00824 *len = 2; 00825 val = (cur[0] & 0x1f) << 6; 00826 val |= cur[1] & 0x3f; 00827 } 00828 if (!IS_CHAR(val)) { 00829 xmlErrEncodingInt(ctxt, 
XML_ERR_INVALID_CHAR, 00830 "Char 0x%X out of allowed range\n", val); 00831 } 00832 return (val); 00833 } else { 00834 /* 1-byte code */ 00835 *len = 1; 00836 return ((int) *cur); 00837 } 00838 } 00839 /* 00840 * Assume it's a fixed length encoding (1) with 00841 * a compatible encoding for the ASCII set, since 00842 * XML constructs only use < 128 chars 00843 */ 00844 *len = 1; 00845 return ((int) *cur); 00846 encoding_error: 00847 00848 /* 00849 * An encoding problem may arise from a truncated input buffer 00850 * splitting a character in the middle. In that case do not raise 00851 * an error but return 0 to endicate an end of stream problem 00852 */ 00853 if ((ctxt == NULL) || (ctxt->input == NULL) || 00854 (ctxt->input->end - ctxt->input->cur < 4)) { 00855 *len = 0; 00856 return(0); 00857 } 00858 /* 00859 * If we detect an UTF8 error that probably mean that the 00860 * input encoding didn't get properly advertised in the 00861 * declaration header. Report the error and switch the encoding 00862 * to ISO-Latin-1 (if you don't like this policy, just declare the 00863 * encoding !) 00864 */ 00865 { 00866 char buffer[150]; 00867 00868 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n", 00869 ctxt->input->cur[0], ctxt->input->cur[1], 00870 ctxt->input->cur[2], ctxt->input->cur[3]); 00871 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR, 00872 "Input is not proper UTF-8, indicate encoding !\n%s", 00873 BAD_CAST buffer, NULL); 00874 } 00875 *len = 1; 00876 return ((int) *cur); 00877 } 00878 00888 int 00889 xmlCopyCharMultiByte(xmlChar *out, int val) { 00890 if (out == NULL) return(0); 00891 /* 00892 * We are supposed to handle UTF8, check it's valid 00893 * From rfc2044: encoding of the Unicode values on UTF-8: 00894 * 00895 * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) 00896 * 0000 0000-0000 007F 0xxxxxxx 00897 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 00898 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 00899 */ 00900 if (val >= 0x80) { 00901 xmlChar *savedout = out; 00902 int bits; 00903 if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; } 00904 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;} 00905 else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; } 00906 else { 00907 xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR, 00908 "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n", 00909 val); 00910 return(0); 00911 } 00912 for ( ; bits >= 0; bits-= 6) 00913 *out++= ((val >> bits) & 0x3F) | 0x80 ; 00914 return (out - savedout); 00915 } 00916 *out = (xmlChar) val; 00917 return 1; 00918 } 00919 00931 int 00932 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { 00933 if (out == NULL) return(0); 00934 /* the len parameter is ignored */ 00935 if (val >= 0x80) { 00936 return(xmlCopyCharMultiByte (out, val)); 00937 } 00938 *out = (xmlChar) val; 00939 return 1; 00940 } 00941 00942 /************************************************************************ 00943 * * 00944 * Commodity functions to switch encodings * 00945 * * 00946 ************************************************************************/ 00947 00948 /* defined in encoding.c, not public */ 00949 int 00950 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, 00951 xmlBufferPtr in, int len); 00952 00953 static int 00954 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 00955 xmlCharEncodingHandlerPtr handler, int len); 00956 static int 00957 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 00958 xmlCharEncodingHandlerPtr handler, int len); 00969 int 00970 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) 00971 { 00972 xmlCharEncodingHandlerPtr handler; 00973 int len = -1; 00974 00975 if (ctxt == NULL) return(-1); 00976 switch (enc) { 00977 case 
XML_CHAR_ENCODING_ERROR: 00978 __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING, 00979 "encoding unknown\n", NULL, NULL); 00980 return(-1); 00981 case XML_CHAR_ENCODING_NONE: 00982 /* let's assume it's UTF-8 without the XML decl */ 00983 ctxt->charset = XML_CHAR_ENCODING_UTF8; 00984 return(0); 00985 case XML_CHAR_ENCODING_UTF8: 00986 /* default encoding, no conversion should be needed */ 00987 ctxt->charset = XML_CHAR_ENCODING_UTF8; 00988 00989 /* 00990 * Errata on XML-1.0 June 20 2001 00991 * Specific handling of the Byte Order Mark for 00992 * UTF-8 00993 */ 00994 if ((ctxt->input != NULL) && 00995 (ctxt->input->cur[0] == 0xEF) && 00996 (ctxt->input->cur[1] == 0xBB) && 00997 (ctxt->input->cur[2] == 0xBF)) { 00998 ctxt->input->cur += 3; 00999 } 01000 return(0); 01001 case XML_CHAR_ENCODING_UTF16LE: 01002 case XML_CHAR_ENCODING_UTF16BE: 01003 /*The raw input characters are encoded 01004 *in UTF-16. As we expect this function 01005 *to be called after xmlCharEncInFunc, we expect 01006 *ctxt->input->cur to contain UTF-8 encoded characters. 01007 *So the raw UTF16 Byte Order Mark 01008 *has also been converted into 01009 *an UTF-8 BOM. Let's skip that BOM. 
01010 */ 01011 if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) && 01012 (ctxt->input->cur[0] == 0xEF) && 01013 (ctxt->input->cur[1] == 0xBB) && 01014 (ctxt->input->cur[2] == 0xBF)) { 01015 ctxt->input->cur += 3; 01016 } 01017 len = 90; 01018 break; 01019 case XML_CHAR_ENCODING_UCS2: 01020 len = 90; 01021 break; 01022 case XML_CHAR_ENCODING_UCS4BE: 01023 case XML_CHAR_ENCODING_UCS4LE: 01024 case XML_CHAR_ENCODING_UCS4_2143: 01025 case XML_CHAR_ENCODING_UCS4_3412: 01026 len = 180; 01027 break; 01028 case XML_CHAR_ENCODING_EBCDIC: 01029 case XML_CHAR_ENCODING_8859_1: 01030 case XML_CHAR_ENCODING_8859_2: 01031 case XML_CHAR_ENCODING_8859_3: 01032 case XML_CHAR_ENCODING_8859_4: 01033 case XML_CHAR_ENCODING_8859_5: 01034 case XML_CHAR_ENCODING_8859_6: 01035 case XML_CHAR_ENCODING_8859_7: 01036 case XML_CHAR_ENCODING_8859_8: 01037 case XML_CHAR_ENCODING_8859_9: 01038 case XML_CHAR_ENCODING_ASCII: 01039 case XML_CHAR_ENCODING_2022_JP: 01040 case XML_CHAR_ENCODING_SHIFT_JIS: 01041 case XML_CHAR_ENCODING_EUC_JP: 01042 len = 45; 01043 break; 01044 } 01045 handler = xmlGetCharEncodingHandler(enc); 01046 if (handler == NULL) { 01047 /* 01048 * Default handlers. 
01049 */ 01050 switch (enc) { 01051 case XML_CHAR_ENCODING_ASCII: 01052 /* default encoding, no conversion should be needed */ 01053 ctxt->charset = XML_CHAR_ENCODING_UTF8; 01054 return(0); 01055 case XML_CHAR_ENCODING_UTF16LE: 01056 break; 01057 case XML_CHAR_ENCODING_UTF16BE: 01058 break; 01059 case XML_CHAR_ENCODING_UCS4LE: 01060 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01061 "encoding not supported %s\n", 01062 BAD_CAST "USC4 little endian", NULL); 01063 break; 01064 case XML_CHAR_ENCODING_UCS4BE: 01065 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01066 "encoding not supported %s\n", 01067 BAD_CAST "USC4 big endian", NULL); 01068 break; 01069 case XML_CHAR_ENCODING_EBCDIC: 01070 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01071 "encoding not supported %s\n", 01072 BAD_CAST "EBCDIC", NULL); 01073 break; 01074 case XML_CHAR_ENCODING_UCS4_2143: 01075 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01076 "encoding not supported %s\n", 01077 BAD_CAST "UCS4 2143", NULL); 01078 break; 01079 case XML_CHAR_ENCODING_UCS4_3412: 01080 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01081 "encoding not supported %s\n", 01082 BAD_CAST "UCS4 3412", NULL); 01083 break; 01084 case XML_CHAR_ENCODING_UCS2: 01085 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01086 "encoding not supported %s\n", 01087 BAD_CAST "UCS2", NULL); 01088 break; 01089 case XML_CHAR_ENCODING_8859_1: 01090 case XML_CHAR_ENCODING_8859_2: 01091 case XML_CHAR_ENCODING_8859_3: 01092 case XML_CHAR_ENCODING_8859_4: 01093 case XML_CHAR_ENCODING_8859_5: 01094 case XML_CHAR_ENCODING_8859_6: 01095 case XML_CHAR_ENCODING_8859_7: 01096 case XML_CHAR_ENCODING_8859_8: 01097 case XML_CHAR_ENCODING_8859_9: 01098 /* 01099 * We used to keep the internal content in the 01100 * document encoding however this turns being unmaintainable 01101 * So xmlGetCharEncodingHandler() will return non-null 01102 * values for this now. 
01103 */ 01104 if ((ctxt->inputNr == 1) && 01105 (ctxt->encoding == NULL) && 01106 (ctxt->input != NULL) && 01107 (ctxt->input->encoding != NULL)) { 01108 ctxt->encoding = xmlStrdup(ctxt->input->encoding); 01109 } 01110 ctxt->charset = enc; 01111 return(0); 01112 case XML_CHAR_ENCODING_2022_JP: 01113 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01114 "encoding not supported %s\n", 01115 BAD_CAST "ISO-2022-JP", NULL); 01116 break; 01117 case XML_CHAR_ENCODING_SHIFT_JIS: 01118 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01119 "encoding not supported %s\n", 01120 BAD_CAST "Shift_JIS", NULL); 01121 break; 01122 case XML_CHAR_ENCODING_EUC_JP: 01123 __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING, 01124 "encoding not supported %s\n", 01125 BAD_CAST "EUC-JP", NULL); 01126 break; 01127 default: 01128 break; 01129 } 01130 } 01131 if (handler == NULL) 01132 return(-1); 01133 ctxt->charset = XML_CHAR_ENCODING_UTF8; 01134 return(xmlSwitchToEncodingInt(ctxt, handler, len)); 01135 } 01136 01149 static int 01150 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 01151 xmlCharEncodingHandlerPtr handler, int len) 01152 { 01153 int nbchars; 01154 01155 if (handler == NULL) 01156 return (-1); 01157 if (input == NULL) 01158 return (-1); 01159 if (input->buf != NULL) { 01160 if (input->buf->encoder != NULL) { 01161 /* 01162 * Check in case the auto encoding detetection triggered 01163 * in already. 01164 */ 01165 if (input->buf->encoder == handler) 01166 return (0); 01167 01168 /* 01169 * "UTF-16" can be used for both LE and BE 01170 if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name, 01171 BAD_CAST "UTF-16", 6)) && 01172 (!xmlStrncmp(BAD_CAST handler->name, 01173 BAD_CAST "UTF-16", 6))) { 01174 return(0); 01175 } 01176 */ 01177 01178 /* 01179 * Note: this is a bit dangerous, but that's what it 01180 * takes to use nearly compatible signature for different 01181 * encodings. 
01182 */ 01183 xmlCharEncCloseFunc(input->buf->encoder); 01184 input->buf->encoder = handler; 01185 return (0); 01186 } 01187 input->buf->encoder = handler; 01188 01189 /* 01190 * Is there already some content down the pipe to convert ? 01191 */ 01192 if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) { 01193 int processed; 01194 unsigned int use; 01195 01196 /* 01197 * Specific handling of the Byte Order Mark for 01198 * UTF-16 01199 */ 01200 if ((handler->name != NULL) && 01201 (!strcmp(handler->name, "UTF-16LE") || 01202 !strcmp(handler->name, "UTF-16")) && 01203 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { 01204 input->cur += 2; 01205 } 01206 if ((handler->name != NULL) && 01207 (!strcmp(handler->name, "UTF-16BE")) && 01208 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { 01209 input->cur += 2; 01210 } 01211 /* 01212 * Errata on XML-1.0 June 20 2001 01213 * Specific handling of the Byte Order Mark for 01214 * UTF-8 01215 */ 01216 if ((handler->name != NULL) && 01217 (!strcmp(handler->name, "UTF-8")) && 01218 (input->cur[0] == 0xEF) && 01219 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { 01220 input->cur += 3; 01221 } 01222 01223 /* 01224 * Shrink the current input buffer. 01225 * Move it as the raw buffer and create a new input buffer 01226 */ 01227 processed = input->cur - input->base; 01228 xmlBufferShrink(input->buf->buffer, processed); 01229 input->buf->raw = input->buf->buffer; 01230 input->buf->buffer = xmlBufferCreate(); 01231 input->buf->rawconsumed = processed; 01232 use = input->buf->raw->use; 01233 01234 if (ctxt->html) { 01235 /* 01236 * convert as much as possible of the buffer 01237 */ 01238 nbchars = xmlCharEncInFunc(input->buf->encoder, 01239 input->buf->buffer, 01240 input->buf->raw); 01241 } else { 01242 /* 01243 * convert just enough to get 01244 * '<?xml version="1.0" encoding="xxx"?>' 01245 * parsed with the autodetected encoding 01246 * into the parser reading buffer. 
01247 */ 01248 nbchars = xmlCharEncFirstLineInt(input->buf->encoder, 01249 input->buf->buffer, 01250 input->buf->raw, 01251 len); 01252 } 01253 if (nbchars < 0) { 01254 xmlErrInternal(ctxt, 01255 "switching encoding: encoder error\n", 01256 NULL); 01257 return (-1); 01258 } 01259 input->buf->rawconsumed += use - input->buf->raw->use; 01260 input->base = input->cur = input->buf->buffer->content; 01261 input->end = &input->base[input->buf->buffer->use]; 01262 01263 } 01264 return (0); 01265 } else if (input->length == 0) { 01266 /* 01267 * When parsing a static memory array one must know the 01268 * size to be able to convert the buffer. 01269 */ 01270 xmlErrInternal(ctxt, "switching encoding : no input\n", NULL); 01271 return (-1); 01272 } 01273 return (0); 01274 } 01275 01287 int 01288 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, 01289 xmlCharEncodingHandlerPtr handler) { 01290 return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1)); 01291 } 01292 01306 static int 01307 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt, 01308 xmlCharEncodingHandlerPtr handler, int len) { 01309 int ret = 0; 01310 01311 if (handler != NULL) { 01312 if (ctxt->input != NULL) { 01313 ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len); 01314 } else { 01315 xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n", 01316 NULL); 01317 return(-1); 01318 } 01319 /* 01320 * The parsing is now done in UTF8 natively 01321 */ 01322 ctxt->charset = XML_CHAR_ENCODING_UTF8; 01323 } else 01324 return(-1); 01325 return(ret); 01326 } 01327 01338 int 01339 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 01340 { 01341 return (xmlSwitchToEncodingInt(ctxt, handler, -1)); 01342 } 01343 01344 /************************************************************************ 01345 * * 01346 * Commodity functions to handle entities processing * 01347 * * 01348 ************************************************************************/ 01349 01356 void 
01357 xmlFreeInputStream(xmlParserInputPtr input) { 01358 if (input == NULL) return; 01359 01360 if (input->filename != NULL) xmlFree((char *) input->filename); 01361 if (input->directory != NULL) xmlFree((char *) input->directory); 01362 if (input->encoding != NULL) xmlFree((char *) input->encoding); 01363 if (input->version != NULL) xmlFree((char *) input->version); 01364 if ((input->free != NULL) && (input->base != NULL)) 01365 input->free((xmlChar *) input->base); 01366 if (input->buf != NULL) 01367 xmlFreeParserInputBuffer(input->buf); 01368 xmlFree(input); 01369 } 01370 01378 xmlParserInputPtr 01379 xmlNewInputStream(xmlParserCtxtPtr ctxt) { 01380 xmlParserInputPtr input; 01381 static int id = 0; 01382 01383 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput)); 01384 if (input == NULL) { 01385 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 01386 return(NULL); 01387 } 01388 memset(input, 0, sizeof(xmlParserInput)); 01389 input->line = 1; 01390 input->col = 1; 01391 input->standalone = -1; 01392 /* 01393 * we don't care about thread reentrancy unicity for a single 01394 * parser context (and hence thread) is sufficient. 
01395 */ 01396 input->id = id++; 01397 return(input); 01398 } 01399 01411 xmlParserInputPtr 01412 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input, 01413 xmlCharEncoding enc) { 01414 xmlParserInputPtr inputStream; 01415 01416 if (input == NULL) return(NULL); 01417 if (xmlParserDebugEntities) 01418 xmlGenericError(xmlGenericErrorContext, "new input from I/O\n"); 01419 inputStream = xmlNewInputStream(ctxt); 01420 if (inputStream == NULL) { 01421 return(NULL); 01422 } 01423 inputStream->filename = NULL; 01424 inputStream->buf = input; 01425 inputStream->base = inputStream->buf->buffer->content; 01426 inputStream->cur = inputStream->buf->buffer->content; 01427 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 01428 if (enc != XML_CHAR_ENCODING_NONE) { 01429 xmlSwitchEncoding(ctxt, enc); 01430 } 01431 01432 return(inputStream); 01433 } 01434 01444 xmlParserInputPtr 01445 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { 01446 xmlParserInputPtr input; 01447 01448 if (entity == NULL) { 01449 xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n", 01450 NULL); 01451 return(NULL); 01452 } 01453 if (xmlParserDebugEntities) 01454 xmlGenericError(xmlGenericErrorContext, 01455 "new input from entity: %s\n", entity->name); 01456 if (entity->content == NULL) { 01457 switch (entity->etype) { 01458 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 01459 xmlErrInternal(ctxt, "Cannot parse entity %s\n", 01460 entity->name); 01461 break; 01462 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 01463 case XML_EXTERNAL_PARAMETER_ENTITY: 01464 return(xmlLoadExternalEntity((char *) entity->URI, 01465 (char *) entity->ExternalID, ctxt)); 01466 case XML_INTERNAL_GENERAL_ENTITY: 01467 xmlErrInternal(ctxt, 01468 "Internal entity %s without content !\n", 01469 entity->name); 01470 break; 01471 case XML_INTERNAL_PARAMETER_ENTITY: 01472 xmlErrInternal(ctxt, 01473 "Internal parameter entity %s without content !\n", 01474 entity->name); 01475 
break; 01476 case XML_INTERNAL_PREDEFINED_ENTITY: 01477 xmlErrInternal(ctxt, 01478 "Predefined entity %s without content !\n", 01479 entity->name); 01480 break; 01481 } 01482 return(NULL); 01483 } 01484 input = xmlNewInputStream(ctxt); 01485 if (input == NULL) { 01486 return(NULL); 01487 } 01488 if (entity->URI != NULL) 01489 input->filename = (char *) xmlStrdup((xmlChar *) entity->URI); 01490 input->base = entity->content; 01491 input->cur = entity->content; 01492 input->length = entity->length; 01493 input->end = &entity->content[input->length]; 01494 return(input); 01495 } 01496 01505 xmlParserInputPtr 01506 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { 01507 xmlParserInputPtr input; 01508 01509 if (buffer == NULL) { 01510 xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n", 01511 NULL); 01512 return(NULL); 01513 } 01514 if (xmlParserDebugEntities) 01515 xmlGenericError(xmlGenericErrorContext, 01516 "new fixed input: %.30s\n", buffer); 01517 input = xmlNewInputStream(ctxt); 01518 if (input == NULL) { 01519 xmlErrMemory(ctxt, "couldn't allocate a new input stream\n"); 01520 return(NULL); 01521 } 01522 input->base = buffer; 01523 input->cur = buffer; 01524 input->length = xmlStrlen(buffer); 01525 input->end = &buffer[input->length]; 01526 return(input); 01527 } 01528 01538 xmlParserInputPtr 01539 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { 01540 xmlParserInputBufferPtr buf; 01541 xmlParserInputPtr inputStream; 01542 char *directory = NULL; 01543 xmlChar *URI = NULL; 01544 01545 if (xmlParserDebugEntities) 01546 xmlGenericError(xmlGenericErrorContext, 01547 "new input from file: %s\n", filename); 01548 if (ctxt == NULL) return(NULL); 01549 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE); 01550 if (buf == NULL) { 01551 if (filename == NULL) 01552 __xmlLoaderErr(ctxt, 01553 "failed to load external entity: NULL filename \n", 01554 NULL); 01555 else 01556 __xmlLoaderErr(ctxt, 
"failed to load external entity \"%s\"\n", 01557 (const char *) filename); 01558 return(NULL); 01559 } 01560 01561 inputStream = xmlNewInputStream(ctxt); 01562 if (inputStream == NULL) 01563 return(NULL); 01564 01565 inputStream->buf = buf; 01566 inputStream = xmlCheckHTTPInput(ctxt, inputStream); 01567 if (inputStream == NULL) 01568 return(NULL); 01569 01570 if (inputStream->filename == NULL) 01571 URI = xmlStrdup((xmlChar *) filename); 01572 else 01573 URI = xmlStrdup((xmlChar *) inputStream->filename); 01574 directory = xmlParserGetDirectory((const char *) URI); 01575 if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename); 01576 inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI); 01577 if (URI != NULL) xmlFree((char *) URI); 01578 inputStream->directory = directory; 01579 01580 inputStream->base = inputStream->buf->buffer->content; 01581 inputStream->cur = inputStream->buf->buffer->content; 01582 inputStream->end = &inputStream->base[inputStream->buf->buffer->use]; 01583 if ((ctxt->directory == NULL) && (directory != NULL)) 01584 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory); 01585 return(inputStream); 01586 } 01587 01588 /************************************************************************ 01589 * * 01590 * Commodity functions to handle parser contexts * 01591 * * 01592 ************************************************************************/ 01593 01603 int 01604 xmlInitParserCtxt(xmlParserCtxtPtr ctxt) 01605 { 01606 xmlParserInputPtr input; 01607 01608 if(ctxt==NULL) { 01609 xmlErrInternal(NULL, "Got NULL parser context\n", NULL); 01610 return(-1); 01611 } 01612 01613 xmlDefaultSAXHandlerInit(); 01614 01615 if (ctxt->dict == NULL) 01616 ctxt->dict = xmlDictCreate(); 01617 if (ctxt->dict == NULL) { 01618 xmlErrMemory(NULL, "cannot initialize parser context\n"); 01619 return(-1); 01620 } 01621 if (ctxt->sax == NULL) 01622 ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); 01623 if 
(ctxt->sax == NULL) { 01624 xmlErrMemory(NULL, "cannot initialize parser context\n"); 01625 return(-1); 01626 } 01627 else 01628 xmlSAXVersion(ctxt->sax, 2); 01629 01630 ctxt->maxatts = 0; 01631 ctxt->atts = NULL; 01632 /* Allocate the Input stack */ 01633 if (ctxt->inputTab == NULL) { 01634 ctxt->inputTab = (xmlParserInputPtr *) 01635 xmlMalloc(5 * sizeof(xmlParserInputPtr)); 01636 ctxt->inputMax = 5; 01637 } 01638 if (ctxt->inputTab == NULL) { 01639 xmlErrMemory(NULL, "cannot initialize parser context\n"); 01640 ctxt->inputNr = 0; 01641 ctxt->inputMax = 0; 01642 ctxt->input = NULL; 01643 return(-1); 01644 } 01645 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 01646 xmlFreeInputStream(input); 01647 } 01648 ctxt->inputNr = 0; 01649 ctxt->input = NULL; 01650 01651 ctxt->version = NULL; 01652 ctxt->encoding = NULL; 01653 ctxt->standalone = -1; 01654 ctxt->hasExternalSubset = 0; 01655 ctxt->hasPErefs = 0; 01656 ctxt->html = 0; 01657 ctxt->external = 0; 01658 ctxt->instate = XML_PARSER_START; 01659 ctxt->token = 0; 01660 ctxt->directory = NULL; 01661 01662 /* Allocate the Node stack */ 01663 if (ctxt->nodeTab == NULL) { 01664 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr)); 01665 ctxt->nodeMax = 10; 01666 } 01667 if (ctxt->nodeTab == NULL) { 01668 xmlErrMemory(NULL, "cannot initialize parser context\n"); 01669 ctxt->nodeNr = 0; 01670 ctxt->nodeMax = 0; 01671 ctxt->node = NULL; 01672 ctxt->inputNr = 0; 01673 ctxt->inputMax = 0; 01674 ctxt->input = NULL; 01675 return(-1); 01676 } 01677 ctxt->nodeNr = 0; 01678 ctxt->node = NULL; 01679 01680 /* Allocate the Name stack */ 01681 if (ctxt->nameTab == NULL) { 01682 ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *)); 01683 ctxt->nameMax = 10; 01684 } 01685 if (ctxt->nameTab == NULL) { 01686 xmlErrMemory(NULL, "cannot initialize parser context\n"); 01687 ctxt->nodeNr = 0; 01688 ctxt->nodeMax = 0; 01689 ctxt->node = NULL; 01690 ctxt->inputNr = 0; 01691 ctxt->inputMax = 0; 01692 
ctxt->input = NULL; 01693 ctxt->nameNr = 0; 01694 ctxt->nameMax = 0; 01695 ctxt->name = NULL; 01696 return(-1); 01697 } 01698 ctxt->nameNr = 0; 01699 ctxt->name = NULL; 01700 01701 /* Allocate the space stack */ 01702 if (ctxt->spaceTab == NULL) { 01703 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int)); 01704 ctxt->spaceMax = 10; 01705 } 01706 if (ctxt->spaceTab == NULL) { 01707 xmlErrMemory(NULL, "cannot initialize parser context\n"); 01708 ctxt->nodeNr = 0; 01709 ctxt->nodeMax = 0; 01710 ctxt->node = NULL; 01711 ctxt->inputNr = 0; 01712 ctxt->inputMax = 0; 01713 ctxt->input = NULL; 01714 ctxt->nameNr = 0; 01715 ctxt->nameMax = 0; 01716 ctxt->name = NULL; 01717 ctxt->spaceNr = 0; 01718 ctxt->spaceMax = 0; 01719 ctxt->space = NULL; 01720 return(-1); 01721 } 01722 ctxt->spaceNr = 1; 01723 ctxt->spaceMax = 10; 01724 ctxt->spaceTab[0] = -1; 01725 ctxt->space = &ctxt->spaceTab[0]; 01726 ctxt->userData = ctxt; 01727 ctxt->myDoc = NULL; 01728 ctxt->wellFormed = 1; 01729 ctxt->nsWellFormed = 1; 01730 ctxt->valid = 1; 01731 ctxt->loadsubset = xmlLoadExtDtdDefaultValue; 01732 ctxt->validate = xmlDoValidityCheckingDefaultValue; 01733 ctxt->pedantic = xmlPedanticParserDefaultValue; 01734 ctxt->linenumbers = xmlLineNumbersDefaultValue; 01735 ctxt->keepBlanks = xmlKeepBlanksDefaultValue; 01736 if (ctxt->keepBlanks == 0) 01737 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace; 01738 01739 ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0; 01740 ctxt->vctxt.userData = ctxt; 01741 ctxt->vctxt.error = xmlParserValidityError; 01742 ctxt->vctxt.warning = xmlParserValidityWarning; 01743 if (ctxt->validate) { 01744 if (xmlGetWarningsDefaultValue == 0) 01745 ctxt->vctxt.warning = NULL; 01746 else 01747 ctxt->vctxt.warning = xmlParserValidityWarning; 01748 ctxt->vctxt.nodeMax = 0; 01749 } 01750 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue; 01751 ctxt->record_info = 0; 01752 ctxt->nbChars = 0; 01753 ctxt->checkIndex = 0; 01754 ctxt->inSubset = 0; 01755 ctxt->errNo = 
XML_ERR_OK; 01756 ctxt->depth = 0; 01757 ctxt->charset = XML_CHAR_ENCODING_UTF8; 01758 ctxt->catalogs = NULL; 01759 ctxt->nbentities = 0; 01760 xmlInitNodeInfoSeq(&ctxt->node_seq); 01761 return(0); 01762 } 01763 01772 void 01773 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) 01774 { 01775 xmlParserInputPtr input; 01776 01777 if (ctxt == NULL) return; 01778 01779 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ 01780 xmlFreeInputStream(input); 01781 } 01782 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); 01783 if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab); 01784 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); 01785 if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab); 01786 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); 01787 if (ctxt->version != NULL) xmlFree((char *) ctxt->version); 01788 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding); 01789 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI); 01790 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem); 01791 #ifdef LIBXML_SAX1_ENABLED 01792 if ((ctxt->sax != NULL) && 01793 (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)) 01794 #else 01795 if (ctxt->sax != NULL) 01796 #endif /* LIBXML_SAX1_ENABLED */ 01797 xmlFree(ctxt->sax); 01798 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory); 01799 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab); 01800 if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts); 01801 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict); 01802 if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab); 01803 if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab); 01804 if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs); 01805 if (ctxt->attsDefault != NULL) 01806 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree); 01807 if (ctxt->attsSpecial != NULL) 01808 xmlHashFree(ctxt->attsSpecial, NULL); 01809 if (ctxt->freeElems != NULL) { 01810 xmlNodePtr cur, next; 01811 01812 cur = 
ctxt->freeElems; 01813 while (cur != NULL) { 01814 next = cur->next; 01815 xmlFree(cur); 01816 cur = next; 01817 } 01818 } 01819 if (ctxt->freeAttrs != NULL) { 01820 xmlAttrPtr cur, next; 01821 01822 cur = ctxt->freeAttrs; 01823 while (cur != NULL) { 01824 next = cur->next; 01825 xmlFree(cur); 01826 cur = next; 01827 } 01828 } 01829 /* 01830 * cleanup the error strings 01831 */ 01832 if (ctxt->lastError.message != NULL) 01833 xmlFree(ctxt->lastError.message); 01834 if (ctxt->lastError.file != NULL) 01835 xmlFree(ctxt->lastError.file); 01836 if (ctxt->lastError.str1 != NULL) 01837 xmlFree(ctxt->lastError.str1); 01838 if (ctxt->lastError.str2 != NULL) 01839 xmlFree(ctxt->lastError.str2); 01840 if (ctxt->lastError.str3 != NULL) 01841 xmlFree(ctxt->lastError.str3); 01842 01843 #ifdef LIBXML_CATALOG_ENABLED 01844 if (ctxt->catalogs != NULL) 01845 xmlCatalogFreeLocal(ctxt->catalogs); 01846 #endif 01847 xmlFree(ctxt); 01848 } 01849 01858 xmlParserCtxtPtr 01859 xmlNewParserCtxt(void) 01860 { 01861 xmlParserCtxtPtr ctxt; 01862 01863 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt)); 01864 if (ctxt == NULL) { 01865 xmlErrMemory(NULL, "cannot allocate parser context\n"); 01866 return(NULL); 01867 } 01868 memset(ctxt, 0, sizeof(xmlParserCtxt)); 01869 if (xmlInitParserCtxt(ctxt) < 0) { 01870 xmlFreeParserCtxt(ctxt); 01871 return(NULL); 01872 } 01873 return(ctxt); 01874 } 01875 01876 /************************************************************************ 01877 * * 01878 * Handling of node informations * 01879 * * 01880 ************************************************************************/ 01881 01889 void 01890 xmlClearParserCtxt(xmlParserCtxtPtr ctxt) 01891 { 01892 if (ctxt==NULL) 01893 return; 01894 xmlClearNodeInfoSeq(&ctxt->node_seq); 01895 xmlCtxtReset(ctxt); 01896 } 01897 01898 01908 const xmlParserNodeInfo * 01909 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node) 01910 { 01911 unsigned long pos; 01912 01913 if ((ctx == NULL) || (node 
== NULL)) 01914 return (NULL); 01915 /* Find position where node should be at */ 01916 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node); 01917 if (pos < ctx->node_seq.length 01918 && ctx->node_seq.buffer[pos].node == node) 01919 return &ctx->node_seq.buffer[pos]; 01920 else 01921 return NULL; 01922 } 01923 01924 01931 void 01932 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 01933 { 01934 if (seq == NULL) 01935 return; 01936 seq->length = 0; 01937 seq->maximum = 0; 01938 seq->buffer = NULL; 01939 } 01940 01948 void 01949 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq) 01950 { 01951 if (seq == NULL) 01952 return; 01953 if (seq->buffer != NULL) 01954 xmlFree(seq->buffer); 01955 xmlInitNodeInfoSeq(seq); 01956 } 01957 01969 unsigned long 01970 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 01971 const xmlNodePtr node) 01972 { 01973 unsigned long upper, lower, middle; 01974 int found = 0; 01975 01976 if ((seq == NULL) || (node == NULL)) 01977 return ((unsigned long) -1); 01978 01979 /* Do a binary search for the key */ 01980 lower = 1; 01981 upper = seq->length; 01982 middle = 0; 01983 while (lower <= upper && !found) { 01984 middle = lower + (upper - lower) / 2; 01985 if (node == seq->buffer[middle - 1].node) 01986 found = 1; 01987 else if (node < seq->buffer[middle - 1].node) 01988 upper = middle - 1; 01989 else 01990 lower = middle + 1; 01991 } 01992 01993 /* Return position */ 01994 if (middle == 0 || seq->buffer[middle - 1].node < node) 01995 return middle; 01996 else 01997 return middle - 1; 01998 } 01999 02000 02008 void 02009 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt, 02010 const xmlParserNodeInfoPtr info) 02011 { 02012 unsigned long pos; 02013 02014 if ((ctxt == NULL) || (info == NULL)) return; 02015 02016 /* Find pos and check to see if node is already in the sequence */ 02017 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr) 02018 info->node); 02019 02020 if ((pos < ctxt->node_seq.length) && 02021 (ctxt->node_seq.buffer != 
NULL) && 02022 (ctxt->node_seq.buffer[pos].node == info->node)) { 02023 ctxt->node_seq.buffer[pos] = *info; 02024 } 02025 02026 /* Otherwise, we need to add new node to buffer */ 02027 else { 02028 if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) { 02029 xmlParserNodeInfo *tmp_buffer; 02030 unsigned int byte_size; 02031 02032 if (ctxt->node_seq.maximum == 0) 02033 ctxt->node_seq.maximum = 2; 02034 byte_size = (sizeof(*ctxt->node_seq.buffer) * 02035 (2 * ctxt->node_seq.maximum)); 02036 02037 if (ctxt->node_seq.buffer == NULL) 02038 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size); 02039 else 02040 tmp_buffer = 02041 (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer, 02042 byte_size); 02043 02044 if (tmp_buffer == NULL) { 02045 xmlErrMemory(ctxt, "failed to allocate buffer\n"); 02046 return; 02047 } 02048 ctxt->node_seq.buffer = tmp_buffer; 02049 ctxt->node_seq.maximum *= 2; 02050 } 02051 02052 /* If position is not at end, move elements out of the way */ 02053 if (pos != ctxt->node_seq.length) { 02054 unsigned long i; 02055 02056 for (i = ctxt->node_seq.length; i > pos; i--) 02057 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1]; 02058 } 02059 02060 /* Copy element and increase length */ 02061 ctxt->node_seq.buffer[pos] = *info; 02062 ctxt->node_seq.length++; 02063 } 02064 } 02065 02066 /************************************************************************ 02067 * * 02068 * Defaults settings * 02069 * * 02070 ************************************************************************/ 02080 int 02081 xmlPedanticParserDefault(int val) { 02082 int old = xmlPedanticParserDefaultValue; 02083 02084 xmlPedanticParserDefaultValue = val; 02085 return(old); 02086 } 02087 02098 int 02099 xmlLineNumbersDefault(int val) { 02100 int old = xmlLineNumbersDefaultValue; 02101 02102 xmlLineNumbersDefaultValue = val; 02103 return(old); 02104 } 02105 02120 int 02121 xmlSubstituteEntitiesDefault(int val) { 02122 int old = xmlSubstituteEntitiesDefaultValue; 
02123 02124 xmlSubstituteEntitiesDefaultValue = val; 02125 return(old); 02126 } 02127 02152 int 02153 xmlKeepBlanksDefault(int val) { 02154 int old = xmlKeepBlanksDefaultValue; 02155 02156 xmlKeepBlanksDefaultValue = val; 02157 if (!val) xmlIndentTreeOutput = 1; 02158 return(old); 02159 } 02160 02161 #define bottom_parserInternals 02162 #include "elfgcchack.h" Generated on Sat May 26 2012 04:33:19 for ReactOS by
1.7.6.1
|