ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

parserInternals.c
Go to the documentation of this file.
00001 /*
00002  * parserInternals.c : Internal routines (and obsolete ones) needed for the
00003  *                     XML and HTML parsers.
00004  *
00005  * See Copyright for the status of this software.
00006  *
00007  * daniel@veillard.com
00008  */
00009 
00010 #define IN_LIBXML
00011 #include "libxml.h"
00012 
00013 #if defined(WIN32) && !defined (__CYGWIN__)
00014 #define XML_DIR_SEP '\\'
00015 #else
00016 #define XML_DIR_SEP '/'
00017 #endif
00018 
00019 #include <string.h>
00020 #ifdef HAVE_CTYPE_H
00021 #include <ctype.h>
00022 #endif
00023 #ifdef HAVE_STDLIB_H
00024 #include <stdlib.h>
00025 #endif
00026 #ifdef HAVE_SYS_STAT_H
00027 #include <sys/stat.h>
00028 #endif
00029 #ifdef HAVE_FCNTL_H
00030 #include <fcntl.h>
00031 #endif
00032 #ifdef HAVE_UNISTD_H
00033 #include <unistd.h>
00034 #endif
00035 #ifdef HAVE_ZLIB_H
00036 #include <zlib.h>
00037 #endif
00038 
00039 #include <libxml/xmlmemory.h>
00040 #include <libxml/tree.h>
00041 #include <libxml/parser.h>
00042 #include <libxml/parserInternals.h>
00043 #include <libxml/valid.h>
00044 #include <libxml/entities.h>
00045 #include <libxml/xmlerror.h>
00046 #include <libxml/encoding.h>
00047 #include <libxml/valid.h>
00048 #include <libxml/xmlIO.h>
00049 #include <libxml/uri.h>
00050 #include <libxml/dict.h>
00051 #include <libxml/SAX.h>
00052 #ifdef LIBXML_CATALOG_ENABLED
00053 #include <libxml/catalog.h>
00054 #endif
00055 #include <libxml/globals.h>
00056 #include <libxml/chvalid.h>
00057 
00058 /*
00059  * Various global defaults for parsing
00060  */
00061 
00069 void
00070 xmlCheckVersion(int version) {
00071     int myversion = (int) LIBXML_VERSION;
00072 
00073     xmlInitParser();
00074 
00075     if ((myversion / 10000) != (version / 10000)) {
00076     xmlGenericError(xmlGenericErrorContext, 
00077         "Fatal: program compiled against libxml %d using libxml %d\n",
00078         (version / 10000), (myversion / 10000));
00079     fprintf(stderr, 
00080         "Fatal: program compiled against libxml %d using libxml %d\n",
00081         (version / 10000), (myversion / 10000));
00082     }
00083     if ((myversion / 100) < (version / 100)) {
00084     xmlGenericError(xmlGenericErrorContext, 
00085         "Warning: program compiled against libxml %d using older %d\n",
00086         (version / 100), (myversion / 100));
00087     }
00088 }
00089 
00090 
00091 /************************************************************************
00092  *                                  *
00093  *      Some factorized error routines              *
00094  *                                  *
00095  ************************************************************************/
00096 
00097 
00105 void
00106 xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
00107 {
00108     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
00109         (ctxt->instate == XML_PARSER_EOF))
00110     return;
00111     if (ctxt != NULL) {
00112         ctxt->errNo = XML_ERR_NO_MEMORY;
00113         ctxt->instate = XML_PARSER_EOF;
00114         ctxt->disableSAX = 1;
00115     }
00116     if (extra)
00117         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
00118                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
00119                         NULL, NULL, 0, 0,
00120                         "Memory allocation failed : %s\n", extra);
00121     else
00122         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
00123                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
00124                         NULL, NULL, 0, 0, "Memory allocation failed\n");
00125 }
00126 
00137 void
00138 __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
00139                  const char *msg, const xmlChar * str1, const xmlChar * str2)
00140 {
00141     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
00142         (ctxt->instate == XML_PARSER_EOF))
00143     return;
00144     if (ctxt != NULL)
00145         ctxt->errNo = xmlerr;
00146     __xmlRaiseError(NULL, NULL, NULL,
00147                     ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
00148                     NULL, 0, (const char *) str1, (const char *) str2,
00149                     NULL, 0, 0, msg, str1, str2);
00150     if (ctxt != NULL) {
00151         ctxt->wellFormed = 0;
00152         if (ctxt->recovery == 0)
00153             ctxt->disableSAX = 1;
00154     }
00155 }
00156 
00165 static void
00166 xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
00167 {
00168     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
00169         (ctxt->instate == XML_PARSER_EOF))
00170     return;
00171     if (ctxt != NULL)
00172         ctxt->errNo = XML_ERR_INTERNAL_ERROR;
00173     __xmlRaiseError(NULL, NULL, NULL,
00174                     ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
00175                     XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
00176                     0, 0, msg, str);
00177     if (ctxt != NULL) {
00178         ctxt->wellFormed = 0;
00179         if (ctxt->recovery == 0)
00180             ctxt->disableSAX = 1;
00181     }
00182 }
00183 
00193 static void
00194 xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
00195                   const char *msg, int val)
00196 {
00197     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
00198         (ctxt->instate == XML_PARSER_EOF))
00199     return;
00200     if (ctxt != NULL)
00201         ctxt->errNo = error;
00202     __xmlRaiseError(NULL, NULL, NULL,
00203                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
00204                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
00205     if (ctxt != NULL) {
00206         ctxt->wellFormed = 0;
00207         if (ctxt->recovery == 0)
00208             ctxt->disableSAX = 1;
00209     }
00210 }
00211 
/**
 * xmlIsLetter:
 * @c:  a character value
 *
 * Tests @c against the IS_BASECHAR and IS_IDEOGRAPHIC macros.
 *
 * Returns 1 if either macro matches, 0 otherwise.
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
00225 
00226 /************************************************************************
00227  *                                  *
00228  *      Input handling functions for progressive parsing    *
00229  *                                  *
00230  ************************************************************************/
00231 
00232 /* #define DEBUG_INPUT */
00233 /* #define DEBUG_STACK */
00234 /* #define DEBUG_PUSH */
00235 
00236 
00237 /* we need to keep enough input to show errors in context */
00238 #define LINE_LEN        80
00239 
00240 #ifdef DEBUG_INPUT
00241 #define CHECK_BUFFER(in) check_buffer(in)
00242 
00243 static
00244 void check_buffer(xmlParserInputPtr in) {
00245     if (in->base != in->buf->buffer->content) {
00246         xmlGenericError(xmlGenericErrorContext,
00247         "xmlParserInput: base mismatch problem\n");
00248     }
00249     if (in->cur < in->base) {
00250         xmlGenericError(xmlGenericErrorContext,
00251         "xmlParserInput: cur < base problem\n");
00252     }
00253     if (in->cur > in->base + in->buf->buffer->use) {
00254         xmlGenericError(xmlGenericErrorContext,
00255         "xmlParserInput: cur > base + use problem\n");
00256     }
00257     xmlGenericError(xmlGenericErrorContext,"buffer %x : content %x, cur %d, use %d, size %d\n",
00258             (int) in, (int) in->buf->buffer->content, in->cur - in->base,
00259         in->buf->buffer->use, in->buf->buffer->size);
00260 }
00261 
00262 #else
00263 #define CHECK_BUFFER(in) 
00264 #endif
00265 
00266 
00278 int
00279 xmlParserInputRead(xmlParserInputPtr in, int len) {
00280     int ret;
00281     int used;
00282     int indx;
00283 
00284     if (in == NULL) return(-1);
00285 #ifdef DEBUG_INPUT
00286     xmlGenericError(xmlGenericErrorContext, "Read\n");
00287 #endif
00288     if (in->buf == NULL) return(-1);
00289     if (in->base == NULL) return(-1);
00290     if (in->cur == NULL) return(-1);
00291     if (in->buf->buffer == NULL) return(-1);
00292     if (in->buf->readcallback == NULL) return(-1);
00293 
00294     CHECK_BUFFER(in);
00295 
00296     used = in->cur - in->buf->buffer->content;
00297     ret = xmlBufferShrink(in->buf->buffer, used);
00298     if (ret > 0) {
00299     in->cur -= ret;
00300     in->consumed += ret;
00301     }
00302     ret = xmlParserInputBufferRead(in->buf, len);
00303     if (in->base != in->buf->buffer->content) {
00304         /*
00305      * the buffer has been reallocated
00306      */
00307     indx = in->cur - in->base;
00308     in->base = in->buf->buffer->content;
00309     in->cur = &in->buf->buffer->content[indx];
00310     }
00311     in->end = &in->buf->buffer->content[in->buf->buffer->use];
00312 
00313     CHECK_BUFFER(in);
00314 
00315     return(ret);
00316 }
00317 
00329 int
00330 xmlParserInputGrow(xmlParserInputPtr in, int len) {
00331     int ret;
00332     int indx;
00333 
00334     if (in == NULL) return(-1);
00335 #ifdef DEBUG_INPUT
00336     xmlGenericError(xmlGenericErrorContext, "Grow\n");
00337 #endif
00338     if (in->buf == NULL) return(-1);
00339     if (in->base == NULL) return(-1);
00340     if (in->cur == NULL) return(-1);
00341     if (in->buf->buffer == NULL) return(-1);
00342 
00343     CHECK_BUFFER(in);
00344 
00345     indx = in->cur - in->base;
00346     if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
00347 
00348     CHECK_BUFFER(in);
00349 
00350         return(0);
00351     }
00352     if (in->buf->readcallback != NULL)
00353     ret = xmlParserInputBufferGrow(in->buf, len);
00354     else    
00355         return(0);
00356 
00357     /*
00358      * NOTE : in->base may be a "dangling" i.e. freed pointer in this
00359      *        block, but we use it really as an integer to do some
00360      *        pointer arithmetic. Insure will raise it as a bug but in
00361      *        that specific case, that's not !
00362      */
00363     if (in->base != in->buf->buffer->content) {
00364         /*
00365      * the buffer has been reallocated
00366      */
00367     indx = in->cur - in->base;
00368     in->base = in->buf->buffer->content;
00369     in->cur = &in->buf->buffer->content[indx];
00370     }
00371     in->end = &in->buf->buffer->content[in->buf->buffer->use];
00372 
00373     CHECK_BUFFER(in);
00374 
00375     return(ret);
00376 }
00377 
00384 void
00385 xmlParserInputShrink(xmlParserInputPtr in) {
00386     int used;
00387     int ret;
00388     int indx;
00389 
00390 #ifdef DEBUG_INPUT
00391     xmlGenericError(xmlGenericErrorContext, "Shrink\n");
00392 #endif
00393     if (in == NULL) return;
00394     if (in->buf == NULL) return;
00395     if (in->base == NULL) return;
00396     if (in->cur == NULL) return;
00397     if (in->buf->buffer == NULL) return;
00398 
00399     CHECK_BUFFER(in);
00400 
00401     used = in->cur - in->buf->buffer->content;
00402     /*
00403      * Do not shrink on large buffers whose only a tiny fraction
00404      * was consumed
00405      */
00406     if (used > INPUT_CHUNK) {
00407     ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
00408     if (ret > 0) {
00409         in->cur -= ret;
00410         in->consumed += ret;
00411     }
00412     in->end = &in->buf->buffer->content[in->buf->buffer->use];
00413     }
00414 
00415     CHECK_BUFFER(in);
00416 
00417     if (in->buf->buffer->use > INPUT_CHUNK) {
00418         return;
00419     }
00420     xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
00421     if (in->base != in->buf->buffer->content) {
00422         /*
00423      * the buffer has been reallocated
00424      */
00425     indx = in->cur - in->base;
00426     in->base = in->buf->buffer->content;
00427     in->cur = &in->buf->buffer->content[indx];
00428     }
00429     in->end = &in->buf->buffer->content[in->buf->buffer->use];
00430 
00431     CHECK_BUFFER(in);
00432 }
00433 
00434 /************************************************************************
00435  *                                  *
00436  *      UTF8 character input and related functions      *
00437  *                                  *
00438  ************************************************************************/
00439 
00447 void
00448 xmlNextChar(xmlParserCtxtPtr ctxt)
00449 {
00450     if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
00451         (ctxt->input == NULL))
00452         return;
00453 
00454     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
00455         if ((*ctxt->input->cur == 0) &&
00456             (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
00457             (ctxt->instate != XML_PARSER_COMMENT)) {
00458             /*
00459              * If we are at the end of the current entity and
00460              * the context allows it, we pop consumed entities
00461              * automatically.
00462              * the auto closing should be blocked in other cases
00463              */
00464             xmlPopInput(ctxt);
00465         } else {
00466             const unsigned char *cur;
00467             unsigned char c;
00468 
00469             /*
00470              *   2.11 End-of-Line Handling
00471              *   the literal two-character sequence "#xD#xA" or a standalone
00472              *   literal #xD, an XML processor must pass to the application
00473              *   the single character #xA.
00474              */
00475             if (*(ctxt->input->cur) == '\n') {
00476                 ctxt->input->line++; ctxt->input->col = 1;
00477             } else
00478                 ctxt->input->col++;
00479 
00480             /*
00481              * We are supposed to handle UTF8, check it's valid
00482              * From rfc2044: encoding of the Unicode values on UTF-8:
00483              *
00484              * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
00485              * 0000 0000-0000 007F   0xxxxxxx
00486              * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
00487              * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
00488              *
00489              * Check for the 0x110000 limit too
00490              */
00491             cur = ctxt->input->cur;
00492 
00493             c = *cur;
00494             if (c & 0x80) {
00495             if (c == 0xC0)
00496             goto encoding_error;
00497                 if (cur[1] == 0) {
00498                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00499                     cur = ctxt->input->cur;
00500                 }
00501                 if ((cur[1] & 0xc0) != 0x80)
00502                     goto encoding_error;
00503                 if ((c & 0xe0) == 0xe0) {
00504                     unsigned int val;
00505 
00506                     if (cur[2] == 0) {
00507                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00508                         cur = ctxt->input->cur;
00509                     }
00510                     if ((cur[2] & 0xc0) != 0x80)
00511                         goto encoding_error;
00512                     if ((c & 0xf0) == 0xf0) {
00513                         if (cur[3] == 0) {
00514                             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00515                             cur = ctxt->input->cur;
00516                         }
00517                         if (((c & 0xf8) != 0xf0) ||
00518                             ((cur[3] & 0xc0) != 0x80))
00519                             goto encoding_error;
00520                         /* 4-byte code */
00521                         ctxt->input->cur += 4;
00522                         val = (cur[0] & 0x7) << 18;
00523                         val |= (cur[1] & 0x3f) << 12;
00524                         val |= (cur[2] & 0x3f) << 6;
00525                         val |= cur[3] & 0x3f;
00526                     } else {
00527                         /* 3-byte code */
00528                         ctxt->input->cur += 3;
00529                         val = (cur[0] & 0xf) << 12;
00530                         val |= (cur[1] & 0x3f) << 6;
00531                         val |= cur[2] & 0x3f;
00532                     }
00533                     if (((val > 0xd7ff) && (val < 0xe000)) ||
00534                         ((val > 0xfffd) && (val < 0x10000)) ||
00535                         (val >= 0x110000)) {
00536             xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
00537                       "Char 0x%X out of allowed range\n",
00538                       val);
00539                     }
00540                 } else
00541                     /* 2-byte code */
00542                     ctxt->input->cur += 2;
00543             } else
00544                 /* 1-byte code */
00545                 ctxt->input->cur++;
00546 
00547             ctxt->nbChars++;
00548             if (*ctxt->input->cur == 0)
00549                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00550         }
00551     } else {
00552         /*
00553          * Assume it's a fixed length encoding (1) with
00554          * a compatible encoding for the ASCII set, since
00555          * XML constructs only use < 128 chars
00556          */
00557 
00558         if (*(ctxt->input->cur) == '\n') {
00559             ctxt->input->line++; ctxt->input->col = 1;
00560         } else
00561             ctxt->input->col++;
00562         ctxt->input->cur++;
00563         ctxt->nbChars++;
00564         if (*ctxt->input->cur == 0)
00565             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00566     }
00567     if ((*ctxt->input->cur == '%') && (!ctxt->html))
00568         xmlParserHandlePEReference(ctxt);
00569     if ((*ctxt->input->cur == 0) &&
00570         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
00571         xmlPopInput(ctxt);
00572     return;
00573 encoding_error:
00574     /*
00575      * If we detect an UTF8 error that probably mean that the
00576      * input encoding didn't get properly advertised in the
00577      * declaration header. Report the error and switch the encoding
00578      * to ISO-Latin-1 (if you don't like this policy, just declare the
00579      * encoding !)
00580      */
00581     if ((ctxt == NULL) || (ctxt->input == NULL) ||
00582         (ctxt->input->end - ctxt->input->cur < 4)) {
00583     __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
00584              "Input is not proper UTF-8, indicate encoding !\n",
00585              NULL, NULL);
00586     } else {
00587         char buffer[150];
00588 
00589     snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
00590             ctxt->input->cur[0], ctxt->input->cur[1],
00591             ctxt->input->cur[2], ctxt->input->cur[3]);
00592     __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
00593              "Input is not proper UTF-8, indicate encoding !\n%s",
00594              BAD_CAST buffer, NULL);
00595     }
00596     ctxt->charset = XML_CHAR_ENCODING_8859_1;
00597     ctxt->input->cur++;
00598     return;
00599 }
00600 
/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer receiving the length in bytes of the character
 *
 * Decodes the character at the current input position without
 * consuming it (except that the #xD of a "#xD#xA" pair is skipped).
 * In UTF-8 mode the sequence is validated, including overlong forms;
 * a value failing IS_CHAR is reported but still returned.
 *
 * Returns the character value, with #xD reported as #xA. Returns 0
 * when @ctxt, @len or the input is NULL, or when the parser is in the
 * XML_PARSER_EOF state. On an invalid sequence with fewer than 4 bytes
 * left, *len is set to 0 and 0 is returned; otherwise the error is
 * reported, the charset is switched to XML_CHAR_ENCODING_8859_1 and the
 * current byte is returned with *len set to 1.
 */
00619 int
00620 xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
00621     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
00622     if (ctxt->instate == XML_PARSER_EOF)
00623     return(0);
00624 
/* Fast path: bytes in 0x20..0x7F are returned as-is in every charset. */
00625     if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
00626         *len = 1;
00627         return((int) *ctxt->input->cur);
00628     }
00629     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
00630     /*
00631      * We are supposed to handle UTF8, check it's valid
00632      * From rfc2044: encoding of the Unicode values on UTF-8:
00633      *
00634      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
00635      * 0000 0000-0000 007F   0xxxxxxx
00636      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
00637      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
00638      *
00639      * Check for the 0x110000 limit too
00640      */
00641     const unsigned char *cur = ctxt->input->cur;
00642     unsigned char c;
00643     unsigned int val;
00644 
00645     c = *cur;
00646     if (c & 0x80) {
/* A lead byte must be 11xxxxxx; 10xxxxxx and 0xC0 are rejected. */
00647         if (((c & 0x40) == 0) || (c == 0xC0))
00648         goto encoding_error;
/* A zero byte may only mean the buffer ran out: grow, then reload cur. */
00649         if (cur[1] == 0) {
00650         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00651                 cur = ctxt->input->cur;
00652             }
00653         if ((cur[1] & 0xc0) != 0x80)
00654         goto encoding_error;
00655         if ((c & 0xe0) == 0xe0) {
00656         if (cur[2] == 0) {
00657             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00658                     cur = ctxt->input->cur;
00659                 }
00660         if ((cur[2] & 0xc0) != 0x80)
00661             goto encoding_error;
00662         if ((c & 0xf0) == 0xf0) {
00663             if (cur[3] == 0) {
00664             xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
00665                         cur = ctxt->input->cur;
00666                     }
00667             if (((c & 0xf8) != 0xf0) ||
00668             ((cur[3] & 0xc0) != 0x80))
00669             goto encoding_error;
00670             /* 4-byte code */
00671             *len = 4;
00672             val = (cur[0] & 0x7) << 18;
00673             val |= (cur[1] & 0x3f) << 12;
00674             val |= (cur[2] & 0x3f) << 6;
00675             val |= cur[3] & 0x3f;
/* Reject overlong form: the value would fit in fewer bytes. */
00676             if (val < 0x10000)
00677             goto encoding_error;
00678         } else {
00679           /* 3-byte code */
00680             *len = 3;
00681             val = (cur[0] & 0xf) << 12;
00682             val |= (cur[1] & 0x3f) << 6;
00683             val |= cur[2] & 0x3f;
/* Reject overlong form: the value would fit in fewer bytes. */
00684             if (val < 0x800)
00685             goto encoding_error;
00686         }
00687         } else {
00688           /* 2-byte code */
00689         *len = 2;
00690         val = (cur[0] & 0x1f) << 6;
00691         val |= cur[1] & 0x3f;
/* Reject overlong form: the value would fit in a single byte. */
00692         if (val < 0x80)
00693             goto encoding_error;
00694         }
/* An out-of-range value is reported but still returned to the caller. */
00695         if (!IS_CHAR(val)) {
00696             xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
00697                   "Char 0x%X out of allowed range\n", val);
00698         }    
00699         return(val);
00700     } else {
00701         /* 1-byte code */
00702         *len = 1;
00703         if (*ctxt->input->cur == 0)
00704         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
/* Still zero with data left after it: a real NUL byte in the input. */
00705         if ((*ctxt->input->cur == 0) &&
00706             (ctxt->input->end > ctxt->input->cur)) {
00707             xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
00708                   "Char 0x0 out of allowed range\n", 0);
00709         }
/* #xD is reported as #xA; for "#xD#xA" the #xD is skipped first. */
00710         if (*ctxt->input->cur == 0xD) {
00711         if (ctxt->input->cur[1] == 0xA) {
00712             ctxt->nbChars++;
00713             ctxt->input->cur++;
00714         }
00715         return(0xA);
00716         }
00717         return((int) *ctxt->input->cur);
00718     }
00719     }
00720     /*
00721      * Assume it's a fixed length encoding (1) with
00722      * a compatible encoding for the ASCII set, since
00723      * XML constructs only use < 128 chars
00724      */
00725     *len = 1;
/* Same end-of-line normalization as in the UTF-8 1-byte case. */
00726     if (*ctxt->input->cur == 0xD) {
00727     if (ctxt->input->cur[1] == 0xA) {
00728         ctxt->nbChars++;
00729         ctxt->input->cur++;
00730     }
00731     return(0xA);
00732     }
00733     return((int) *ctxt->input->cur);
00734 encoding_error:
00735     /*
00736      * An encoding problem may arise from a truncated input buffer
00737      * splitting a character in the middle. In that case do not raise
00738      * an error but return 0 to indicate an end of stream problem
00739      */
00740     if (ctxt->input->end - ctxt->input->cur < 4) {
00741     *len = 0;
00742     return(0);
00743     }
00744 
00745     /*
00746      * If we detect an UTF8 error that probably mean that the
00747      * input encoding didn't get properly advertised in the
00748      * declaration header. Report the error and switch the encoding
00749      * to ISO-Latin-1 (if you don't like this policy, just declare the
00750      * encoding !)
00751      */
00752     {
00753         char buffer[150];
00754 
/* At least 4 bytes remain here, so dumping cur[0..3] stays in bounds. */
00755     snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
00756             ctxt->input->cur[0], ctxt->input->cur[1],
00757             ctxt->input->cur[2], ctxt->input->cur[3]);
00758     __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
00759              "Input is not proper UTF-8, indicate encoding !\n%s",
00760              BAD_CAST buffer, NULL);
00761     }
00762     ctxt->charset = XML_CHAR_ENCODING_8859_1; 
00763     *len = 1;
00764     return((int) *ctxt->input->cur);
00765 }
00766 
00779 int
00780 xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
00781 {
00782     if ((len == NULL) || (cur == NULL)) return(0);
00783     if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
00784         /*
00785          * We are supposed to handle UTF8, check it's valid
00786          * From rfc2044: encoding of the Unicode values on UTF-8:
00787          *
00788          * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
00789          * 0000 0000-0000 007F   0xxxxxxx
00790          * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
00791          * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
00792          *
00793          * Check for the 0x110000 limit too
00794          */
00795         unsigned char c;
00796         unsigned int val;
00797 
00798         c = *cur;
00799         if (c & 0x80) {
00800             if ((cur[1] & 0xc0) != 0x80)
00801                 goto encoding_error;
00802             if ((c & 0xe0) == 0xe0) {
00803 
00804                 if ((cur[2] & 0xc0) != 0x80)
00805                     goto encoding_error;
00806                 if ((c & 0xf0) == 0xf0) {
00807                     if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
00808                         goto encoding_error;
00809                     /* 4-byte code */
00810                     *len = 4;
00811                     val = (cur[0] & 0x7) << 18;
00812                     val |= (cur[1] & 0x3f) << 12;
00813                     val |= (cur[2] & 0x3f) << 6;
00814                     val |= cur[3] & 0x3f;
00815                 } else {
00816                     /* 3-byte code */
00817                     *len = 3;
00818                     val = (cur[0] & 0xf) << 12;
00819                     val |= (cur[1] & 0x3f) << 6;
00820                     val |= cur[2] & 0x3f;
00821                 }
00822             } else {
00823                 /* 2-byte code */
00824                 *len = 2;
00825                 val = (cur[0] & 0x1f) << 6;
00826                 val |= cur[1] & 0x3f;
00827             }
00828             if (!IS_CHAR(val)) {
00829             xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
00830                   "Char 0x%X out of allowed range\n", val);
00831             }
00832             return (val);
00833         } else {
00834             /* 1-byte code */
00835             *len = 1;
00836             return ((int) *cur);
00837         }
00838     }
00839     /*
00840      * Assume it's a fixed length encoding (1) with
00841      * a compatible encoding for the ASCII set, since
00842      * XML constructs only use < 128 chars
00843      */
00844     *len = 1;
00845     return ((int) *cur);
00846 encoding_error:
00847 
00848     /*
00849      * An encoding problem may arise from a truncated input buffer
00850      * splitting a character in the middle. In that case do not raise
00851      * an error but return 0 to endicate an end of stream problem
00852      */
00853     if ((ctxt == NULL) || (ctxt->input == NULL) ||
00854         (ctxt->input->end - ctxt->input->cur < 4)) {
00855     *len = 0;
00856     return(0);
00857     }
00858     /*
00859      * If we detect an UTF8 error that probably mean that the
00860      * input encoding didn't get properly advertised in the
00861      * declaration header. Report the error and switch the encoding
00862      * to ISO-Latin-1 (if you don't like this policy, just declare the
00863      * encoding !)
00864      */
00865     {
00866         char buffer[150];
00867 
00868     snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
00869             ctxt->input->cur[0], ctxt->input->cur[1],
00870             ctxt->input->cur[2], ctxt->input->cur[3]);
00871     __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
00872              "Input is not proper UTF-8, indicate encoding !\n%s",
00873              BAD_CAST buffer, NULL);
00874     }
00875     *len = 1;
00876     return ((int) *cur);
00877 }
00878 
00888 int
00889 xmlCopyCharMultiByte(xmlChar *out, int val) {
00890     if (out == NULL) return(0);
00891     /*
00892      * We are supposed to handle UTF8, check it's valid
00893      * From rfc2044: encoding of the Unicode values on UTF-8:
00894      *
00895      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
00896      * 0000 0000-0000 007F   0xxxxxxx
00897      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
00898      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
00899      */
00900     if  (val >= 0x80) {
00901     xmlChar *savedout = out;
00902     int bits;
00903     if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
00904     else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
00905     else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
00906     else {
00907         xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
00908             "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
00909                   val);
00910         return(0);
00911     }
00912     for ( ; bits >= 0; bits-= 6)
00913         *out++= ((val >> bits) & 0x3F) | 0x80 ;
00914     return (out - savedout);
00915     }
00916     *out = (xmlChar) val;
00917     return 1;
00918 }
00919 
00931 int
00932 xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
00933     if (out == NULL) return(0);
00934     /* the len parameter is ignored */
00935     if  (val >= 0x80) {
00936     return(xmlCopyCharMultiByte (out, val));
00937     }
00938     *out = (xmlChar) val;
00939     return 1;
00940 }
00941 
00942 /************************************************************************
00943  *                                  *
00944  *      Commodity functions to switch encodings         *
00945  *                                  *
00946  ************************************************************************/
00947 
00948 /* defined in encoding.c, not public */
00949 int
00950 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
00951                        xmlBufferPtr in, int len);
00952 
/*
 * Forward declarations of two file-local (static) helpers that take an
 * encoding handler and a length.
 * NOTE(review): their definitions are not visible in this part of the
 * file - confirm the meaning of @len against them.
 */
00953 static int
00954 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
00955                        xmlCharEncodingHandlerPtr handler, int len);
00956 static int
00957 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
00958                           xmlCharEncodingHandlerPtr handler, int len);
00969 int
00970 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
00971 {
00972     xmlCharEncodingHandlerPtr handler;
00973     int len = -1;
00974 
00975     if (ctxt == NULL) return(-1);
00976     switch (enc) {
00977     case XML_CHAR_ENCODING_ERROR:
00978         __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
00979                        "encoding unknown\n", NULL, NULL);
00980         return(-1);
00981     case XML_CHAR_ENCODING_NONE:
00982         /* let's assume it's UTF-8 without the XML decl */
00983         ctxt->charset = XML_CHAR_ENCODING_UTF8;
00984         return(0);
00985     case XML_CHAR_ENCODING_UTF8:
00986         /* default encoding, no conversion should be needed */
00987         ctxt->charset = XML_CHAR_ENCODING_UTF8;
00988 
00989         /*
00990          * Errata on XML-1.0 June 20 2001
00991          * Specific handling of the Byte Order Mark for
00992          * UTF-8
00993          */
00994         if ((ctxt->input != NULL) &&
00995         (ctxt->input->cur[0] == 0xEF) &&
00996         (ctxt->input->cur[1] == 0xBB) &&
00997         (ctxt->input->cur[2] == 0xBF)) {
00998         ctxt->input->cur += 3;
00999         }
01000         return(0);
01001     case XML_CHAR_ENCODING_UTF16LE:
01002     case XML_CHAR_ENCODING_UTF16BE:
01003         /*The raw input characters are encoded
01004          *in UTF-16. As we expect this function
01005          *to be called after xmlCharEncInFunc, we expect
01006          *ctxt->input->cur to contain UTF-8 encoded characters.
01007          *So the raw UTF16 Byte Order Mark
01008          *has also been converted into
01009          *an UTF-8 BOM. Let's skip that BOM.
01010          */
01011         if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
01012             (ctxt->input->cur[0] == 0xEF) &&
01013             (ctxt->input->cur[1] == 0xBB) &&
01014             (ctxt->input->cur[2] == 0xBF)) {
01015             ctxt->input->cur += 3;
01016         }
01017         len = 90;
01018     break;
01019     case XML_CHAR_ENCODING_UCS2:
01020         len = 90;
01021     break;
01022     case XML_CHAR_ENCODING_UCS4BE:
01023     case XML_CHAR_ENCODING_UCS4LE:
01024     case XML_CHAR_ENCODING_UCS4_2143:
01025     case XML_CHAR_ENCODING_UCS4_3412:
01026         len = 180;
01027     break;
01028     case XML_CHAR_ENCODING_EBCDIC:
01029     case XML_CHAR_ENCODING_8859_1:
01030     case XML_CHAR_ENCODING_8859_2:
01031     case XML_CHAR_ENCODING_8859_3:
01032     case XML_CHAR_ENCODING_8859_4:
01033     case XML_CHAR_ENCODING_8859_5:
01034     case XML_CHAR_ENCODING_8859_6:
01035     case XML_CHAR_ENCODING_8859_7:
01036     case XML_CHAR_ENCODING_8859_8:
01037     case XML_CHAR_ENCODING_8859_9:
01038     case XML_CHAR_ENCODING_ASCII:
01039     case XML_CHAR_ENCODING_2022_JP:
01040     case XML_CHAR_ENCODING_SHIFT_JIS:
01041     case XML_CHAR_ENCODING_EUC_JP:
01042         len = 45;
01043     break;
01044     }
01045     handler = xmlGetCharEncodingHandler(enc);
01046     if (handler == NULL) {
01047     /*
01048      * Default handlers.
01049      */
01050     switch (enc) {
01051         case XML_CHAR_ENCODING_ASCII:
01052         /* default encoding, no conversion should be needed */
01053         ctxt->charset = XML_CHAR_ENCODING_UTF8;
01054         return(0);
01055         case XML_CHAR_ENCODING_UTF16LE:
01056         break;
01057         case XML_CHAR_ENCODING_UTF16BE:
01058         break;
01059         case XML_CHAR_ENCODING_UCS4LE:
01060         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01061                    "encoding not supported %s\n",
01062                    BAD_CAST "USC4 little endian", NULL);
01063         break;
01064         case XML_CHAR_ENCODING_UCS4BE:
01065         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01066                    "encoding not supported %s\n",
01067                    BAD_CAST "USC4 big endian", NULL);
01068         break;
01069         case XML_CHAR_ENCODING_EBCDIC:
01070         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01071                    "encoding not supported %s\n",
01072                    BAD_CAST "EBCDIC", NULL);
01073         break;
01074         case XML_CHAR_ENCODING_UCS4_2143:
01075         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01076                    "encoding not supported %s\n",
01077                    BAD_CAST "UCS4 2143", NULL);
01078         break;
01079         case XML_CHAR_ENCODING_UCS4_3412:
01080         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01081                    "encoding not supported %s\n",
01082                    BAD_CAST "UCS4 3412", NULL);
01083         break;
01084         case XML_CHAR_ENCODING_UCS2:
01085         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01086                    "encoding not supported %s\n",
01087                    BAD_CAST "UCS2", NULL);
01088         break;
01089         case XML_CHAR_ENCODING_8859_1:
01090         case XML_CHAR_ENCODING_8859_2:
01091         case XML_CHAR_ENCODING_8859_3:
01092         case XML_CHAR_ENCODING_8859_4:
01093         case XML_CHAR_ENCODING_8859_5:
01094         case XML_CHAR_ENCODING_8859_6:
01095         case XML_CHAR_ENCODING_8859_7:
01096         case XML_CHAR_ENCODING_8859_8:
01097         case XML_CHAR_ENCODING_8859_9:
01098         /*
01099          * We used to keep the internal content in the
01100          * document encoding however this turns being unmaintainable
01101          * So xmlGetCharEncodingHandler() will return non-null
01102          * values for this now.
01103          */
01104         if ((ctxt->inputNr == 1) &&
01105             (ctxt->encoding == NULL) &&
01106             (ctxt->input != NULL) &&
01107             (ctxt->input->encoding != NULL)) {
01108             ctxt->encoding = xmlStrdup(ctxt->input->encoding);
01109         }
01110         ctxt->charset = enc;
01111         return(0);
01112         case XML_CHAR_ENCODING_2022_JP:
01113         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01114                    "encoding not supported %s\n",
01115                    BAD_CAST "ISO-2022-JP", NULL);
01116         break;
01117         case XML_CHAR_ENCODING_SHIFT_JIS:
01118         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01119                    "encoding not supported %s\n",
01120                    BAD_CAST "Shift_JIS", NULL);
01121         break;
01122         case XML_CHAR_ENCODING_EUC_JP:
01123         __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
01124                    "encoding not supported %s\n",
01125                    BAD_CAST "EUC-JP", NULL);
01126         break;
01127         default:
01128             break;
01129     }
01130     }
01131     if (handler == NULL)
01132     return(-1);
01133     ctxt->charset = XML_CHAR_ENCODING_UTF8;
01134     return(xmlSwitchToEncodingInt(ctxt, handler, len));
01135 }
01136 
01149 static int
01150 xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
01151                           xmlCharEncodingHandlerPtr handler, int len)
01152 {
01153     int nbchars;
01154 
01155     if (handler == NULL)
01156         return (-1);
01157     if (input == NULL)
01158         return (-1);
01159     if (input->buf != NULL) {
01160         if (input->buf->encoder != NULL) {
01161             /*
01162              * Check in case the auto encoding detetection triggered
01163              * in already.
01164              */
01165             if (input->buf->encoder == handler)
01166                 return (0);
01167 
01168             /*
01169              * "UTF-16" can be used for both LE and BE
01170              if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
01171              BAD_CAST "UTF-16", 6)) &&
01172              (!xmlStrncmp(BAD_CAST handler->name,
01173              BAD_CAST "UTF-16", 6))) {
01174              return(0);
01175              }
01176              */
01177 
01178             /*
01179              * Note: this is a bit dangerous, but that's what it
01180              * takes to use nearly compatible signature for different
01181              * encodings.
01182              */
01183             xmlCharEncCloseFunc(input->buf->encoder);
01184             input->buf->encoder = handler;
01185             return (0);
01186         }
01187         input->buf->encoder = handler;
01188 
01189         /*
01190          * Is there already some content down the pipe to convert ?
01191          */
01192         if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
01193             int processed;
01194         unsigned int use;
01195 
01196             /*
01197              * Specific handling of the Byte Order Mark for 
01198              * UTF-16
01199              */
01200             if ((handler->name != NULL) &&
01201                 (!strcmp(handler->name, "UTF-16LE") ||
01202                  !strcmp(handler->name, "UTF-16")) &&
01203                 (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
01204                 input->cur += 2;
01205             }
01206             if ((handler->name != NULL) &&
01207                 (!strcmp(handler->name, "UTF-16BE")) &&
01208                 (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
01209                 input->cur += 2;
01210             }
01211             /*
01212              * Errata on XML-1.0 June 20 2001
01213              * Specific handling of the Byte Order Mark for
01214              * UTF-8
01215              */
01216             if ((handler->name != NULL) &&
01217                 (!strcmp(handler->name, "UTF-8")) &&
01218                 (input->cur[0] == 0xEF) &&
01219                 (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
01220                 input->cur += 3;
01221             }
01222 
01223             /*
01224              * Shrink the current input buffer.
01225              * Move it as the raw buffer and create a new input buffer
01226              */
01227             processed = input->cur - input->base;
01228             xmlBufferShrink(input->buf->buffer, processed);
01229             input->buf->raw = input->buf->buffer;
01230             input->buf->buffer = xmlBufferCreate();
01231         input->buf->rawconsumed = processed;
01232         use = input->buf->raw->use;
01233 
01234             if (ctxt->html) {
01235                 /*
01236                  * convert as much as possible of the buffer
01237                  */
01238                 nbchars = xmlCharEncInFunc(input->buf->encoder,
01239                                            input->buf->buffer,
01240                                            input->buf->raw);
01241             } else {
01242                 /*
01243                  * convert just enough to get
01244                  * '<?xml version="1.0" encoding="xxx"?>'
01245                  * parsed with the autodetected encoding
01246                  * into the parser reading buffer.
01247                  */
01248                 nbchars = xmlCharEncFirstLineInt(input->buf->encoder,
01249                                                  input->buf->buffer,
01250                                                  input->buf->raw,
01251                                                  len);
01252             }
01253             if (nbchars < 0) {
01254                 xmlErrInternal(ctxt,
01255                                "switching encoding: encoder error\n",
01256                                NULL);
01257                 return (-1);
01258             }
01259         input->buf->rawconsumed += use - input->buf->raw->use;
01260             input->base = input->cur = input->buf->buffer->content;
01261             input->end = &input->base[input->buf->buffer->use];
01262 
01263         }
01264         return (0);
01265     } else if (input->length == 0) {
01266     /*
01267      * When parsing a static memory array one must know the
01268      * size to be able to convert the buffer.
01269      */
01270     xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
01271     return (-1);
01272     }
01273     return (0);
01274 }
01275 
01287 int
01288 xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
01289                           xmlCharEncodingHandlerPtr handler) {
01290     return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
01291 }
01292 
01306 static int
01307 xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
01308                        xmlCharEncodingHandlerPtr handler, int len) {
01309     int ret = 0;
01310 
01311     if (handler != NULL) {
01312         if (ctxt->input != NULL) {
01313         ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
01314     } else {
01315         xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
01316                        NULL);
01317         return(-1);
01318     }
01319     /*
01320      * The parsing is now done in UTF8 natively
01321      */
01322     ctxt->charset = XML_CHAR_ENCODING_UTF8;
01323     } else
01324     return(-1);
01325     return(ret);
01326 }
01327 
01338 int
01339 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
01340 {
01341     return (xmlSwitchToEncodingInt(ctxt, handler, -1));
01342 }
01343 
01344 /************************************************************************
01345  *                                  *
01346  *  Commodity functions to handle entities processing       *
01347  *                                  *
01348  ************************************************************************/
01349 
01356 void
01357 xmlFreeInputStream(xmlParserInputPtr input) {
01358     if (input == NULL) return;
01359 
01360     if (input->filename != NULL) xmlFree((char *) input->filename);
01361     if (input->directory != NULL) xmlFree((char *) input->directory);
01362     if (input->encoding != NULL) xmlFree((char *) input->encoding);
01363     if (input->version != NULL) xmlFree((char *) input->version);
01364     if ((input->free != NULL) && (input->base != NULL))
01365         input->free((xmlChar *) input->base);
01366     if (input->buf != NULL) 
01367         xmlFreeParserInputBuffer(input->buf);
01368     xmlFree(input);
01369 }
01370 
01378 xmlParserInputPtr
01379 xmlNewInputStream(xmlParserCtxtPtr ctxt) {
01380     xmlParserInputPtr input;
01381     static int id = 0;
01382 
01383     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
01384     if (input == NULL) {
01385         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
01386     return(NULL);
01387     }
01388     memset(input, 0, sizeof(xmlParserInput));
01389     input->line = 1;
01390     input->col = 1;
01391     input->standalone = -1;
01392     /*
01393      * we don't care about thread reentrancy unicity for a single
01394      * parser context (and hence thread) is sufficient.
01395      */
01396     input->id = id++;
01397     return(input);
01398 }
01399 
01411 xmlParserInputPtr
01412 xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
01413                 xmlCharEncoding enc) {
01414     xmlParserInputPtr inputStream;
01415 
01416     if (input == NULL) return(NULL);
01417     if (xmlParserDebugEntities)
01418     xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
01419     inputStream = xmlNewInputStream(ctxt);
01420     if (inputStream == NULL) {
01421     return(NULL);
01422     }
01423     inputStream->filename = NULL;
01424     inputStream->buf = input;
01425     inputStream->base = inputStream->buf->buffer->content;
01426     inputStream->cur = inputStream->buf->buffer->content;
01427     inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
01428     if (enc != XML_CHAR_ENCODING_NONE) {
01429         xmlSwitchEncoding(ctxt, enc);
01430     }
01431 
01432     return(inputStream);
01433 }
01434 
01444 xmlParserInputPtr
01445 xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
01446     xmlParserInputPtr input;
01447 
01448     if (entity == NULL) {
01449         xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
01450                    NULL);
01451     return(NULL);
01452     }
01453     if (xmlParserDebugEntities)
01454     xmlGenericError(xmlGenericErrorContext,
01455         "new input from entity: %s\n", entity->name);
01456     if (entity->content == NULL) {
01457     switch (entity->etype) {
01458             case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
01459             xmlErrInternal(ctxt, "Cannot parse entity %s\n",
01460                        entity->name);
01461                 break;
01462             case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
01463             case XML_EXTERNAL_PARAMETER_ENTITY:
01464         return(xmlLoadExternalEntity((char *) entity->URI,
01465                (char *) entity->ExternalID, ctxt));
01466             case XML_INTERNAL_GENERAL_ENTITY:
01467             xmlErrInternal(ctxt,
01468               "Internal entity %s without content !\n",
01469                        entity->name);
01470                 break;
01471             case XML_INTERNAL_PARAMETER_ENTITY:
01472             xmlErrInternal(ctxt,
01473               "Internal parameter entity %s without content !\n",
01474                        entity->name);
01475                 break;
01476             case XML_INTERNAL_PREDEFINED_ENTITY:
01477             xmlErrInternal(ctxt,
01478               "Predefined entity %s without content !\n",
01479                        entity->name);
01480                 break;
01481     }
01482     return(NULL);
01483     }
01484     input = xmlNewInputStream(ctxt);
01485     if (input == NULL) {
01486     return(NULL);
01487     }
01488     if (entity->URI != NULL)
01489     input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
01490     input->base = entity->content;
01491     input->cur = entity->content;
01492     input->length = entity->length;
01493     input->end = &entity->content[input->length];
01494     return(input);
01495 }
01496 
01505 xmlParserInputPtr
01506 xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
01507     xmlParserInputPtr input;
01508 
01509     if (buffer == NULL) {
01510         xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
01511                    NULL);
01512     return(NULL);
01513     }
01514     if (xmlParserDebugEntities)
01515     xmlGenericError(xmlGenericErrorContext,
01516         "new fixed input: %.30s\n", buffer);
01517     input = xmlNewInputStream(ctxt);
01518     if (input == NULL) {
01519         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
01520     return(NULL);
01521     }
01522     input->base = buffer;
01523     input->cur = buffer;
01524     input->length = xmlStrlen(buffer);
01525     input->end = &buffer[input->length];
01526     return(input);
01527 }
01528 
01538 xmlParserInputPtr
01539 xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
01540     xmlParserInputBufferPtr buf;
01541     xmlParserInputPtr inputStream;
01542     char *directory = NULL;
01543     xmlChar *URI = NULL;
01544 
01545     if (xmlParserDebugEntities)
01546     xmlGenericError(xmlGenericErrorContext,
01547         "new input from file: %s\n", filename);
01548     if (ctxt == NULL) return(NULL);
01549     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
01550     if (buf == NULL) {
01551     if (filename == NULL)
01552         __xmlLoaderErr(ctxt,
01553                        "failed to load external entity: NULL filename \n",
01554                NULL);
01555     else
01556         __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
01557                (const char *) filename);
01558     return(NULL);
01559     }
01560 
01561     inputStream = xmlNewInputStream(ctxt);
01562     if (inputStream == NULL)
01563     return(NULL);
01564 
01565     inputStream->buf = buf;
01566     inputStream = xmlCheckHTTPInput(ctxt, inputStream);
01567     if (inputStream == NULL)
01568         return(NULL);
01569     
01570     if (inputStream->filename == NULL)
01571     URI = xmlStrdup((xmlChar *) filename);
01572     else
01573     URI = xmlStrdup((xmlChar *) inputStream->filename);
01574     directory = xmlParserGetDirectory((const char *) URI);
01575     if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
01576     inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
01577     if (URI != NULL) xmlFree((char *) URI);
01578     inputStream->directory = directory;
01579 
01580     inputStream->base = inputStream->buf->buffer->content;
01581     inputStream->cur = inputStream->buf->buffer->content;
01582     inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
01583     if ((ctxt->directory == NULL) && (directory != NULL))
01584         ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
01585     return(inputStream);
01586 }
01587 
01588 /************************************************************************
01589  *                                  *
01590  *      Commodity functions to handle parser contexts       *
01591  *                                  *
01592  ************************************************************************/
01593 
/*
 * xmlInitParserCtxt:
 * @ctxt:  the parser context to initialize
 *
 * Puts @ctxt into its initial parsing state. The dictionary, the SAX
 * handler block and the input/node/name/space stacks are allocated
 * only when the corresponding pointer is NULL; existing ones are
 * reused. Any input streams still on the input stack are popped and
 * freed. The remaining fields are reset to fixed values or to the
 * library-wide default variables.
 *
 * Note that version, encoding and directory are set to NULL here
 * without being freed.
 *
 * Returns 0 on success, -1 if @ctxt is NULL or an allocation fails.
 */
01603 int
01604 xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
01605 {
01606     xmlParserInputPtr input;
01607 
01608     if(ctxt==NULL) {
01609         xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
01610         return(-1);
01611     }
01612 
01613     xmlDefaultSAXHandlerInit();
01614 
    /* Create the dictionary unless the caller already supplied one */
01615     if (ctxt->dict == NULL)
01616     ctxt->dict = xmlDictCreate();
01617     if (ctxt->dict == NULL) {
01618         xmlErrMemory(NULL, "cannot initialize parser context\n");
01619     return(-1);
01620     }
    /*
     * Allocate the SAX handler block if needed; in either case it is
     * then (re)filled by xmlSAXVersion() with version 2.
     */
01621     if (ctxt->sax == NULL)
01622     ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
01623     if (ctxt->sax == NULL) {
01624         xmlErrMemory(NULL, "cannot initialize parser context\n");
01625     return(-1);
01626     }
01627     else
01628         xmlSAXVersion(ctxt->sax, 2);
01629 
01630     ctxt->maxatts = 0;
01631     ctxt->atts = NULL;
01632     /* Allocate the Input stack */
01633     if (ctxt->inputTab == NULL) {
01634     ctxt->inputTab = (xmlParserInputPtr *)
01635             xmlMalloc(5 * sizeof(xmlParserInputPtr));
01636     ctxt->inputMax = 5;
01637     }
01638     if (ctxt->inputTab == NULL) {
01639         xmlErrMemory(NULL, "cannot initialize parser context\n");
01640     ctxt->inputNr = 0;
01641     ctxt->inputMax = 0;
01642     ctxt->input = NULL;
01643     return(-1);
01644     }
    /* Drop any input streams left over on a reused context */
01645     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
01646         xmlFreeInputStream(input);
01647     }
01648     ctxt->inputNr = 0;
01649     ctxt->input = NULL;
01650 
    /* Document-level state; the old pointers are not freed here */
01651     ctxt->version = NULL;
01652     ctxt->encoding = NULL;
01653     ctxt->standalone = -1;
01654     ctxt->hasExternalSubset = 0;
01655     ctxt->hasPErefs = 0;
01656     ctxt->html = 0;
01657     ctxt->external = 0;
01658     ctxt->instate = XML_PARSER_START;
01659     ctxt->token = 0;
01660     ctxt->directory = NULL;
01661 
01662     /* Allocate the Node stack */
01663     if (ctxt->nodeTab == NULL) {
01664     ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
01665     ctxt->nodeMax = 10;
01666     }
    /* On failure the node and input stack bookkeeping is zeroed */
01667     if (ctxt->nodeTab == NULL) {
01668         xmlErrMemory(NULL, "cannot initialize parser context\n");
01669     ctxt->nodeNr = 0;
01670     ctxt->nodeMax = 0;
01671     ctxt->node = NULL;
01672     ctxt->inputNr = 0;
01673     ctxt->inputMax = 0;
01674     ctxt->input = NULL;
01675     return(-1);
01676     }
01677     ctxt->nodeNr = 0;
01678     ctxt->node = NULL;
01679 
01680     /* Allocate the Name stack */
01681     if (ctxt->nameTab == NULL) {
01682     ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
01683     ctxt->nameMax = 10;
01684     }
    /* On failure the node, input and name stack bookkeeping is zeroed */
01685     if (ctxt->nameTab == NULL) {
01686         xmlErrMemory(NULL, "cannot initialize parser context\n");
01687     ctxt->nodeNr = 0;
01688     ctxt->nodeMax = 0;
01689     ctxt->node = NULL;
01690     ctxt->inputNr = 0;
01691     ctxt->inputMax = 0;
01692     ctxt->input = NULL;
01693     ctxt->nameNr = 0;
01694     ctxt->nameMax = 0;
01695     ctxt->name = NULL;
01696     return(-1);
01697     }
01698     ctxt->nameNr = 0;
01699     ctxt->name = NULL;
01700 
01701     /* Allocate the space stack */
01702     if (ctxt->spaceTab == NULL) {
01703     ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
01704     ctxt->spaceMax = 10;
01705     }
    /* On failure the bookkeeping of all four stacks is zeroed */
01706     if (ctxt->spaceTab == NULL) {
01707         xmlErrMemory(NULL, "cannot initialize parser context\n");
01708     ctxt->nodeNr = 0;
01709     ctxt->nodeMax = 0;
01710     ctxt->node = NULL;
01711     ctxt->inputNr = 0;
01712     ctxt->inputMax = 0;
01713     ctxt->input = NULL;
01714     ctxt->nameNr = 0;
01715     ctxt->nameMax = 0;
01716     ctxt->name = NULL;
01717     ctxt->spaceNr = 0;
01718     ctxt->spaceMax = 0;
01719     ctxt->space = NULL;
01720     return(-1);
01721     }
    /*
     * The space stack starts with one entry holding -1.
     * NOTE(review): spaceMax is forced to 10 even when a pre-existing
     * spaceTab was reused -- confirm that is intended.
     */
01722     ctxt->spaceNr = 1;
01723     ctxt->spaceMax = 10;
01724     ctxt->spaceTab[0] = -1;
01725     ctxt->space = &ctxt->spaceTab[0];
    /* The context itself is the default SAX user data */
01726     ctxt->userData = ctxt;
01727     ctxt->myDoc = NULL;
01728     ctxt->wellFormed = 1;
01729     ctxt->nsWellFormed = 1;
01730     ctxt->valid = 1;
    /* Parsing options come from the library-wide default variables */
01731     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
01732     ctxt->validate = xmlDoValidityCheckingDefaultValue;
01733     ctxt->pedantic = xmlPedanticParserDefaultValue;
01734     ctxt->linenumbers = xmlLineNumbersDefaultValue;
01735     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
01736     if (ctxt->keepBlanks == 0)
01737     ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
01738 
    /* Validation context: errors and warnings are routed via ctxt */
01739     ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
01740     ctxt->vctxt.userData = ctxt;
01741     ctxt->vctxt.error = xmlParserValidityError;
01742     ctxt->vctxt.warning = xmlParserValidityWarning;
    /* When validating, warnings are dropped if globally disabled */
01743     if (ctxt->validate) {
01744     if (xmlGetWarningsDefaultValue == 0)
01745         ctxt->vctxt.warning = NULL;
01746     else
01747         ctxt->vctxt.warning = xmlParserValidityWarning;
01748     ctxt->vctxt.nodeMax = 0;
01749     }
01750     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
01751     ctxt->record_info = 0;
01752     ctxt->nbChars = 0;
01753     ctxt->checkIndex = 0;
01754     ctxt->inSubset = 0;
01755     ctxt->errNo = XML_ERR_OK;
01756     ctxt->depth = 0;
01757     ctxt->charset = XML_CHAR_ENCODING_UTF8;
01758     ctxt->catalogs = NULL;
01759     ctxt->nbentities = 0;
01760     xmlInitNodeInfoSeq(&ctxt->node_seq);
01761     return(0);
01762 }
01763 
01772 void
01773 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
01774 {
01775     xmlParserInputPtr input;
01776 
01777     if (ctxt == NULL) return;
01778 
01779     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
01780         xmlFreeInputStream(input);
01781     }
01782     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
01783     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
01784     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
01785     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
01786     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
01787     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
01788     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
01789     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
01790     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
01791 #ifdef LIBXML_SAX1_ENABLED
01792     if ((ctxt->sax != NULL) &&
01793         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
01794 #else
01795     if (ctxt->sax != NULL)
01796 #endif /* LIBXML_SAX1_ENABLED */
01797         xmlFree(ctxt->sax);
01798     if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
01799     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
01800     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
01801     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
01802     if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
01803     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
01804     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
01805     if (ctxt->attsDefault != NULL) 
01806         xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
01807     if (ctxt->attsSpecial != NULL)
01808         xmlHashFree(ctxt->attsSpecial, NULL);
01809     if (ctxt->freeElems != NULL) {
01810         xmlNodePtr cur, next;
01811 
01812     cur = ctxt->freeElems;
01813     while (cur != NULL) {
01814         next = cur->next;
01815         xmlFree(cur);
01816         cur = next;
01817     }
01818     }
01819     if (ctxt->freeAttrs != NULL) {
01820         xmlAttrPtr cur, next;
01821 
01822     cur = ctxt->freeAttrs;
01823     while (cur != NULL) {
01824         next = cur->next;
01825         xmlFree(cur);
01826         cur = next;
01827     }
01828     }
01829     /*
01830      * cleanup the error strings
01831      */
01832     if (ctxt->lastError.message != NULL)
01833         xmlFree(ctxt->lastError.message);
01834     if (ctxt->lastError.file != NULL)
01835         xmlFree(ctxt->lastError.file);
01836     if (ctxt->lastError.str1 != NULL)
01837         xmlFree(ctxt->lastError.str1);
01838     if (ctxt->lastError.str2 != NULL)
01839         xmlFree(ctxt->lastError.str2);
01840     if (ctxt->lastError.str3 != NULL)
01841         xmlFree(ctxt->lastError.str3);
01842 
01843 #ifdef LIBXML_CATALOG_ENABLED
01844     if (ctxt->catalogs != NULL)
01845     xmlCatalogFreeLocal(ctxt->catalogs);
01846 #endif
01847     xmlFree(ctxt);
01848 }
01849 
01858 xmlParserCtxtPtr
01859 xmlNewParserCtxt(void)
01860 {
01861     xmlParserCtxtPtr ctxt;
01862 
01863     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
01864     if (ctxt == NULL) {
01865     xmlErrMemory(NULL, "cannot allocate parser context\n");
01866     return(NULL);
01867     }
01868     memset(ctxt, 0, sizeof(xmlParserCtxt));
01869     if (xmlInitParserCtxt(ctxt) < 0) {
01870         xmlFreeParserCtxt(ctxt);
01871     return(NULL);
01872     }
01873     return(ctxt);
01874 }
01875 
01876 /************************************************************************
01877  *                                  *
01878  *      Handling of node information                *
01879  *                                  *
01880  ************************************************************************/
01881 
01889 void
01890 xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
01891 {
01892   if (ctxt==NULL)
01893     return;
01894   xmlClearNodeInfoSeq(&ctxt->node_seq);
01895   xmlCtxtReset(ctxt);
01896 }
01897 
01898 
01908 const xmlParserNodeInfo *
01909 xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
01910 {
01911     unsigned long pos;
01912 
01913     if ((ctx == NULL) || (node == NULL))
01914         return (NULL);
01915     /* Find position where node should be at */
01916     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
01917     if (pos < ctx->node_seq.length
01918         && ctx->node_seq.buffer[pos].node == node)
01919         return &ctx->node_seq.buffer[pos];
01920     else
01921         return NULL;
01922 }
01923 
01924 
01931 void
01932 xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
01933 {
01934     if (seq == NULL)
01935         return;
01936     seq->length = 0;
01937     seq->maximum = 0;
01938     seq->buffer = NULL;
01939 }
01940 
01948 void
01949 xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
01950 {
01951     if (seq == NULL)
01952         return;
01953     if (seq->buffer != NULL)
01954         xmlFree(seq->buffer);
01955     xmlInitNodeInfoSeq(seq);
01956 }
01957 
01969 unsigned long
01970 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
01971                            const xmlNodePtr node)
01972 {
01973     unsigned long upper, lower, middle;
01974     int found = 0;
01975 
01976     if ((seq == NULL) || (node == NULL))
01977         return ((unsigned long) -1);
01978 
01979     /* Do a binary search for the key */
01980     lower = 1;
01981     upper = seq->length;
01982     middle = 0;
01983     while (lower <= upper && !found) {
01984         middle = lower + (upper - lower) / 2;
01985         if (node == seq->buffer[middle - 1].node)
01986             found = 1;
01987         else if (node < seq->buffer[middle - 1].node)
01988             upper = middle - 1;
01989         else
01990             lower = middle + 1;
01991     }
01992 
01993     /* Return position */
01994     if (middle == 0 || seq->buffer[middle - 1].node < node)
01995         return middle;
01996     else
01997         return middle - 1;
01998 }
01999 
02000 
02008 void
02009 xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
02010                      const xmlParserNodeInfoPtr info)
02011 {
02012     unsigned long pos;
02013 
02014     if ((ctxt == NULL) || (info == NULL)) return;
02015 
02016     /* Find pos and check to see if node is already in the sequence */
02017     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
02018                                      info->node);
02019 
02020     if ((pos < ctxt->node_seq.length) && 
02021         (ctxt->node_seq.buffer != NULL) &&
02022         (ctxt->node_seq.buffer[pos].node == info->node)) {
02023         ctxt->node_seq.buffer[pos] = *info;
02024     }
02025 
02026     /* Otherwise, we need to add new node to buffer */
02027     else {
02028         if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
02029             xmlParserNodeInfo *tmp_buffer;
02030             unsigned int byte_size;
02031 
02032             if (ctxt->node_seq.maximum == 0)
02033                 ctxt->node_seq.maximum = 2;
02034             byte_size = (sizeof(*ctxt->node_seq.buffer) *
02035             (2 * ctxt->node_seq.maximum));
02036 
02037             if (ctxt->node_seq.buffer == NULL)
02038                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
02039             else
02040                 tmp_buffer =
02041                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
02042                                                      byte_size);
02043 
02044             if (tmp_buffer == NULL) {
02045         xmlErrMemory(ctxt, "failed to allocate buffer\n");
02046                 return;
02047             }
02048             ctxt->node_seq.buffer = tmp_buffer;
02049             ctxt->node_seq.maximum *= 2;
02050         }
02051 
02052         /* If position is not at end, move elements out of the way */
02053         if (pos != ctxt->node_seq.length) {
02054             unsigned long i;
02055 
02056             for (i = ctxt->node_seq.length; i > pos; i--)
02057                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
02058         }
02059 
02060         /* Copy element and increase length */
02061         ctxt->node_seq.buffer[pos] = *info;
02062         ctxt->node_seq.length++;
02063     }
02064 }
02065 
/************************************************************************
 *                                                                      *
 *                          Default settings                            *
 *                                                                      *
 ************************************************************************/
02080 int
02081 xmlPedanticParserDefault(int val) {
02082     int old = xmlPedanticParserDefaultValue;
02083 
02084     xmlPedanticParserDefaultValue = val;
02085     return(old);
02086 }
02087 
02098 int
02099 xmlLineNumbersDefault(int val) {
02100     int old = xmlLineNumbersDefaultValue;
02101 
02102     xmlLineNumbersDefaultValue = val;
02103     return(old);
02104 }
02105 
02120 int
02121 xmlSubstituteEntitiesDefault(int val) {
02122     int old = xmlSubstituteEntitiesDefaultValue;
02123 
02124     xmlSubstituteEntitiesDefaultValue = val;
02125     return(old);
02126 }
02127 
02152 int
02153 xmlKeepBlanksDefault(int val) {
02154     int old = xmlKeepBlanksDefaultValue;
02155 
02156     xmlKeepBlanksDefaultValue = val;
02157     if (!val) xmlIndentTreeOutput = 1;
02158     return(old);
02159 }
02160 
02161 #define bottom_parserInternals
02162 #include "elfgcchack.h"

Generated on Sat May 26 2012 04:33:19 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.